summary | refs | log | tree | commit | diff
path: root/text_recognizer/metadata
diff options
context:
space:
mode:
Diffstat (limited to 'text_recognizer/metadata')
-rw-r--r-- text_recognizer/metadata/__init__.py 1
-rw-r--r-- text_recognizer/metadata/emnist.py 106
-rw-r--r-- text_recognizer/metadata/emnist_lines.py 21
-rw-r--r-- text_recognizer/metadata/iam.py 9
-rw-r--r-- text_recognizer/metadata/iam_lines.py 15
-rw-r--r-- text_recognizer/metadata/iam_paragraphs.py 18
-rw-r--r-- text_recognizer/metadata/iam_synthetic_paragraphs.py 6
-rw-r--r-- text_recognizer/metadata/shared.py 4
8 files changed, 180 insertions, 0 deletions
diff --git a/text_recognizer/metadata/__init__.py b/text_recognizer/metadata/__init__.py
new file mode 100644
index 0000000..6ca296a
--- /dev/null
+++ b/text_recognizer/metadata/__init__.py
@@ -0,0 +1 @@
+"""Metadata for datasets."""
diff --git a/text_recognizer/metadata/emnist.py b/text_recognizer/metadata/emnist.py
new file mode 100644
index 0000000..23ddcc7
--- /dev/null
+++ b/text_recognizer/metadata/emnist.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+
+import text_recognizer.metadata.shared as shared
+
+RAW_DATA_DIRNAME = shared.DATA_DIRNAME / "raw" / "emnist"
+METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"
+DL_DATA_DIRNAME = shared.DATA_DIRNAME / "downloaded" / "emnist"
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "emnist"
+PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "byclass.h5"
+ESSENTIALS_FILENAME = (  # under text_recognizer/data, not under shared.DATA_DIRNAME
+ Path(__file__).parents[1].resolve() / "data" / "emnist_essentials.json"
+)
+
+SEED = 4711
+NUM_SPECIAL_TOKENS = 4  # MAPPING starts with four special tokens: <B>, <S>, <E>, <P>
+SAMPLE_TO_BALANCE = True
+
+INPUT_SHAPE = (28, 28)  # (height, width), matching how emnist_lines.py unpacks DIMS[1:3]
+DIMS = (1, *INPUT_SHAPE)  # extra leading dimension added by ToTensor()
+OUTPUT_DIMS = (1,)
+MAPPING = [  # 83 entries: 4 special tokens, digits, A-Z, a-z, then space and punctuation
+ "<B>",  # special tokens
+ "<S>",
+ "<E>",
+ "<P>",
+ "0",  # digits
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+ "A",  # uppercase letters
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "a",  # lowercase letters
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ " ",  # space and punctuation
+ "!",
+ '"',
+ "#",
+ "&",
+ "'",
+ "(",
+ ")",
+ "*",
+ "+",
+ ",",
+ "-",
+ ".",
+ "/",
+ ":",
+ ";",
+ "?",
+]
diff --git a/text_recognizer/metadata/emnist_lines.py b/text_recognizer/metadata/emnist_lines.py
new file mode 100644
index 0000000..5a329bb
--- /dev/null
+++ b/text_recognizer/metadata/emnist_lines.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+import text_recognizer.metadata.emnist as emnist
+import text_recognizer.metadata.shared as shared
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "emnist_lines"
+ESSENTIALS_FILENAME = (  # under text_recognizer/data/mappings, not under shared.DATA_DIRNAME
+ Path(__file__).parents[1].resolve()
+ / "data"
+ / "mappings"
+ / "emnist_lines_essentials.json"
+)
+
+CHAR_HEIGHT, CHAR_WIDTH = emnist.DIMS[1:3]  # the two entries of emnist.INPUT_SHAPE
+DIMS = (
+ emnist.DIMS[0],
+ CHAR_HEIGHT,
+ None,
+)  # width is variable, depends on maximum sequence length
+
+MAPPING = emnist.MAPPING  # same list object as emnist.MAPPING, not a copy
diff --git a/text_recognizer/metadata/iam.py b/text_recognizer/metadata/iam.py
new file mode 100644
index 0000000..6995f83
--- /dev/null
+++ b/text_recognizer/metadata/iam.py
@@ -0,0 +1,9 @@
+import text_recognizer.metadata.shared as shared
+
+RAW_DATA_DIRNAME = shared.DATA_DIRNAME / "raw" / "iam"
+METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"
+DL_DATA_DIRNAME = shared.DATA_DIRNAME / "downloaded" / "iam"
+EXTRACTED_DATASET_DIRNAME = DL_DATA_DIRNAME / "iamdb"
+
+DOWNSAMPLE_FACTOR = 2  # if images were downsampled, the regions must also be downsampled
+LINE_REGION_PADDING = 8  # add this many pixels around the exact coordinates
diff --git a/text_recognizer/metadata/iam_lines.py b/text_recognizer/metadata/iam_lines.py
new file mode 100644
index 0000000..73b2a28
--- /dev/null
+++ b/text_recognizer/metadata/iam_lines.py
@@ -0,0 +1,15 @@
+import text_recognizer.metadata.emnist as emnist
+import text_recognizer.metadata.shared as shared
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "iam_lines"
+
+IMAGE_SCALE_FACTOR = 2  # divisor applied to the pixel sizes below
+
+CHAR_WIDTH = emnist.INPUT_SHAPE[1] // IMAGE_SCALE_FACTOR  # rough estimate; index 1 is the width of (height, width)
+IMAGE_HEIGHT = 112 // IMAGE_SCALE_FACTOR  # 56
+IMAGE_WIDTH = 3072 // IMAGE_SCALE_FACTOR  # 1536; 3072 rounds up the IAMLines empirical maximum width
+
+DIMS = (1, IMAGE_HEIGHT, IMAGE_WIDTH)
+OUTPUT_DIMS = (89, 1)  # NOTE(review): 89 is presumably the maximum label length -- confirm
+
+MAPPING = emnist.MAPPING  # same list object as emnist.MAPPING, not a copy
diff --git a/text_recognizer/metadata/iam_paragraphs.py b/text_recognizer/metadata/iam_paragraphs.py
new file mode 100644
index 0000000..70b802b
--- /dev/null
+++ b/text_recognizer/metadata/iam_paragraphs.py
@@ -0,0 +1,18 @@
+import text_recognizer.metadata.emnist as emnist
+import text_recognizer.metadata.shared as shared
+
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "iam_paragraphs"
+
+NEW_LINE_TOKEN = "\n"
+MAPPING = [*emnist.MAPPING, NEW_LINE_TOKEN]  # new list: emnist.MAPPING with the newline token appended last
+
+# must match iam_lines.IMAGE_SCALE_FACTOR (also 2) so IAMLines stays compatible with synthetic paragraphs
+IMAGE_SCALE_FACTOR = 2
+IMAGE_HEIGHT, IMAGE_WIDTH = 576, 640
+IMAGE_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH)
+
+MAX_LABEL_LENGTH = 682
+
+DIMS = (1, IMAGE_HEIGHT, IMAGE_WIDTH)
+OUTPUT_DIMS = (MAX_LABEL_LENGTH, 1)
diff --git a/text_recognizer/metadata/iam_synthetic_paragraphs.py b/text_recognizer/metadata/iam_synthetic_paragraphs.py
new file mode 100644
index 0000000..15bcfc8
--- /dev/null
+++ b/text_recognizer/metadata/iam_synthetic_paragraphs.py
@@ -0,0 +1,6 @@
+import text_recognizer.metadata.iam_paragraphs as iam_paragraphs
+import text_recognizer.metadata.shared as shared
+
+NEW_LINE_TOKEN = iam_paragraphs.NEW_LINE_TOKEN  # re-exported so both paragraph datasets use the same token
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "iam_synthetic_paragraphs"
diff --git a/text_recognizer/metadata/shared.py b/text_recognizer/metadata/shared.py
new file mode 100644
index 0000000..a4d1da0
--- /dev/null
+++ b/text_recognizer/metadata/shared.py
@@ -0,0 +1,4 @@
+from pathlib import Path
+
+DATA_DIRNAME = Path(__file__).resolve().parents[2] / "data"  # "data" dir next to the text_recognizer package
+DOWNLOADED_DATA_DIRNAME = DATA_DIRNAME / "downloaded"  # same spelling as the DL_DATA_DIRNAME paths in emnist.py / iam.py