diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-27 00:08:04 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-27 00:08:04 +0200 |
commit | 27ff7d113108e9cc51ddc5ff13b648b9c75fa865 (patch) | |
tree | 96b35c2f65978b8718665aaded3d29f00aaf43e2 /text_recognizer/metadata | |
parent | 3227735099f8acb37ffe658b8f04b6c308b64d23 (diff) |
Add metadata
Diffstat (limited to 'text_recognizer/metadata')
-rw-r--r-- | text_recognizer/metadata/__init__.py | 1 | ||||
-rw-r--r-- | text_recognizer/metadata/emnist.py | 106 | ||||
-rw-r--r-- | text_recognizer/metadata/emnist_lines.py | 21 | ||||
-rw-r--r-- | text_recognizer/metadata/iam.py | 9 | ||||
-rw-r--r-- | text_recognizer/metadata/iam_lines.py | 15 | ||||
-rw-r--r-- | text_recognizer/metadata/iam_paragraphs.py | 18 | ||||
-rw-r--r-- | text_recognizer/metadata/iam_synthetic_paragraphs.py | 6 | ||||
-rw-r--r-- | text_recognizer/metadata/shared.py | 4 |
8 files changed, 180 insertions, 0 deletions
diff --git a/text_recognizer/metadata/__init__.py b/text_recognizer/metadata/__init__.py
new file mode 100644
index 0000000..6ca296a
--- /dev/null
+++ b/text_recognizer/metadata/__init__.py
@@ -0,0 +1 @@
+"""Metadata for datasets."""
diff --git a/text_recognizer/metadata/emnist.py b/text_recognizer/metadata/emnist.py
new file mode 100644
index 0000000..23ddcc7
--- /dev/null
+++ b/text_recognizer/metadata/emnist.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+
+import text_recognizer.metadata.shared as shared
+
+RAW_DATA_DIRNAME = shared.DATA_DIRNAME / "raw" / "emnist"
+METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"
+DL_DATA_DIRNAME = shared.DATA_DIRNAME / "downloaded" / "emnist"
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "emnist"
+PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "byclass.h5"
+ESSENTIALS_FILENAME = (
+    Path(__file__).parents[1].resolve() / "data" / "emnist_essentials.json"
+)
+
+SEED = 4711
+NUM_SPECIAL_TOKENS = 4
+SAMPLE_TO_BALANCE = True
+
+INPUT_SHAPE = (28, 28)
+DIMS = (1, *INPUT_SHAPE)  # Extra dimension added by ToTensor()
+OUTPUT_DIMS = (1,)
+
+MAPPING = [
+    "<B>",
+    "<S>",
+    "<E>",
+    "<P>",
+    "0",
+    "1",
+    "2",
+    "3",
+    "4",
+    "5",
+    "6",
+    "7",
+    "8",
+    "9",
+    "A",
+    "B",
+    "C",
+    "D",
+    "E",
+    "F",
+    "G",
+    "H",
+    "I",
+    "J",
+    "K",
+    "L",
+    "M",
+    "N",
+    "O",
+    "P",
+    "Q",
+    "R",
+    "S",
+    "T",
+    "U",
+    "V",
+    "W",
+    "X",
+    "Y",
+    "Z",
+    "a",
+    "b",
+    "c",
+    "d",
+    "e",
+    "f",
+    "g",
+    "h",
+    "i",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "o",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "u",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
+    " ",
+    "!",
+    '"',
+    "#",
+    "&",
+    "'",
+    "(",
+    ")",
+    "*",
+    "+",
+    ",",
+    "-",
+    ".",
+    "/",
+    ":",
+    ";",
+    "?",
+]
diff --git a/text_recognizer/metadata/emnist_lines.py b/text_recognizer/metadata/emnist_lines.py
new file mode 100644
index 0000000..5a329bb
--- /dev/null
+++ b/text_recognizer/metadata/emnist_lines.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+import text_recognizer.metadata.emnist as emnist
+import text_recognizer.metadata.shared as shared
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "emnist_lines"
+ESSENTIALS_FILENAME = (
+    Path(__file__).parents[1].resolve()
+    / "data"
+    / "mappings"
+    / "emnist_lines_essentials.json"
+)
+
+CHAR_HEIGHT, CHAR_WIDTH = emnist.DIMS[1:3]
+DIMS = (
+    emnist.DIMS[0],
+    CHAR_HEIGHT,
+    None,
+)  # width variable, depends on maximum sequence length
+
+MAPPING = emnist.MAPPING
diff --git a/text_recognizer/metadata/iam.py b/text_recognizer/metadata/iam.py
new file mode 100644
index 0000000..6995f83
--- /dev/null
+++ b/text_recognizer/metadata/iam.py
@@ -0,0 +1,9 @@
+import text_recognizer.metadata.shared as shared
+
+RAW_DATA_DIRNAME = shared.DATA_DIRNAME / "raw" / "iam"
+METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"
+DL_DATA_DIRNAME = shared.DATA_DIRNAME / "downloaded" / "iam"
+EXTRACTED_DATASET_DIRNAME = DL_DATA_DIRNAME / "iamdb"
+
+DOWNSAMPLE_FACTOR = 2  # if images were downsampled, the regions must also be downsampled
+LINE_REGION_PADDING = 8  # add this many pixels around the exact coordinates
diff --git a/text_recognizer/metadata/iam_lines.py b/text_recognizer/metadata/iam_lines.py
new file mode 100644
index 0000000..73b2a28
--- /dev/null
+++ b/text_recognizer/metadata/iam_lines.py
@@ -0,0 +1,15 @@
+import text_recognizer.metadata.emnist as emnist
+import text_recognizer.metadata.shared as shared
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "iam_lines"
+
+IMAGE_SCALE_FACTOR = 2
+
+CHAR_WIDTH = emnist.INPUT_SHAPE[0] // IMAGE_SCALE_FACTOR  # rough estimate
+IMAGE_HEIGHT = 112 // IMAGE_SCALE_FACTOR
+IMAGE_WIDTH = 3072 // IMAGE_SCALE_FACTOR  # rounding up IAMLines empirical maximum width
+
+DIMS = (1, IMAGE_HEIGHT, IMAGE_WIDTH)
+OUTPUT_DIMS = (89, 1)
+
+MAPPING = emnist.MAPPING
diff --git a/text_recognizer/metadata/iam_paragraphs.py b/text_recognizer/metadata/iam_paragraphs.py
new file mode 100644
index 0000000..70b802b
--- /dev/null
+++ b/text_recognizer/metadata/iam_paragraphs.py
@@ -0,0 +1,18 @@
+import text_recognizer.metadata.emnist as emnist
+import text_recognizer.metadata.shared as shared
+
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "iam_paragraphs"
+
+NEW_LINE_TOKEN = "\n"
+MAPPING = [*emnist.MAPPING, NEW_LINE_TOKEN]
+
+# must match IMAGE_SCALE_FACTOR for IAMLines to be compatible with synthetic paragraphs
+IMAGE_SCALE_FACTOR = 2
+IMAGE_HEIGHT, IMAGE_WIDTH = 576, 640
+IMAGE_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH)
+
+MAX_LABEL_LENGTH = 682
+
+DIMS = (1, IMAGE_HEIGHT, IMAGE_WIDTH)
+OUTPUT_DIMS = (MAX_LABEL_LENGTH, 1)
diff --git a/text_recognizer/metadata/iam_synthetic_paragraphs.py b/text_recognizer/metadata/iam_synthetic_paragraphs.py
new file mode 100644
index 0000000..15bcfc8
--- /dev/null
+++ b/text_recognizer/metadata/iam_synthetic_paragraphs.py
@@ -0,0 +1,6 @@
+import text_recognizer.metadata.iam_paragraphs as iam_paragraphs
+import text_recognizer.metadata.shared as shared
+
+NEW_LINE_TOKEN = iam_paragraphs.NEW_LINE_TOKEN
+
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "iam_synthetic_paragraphs"
diff --git a/text_recognizer/metadata/shared.py b/text_recognizer/metadata/shared.py
new file mode 100644
index 0000000..a4d1da0
--- /dev/null
+++ b/text_recognizer/metadata/shared.py
@@ -0,0 +1,4 @@
+from pathlib import Path
+
+DATA_DIRNAME = Path(__file__).resolve().parents[2] / "data"
+DOWNLOADED_DATA_DIRNAME = DATA_DIRNAME / "downloaded"