summary | refs | log | tree | commit | diff
path: root/text_recognizer/metadata/emnist.py
diff options
context:
space:
mode:
Diffstat (limited to 'text_recognizer/metadata/emnist.py')
-rw-r--r-- text_recognizer/metadata/emnist.py | 106
1 files changed, 106 insertions, 0 deletions
diff --git a/text_recognizer/metadata/emnist.py b/text_recognizer/metadata/emnist.py
new file mode 100644
index 0000000..23ddcc7
--- /dev/null
+++ b/text_recognizer/metadata/emnist.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+
+import text_recognizer.metadata.shared as shared
+
+RAW_DATA_DIRNAME = shared.DATA_DIRNAME / "raw" / "emnist"  # <shared data dir>/raw/emnist
+METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"  # metadata.toml inside the raw directory
+DL_DATA_DIRNAME = shared.DATA_DIRNAME / "downloaded" / "emnist"  # <shared data dir>/downloaded/emnist
+PROCESSED_DATA_DIRNAME = shared.DATA_DIRNAME / "processed" / "emnist"  # <shared data dir>/processed/emnist
+PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "byclass.h5"  # byclass.h5 inside the processed directory
+ESSENTIALS_FILENAME = (  # located relative to this module, not to shared.DATA_DIRNAME
+ Path(__file__).parents[1].resolve() / "data" / "emnist_essentials.json"  # parents[1] is the parent of this file's directory
+)
+
+SEED = 4711  # NOTE(review): presumably the RNG seed used when sampling/splitting; confirm with consumers
+NUM_SPECIAL_TOKENS = 4  # equals the number of leading "<...>" entries in MAPPING
+SAMPLE_TO_BALANCE = True  # NOTE(review): presumably toggles class balancing by sampling; confirm with consumers
+
+INPUT_SHAPE = (28, 28)  # NOTE(review): presumably (height, width) of one image; confirm
+DIMS = (1, *INPUT_SHAPE) # Extra dimension added by ToTensor()
+OUTPUT_DIMS = (1,)  # one-element tuple; NOTE(review): presumably one label per sample; confirm
+
+MAPPING = [  # ordered list of 83 symbols; NOTE(review): presumably list position == class index, confirm with consumers
+ "<B>",  # four bracketed tokens come first (count matches NUM_SPECIAL_TOKENS); their meanings are not defined here
+ "<S>",
+ "<E>",
+ "<P>",
+ "0",  # digits 0-9
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+ "A",  # uppercase A-Z
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "a",  # lowercase a-z
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ " ",  # space, followed by 16 punctuation characters
+ "!",
+ '"',
+ "#",
+ "&",
+ "'",
+ "(",
+ ")",
+ "*",
+ "+",
+ ",",
+ "-",
+ ".",
+ "/",
+ ":",
+ ";",
+ "?",
+]