summary | refs | log | tree | commit | diff
path: root/tests/support/create_emnist_lines_support_files.py
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-03-21 22:33:58 +0100
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-03-21 22:33:58 +0100
commit: e3741de333a3a43a7968241b6eccaaac66dd7b20 (patch)
tree: 7c50aee4ca61f77e95f1b038030292c64bbb86c2 /tests/support/create_emnist_lines_support_files.py
parent: aac452a2dc008338cb543549652da293c14b6b4e (diff)
Working on EMNIST Lines dataset
Diffstat (limited to 'tests/support/create_emnist_lines_support_files.py')
-rw-r--r-- tests/support/create_emnist_lines_support_files.py | 51
1 file changed, 51 insertions, 0 deletions
diff --git a/tests/support/create_emnist_lines_support_files.py b/tests/support/create_emnist_lines_support_files.py
new file mode 100644
index 0000000..9abe143
--- /dev/null
+++ b/tests/support/create_emnist_lines_support_files.py
@@ -0,0 +1,51 @@
+"""Module for creating EMNIST Lines test support files."""
+# flake8: noqa: S106
+
+from pathlib import Path
+import shutil
+
+import numpy as np
+
+from text_recognizer.datasets import EmnistLinesDataset
+import text_recognizer.util as util
+
+
+# Absolute path of the "emnist_lines" directory that sits next to this module.
+SUPPORT_DIRNAME = Path(__file__).parent.resolve() / "emnist_lines"
+
+
+def create_emnist_lines_support_files() -> None:
+    """Create EMNIST Lines test images.
+
+    Recreates ``SUPPORT_DIRNAME`` from scratch, loads (or generates) the
+    EMNIST Lines dataset, and writes the samples at indices 5, 7 and 9 as
+    PNG files named after their decoded labels. The pixel sum, dtype and
+    label of every written sample are printed to stdout.
+    """
+    # Start from a clean directory so images from earlier runs do not linger.
+    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
+    # rmtree swallows errors, so the directory may survive a failed removal;
+    # exist_ok keeps that case from raising FileExistsError.
+    SUPPORT_DIRNAME.mkdir(parents=True, exist_ok=True)
+
+    # TODO: maybe have to add args to dataset.
+    dataset = EmnistLinesDataset(
+        init_token="<sos>",
+        pad_token="_",
+        eos_token="<eos>",
+        transform=[{"type": "ToTensor", "args": {}}],
+        target_transform=[
+            {
+                "type": "AddTokens",
+                "args": {"init_token": "<sos>", "pad_token": "_", "eos_token": "<eos>"},
+            }
+        ],
+    )  # nosec: S106
+    dataset.load_or_generate_data()
+
+    for index in [5, 7, 9]:
+        image, target = dataset[index]
+        # Drop the leading dimension of a 3-d image before it is written.
+        if len(image.shape) == 3:
+            image = image.squeeze(0)
+        print(image.sum(), image.dtype)
+
+        # The first target entry is skipped (presumably the init token --
+        # confirm against AddTokens); pad characters are trimmed from the ends.
+        label = "".join(dataset.mapper(token) for token in target[1:]).strip(
+            dataset.mapper.pad_token
+        )
+        print(label)
+        image = image.numpy()
+        util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png"))
+
+
+# Regenerate the support images only when this file is executed as a script,
+# not when it is imported.
+if __name__ == "__main__":
+ create_emnist_lines_support_files()