From e3741de333a3a43a7968241b6eccaaac66dd7b20 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Sun, 21 Mar 2021 22:33:58 +0100
Subject: Working on EMNIST Lines dataset

---
 tests/support/create_iam_lines_support_files.py | 50 +++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 tests/support/create_iam_lines_support_files.py

diff --git a/tests/support/create_iam_lines_support_files.py b/tests/support/create_iam_lines_support_files.py
new file mode 100644
index 0000000..50f9e3d
--- /dev/null
+++ b/tests/support/create_iam_lines_support_files.py
@@ -0,0 +1,50 @@
+"""Module for creating IAM Lines test support files."""
+# flake8: noqa
+from pathlib import Path
+import shutil
+
+import numpy as np
+
+from text_recognizer.datasets import IamLinesDataset
+import text_recognizer.util as util
+
+
+SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines"
+
+
+def create_emnist_lines_support_files() -> None:
+    """Create IAM Lines test images."""
+    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
+    SUPPORT_DIRNAME.mkdir()
+
+    # TODO: maybe have to add args to dataset.
+    dataset = IamLinesDataset(
+        init_token="",
+        pad_token="_",
+        eos_token="",
+        transform=[{"type": "ToTensor", "args": {}}],
+        target_transform=[
+            {
+                "type": "AddTokens",
+                "args": {"init_token": "", "pad_token": "_", "eos_token": ""},
+            }
+        ],
+    )
+    dataset.load_or_generate_data()
+
+    for index in [0, 1, 3]:
+        image, target = dataset[index]
+        if len(image.shape) == 3:
+            image = image.squeeze(0)
+        print(image.sum(), image.dtype)
+
+        label = "".join(dataset.mapper(label) for label in target[1:]).strip(
+            dataset.mapper.pad_token
+        )
+        print(label)
+        image = image.numpy()
+        util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png"))
+
+
+if __name__ == "__main__":
+    create_emnist_lines_support_files()
--
cgit v1.2.3-70-g09d2
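
The script in this patch writes each selected sample to tests/support/iam_lines/ as "{label}.png", so the ground-truth transcription can be recovered straight from the filename. The sketch below is a rough illustration of how a test might consume those files; it is not part of this commit, it assumes the test module sits directly under tests/, and it uses Pillow only to confirm the images open. Predictor-level assertions are left out because they depend on APIs not shown in this patch.

    """Hypothetical consumer of the generated support files (not part of this commit)."""
    from pathlib import Path

    import pytest
    from PIL import Image

    # Mirrors the output directory of create_iam_lines_support_files.py,
    # assuming this test module sits directly under tests/.
    SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_lines"


    @pytest.mark.skipif(
        not SUPPORT_DIRNAME.exists(), reason="IAM Lines support files not generated"
    )
    def test_iam_lines_support_files_are_readable() -> None:
        """Each support image should open, and its filename should carry the label."""
        image_paths = sorted(SUPPORT_DIRNAME.glob("*.png"))
        assert image_paths, "run tests/support/create_iam_lines_support_files.py first"
        for image_path in image_paths:
            label = image_path.stem  # transcription, as written by the support script
            with Image.open(image_path) as image:
                width, height = image.size
            assert width > 0 and height > 0
            assert label.strip(), "filename should encode a non-empty transcription"

The skipif guard keeps the test from failing on a fresh checkout where the support files have not yet been generated.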