From e3741de333a3a43a7968241b6eccaaac66dd7b20 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Sun, 21 Mar 2021 22:33:58 +0100 Subject: Working on EMNIST Lines dataset --- tests/support/__init__.py | 2 + tests/support/create_emnist_lines_support_files.py | 51 +++++++++++++++++++++ tests/support/create_emnist_support_files.py | 30 ++++++++++++ tests/support/create_iam_lines_support_files.py | 50 ++++++++++++++++++++ tests/support/emnist_lines/Knox Ky.png | Bin 0 -> 2301 bytes .../emnist_lines/ancillary beliefs and.png | Bin 0 -> 5424 bytes tests/support/emnist_lines/they.png | Bin 0 -> 1391 bytes .../He rose from his breakfast-nook bench.png | Bin 0 -> 5170 bytes .../and came into the livingroom, where.png | Bin 0 -> 3617 bytes .../his entrance. He came, almost falling.png | Bin 0 -> 3923 bytes tests/support/iam_paragraphs/a01-000u.jpg | Bin 0 -> 14890 bytes 11 files changed, 133 insertions(+) create mode 100644 tests/support/__init__.py create mode 100644 tests/support/create_emnist_lines_support_files.py create mode 100644 tests/support/create_emnist_support_files.py create mode 100644 tests/support/create_iam_lines_support_files.py create mode 100644 tests/support/emnist_lines/Knox Ky.png create mode 100644 tests/support/emnist_lines/ancillary beliefs and.png create mode 100644 tests/support/emnist_lines/they.png create mode 100644 tests/support/iam_lines/He rose from his breakfast-nook bench.png create mode 100644 tests/support/iam_lines/and came into the livingroom, where.png create mode 100644 tests/support/iam_lines/his entrance. 
He came, almost falling.png
create mode 100644 tests/support/iam_paragraphs/a01-000u.jpg
(limited to 'tests/support')
diff --git a/tests/support/__init__.py b/tests/support/__init__.py
new file mode 100644
index 0000000..a265ede
--- /dev/null
+++ b/tests/support/__init__.py
@@ -0,0 +1,2 @@
+"""Support file modules."""
+from .create_emnist_support_files import create_emnist_support_files
diff --git a/tests/support/create_emnist_lines_support_files.py b/tests/support/create_emnist_lines_support_files.py
new file mode 100644
index 0000000..9abe143
--- /dev/null
+++ b/tests/support/create_emnist_lines_support_files.py
@@ -0,0 +1,51 @@
+"""Module for creating EMNIST Lines test support files."""
+# flake8: noqa: S106
+
+from pathlib import Path
+import shutil
+
+import numpy as np
+
+from text_recognizer.datasets import EmnistLinesDataset
+import text_recognizer.util as util
+
+
+SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines"
+
+
+def create_emnist_lines_support_files() -> None:
+    """Regenerate the EMNIST Lines sample images in SUPPORT_DIRNAME.
+
+    The directory is removed and recreated, then dataset items 5, 7 and 9
+    are each written to a PNG file named after their decoded label.
+    """
+    # Start from an empty directory so images from earlier runs do not linger.
+    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
+    SUPPORT_DIRNAME.mkdir()
+
+    # TODO: maybe have to add args to dataset.
+    tokens = {"init_token": "", "pad_token": "_", "eos_token": ""}
+    dataset = EmnistLinesDataset(
+        **tokens,
+        transform=[{"type": "ToTensor", "args": {}}],
+        target_transform=[{"type": "AddTokens", "args": dict(tokens)}],
+    )  # nosec: S106
+    dataset.load_or_generate_data()
+
+    for sample_index in (5, 7, 9):
+        image, target = dataset[sample_index]
+        # Squeeze axis 0 when the image has three dimensions.
+        image = image.squeeze(0) if len(image.shape) == 3 else image
+        print(image.sum(), image.dtype)
+
+        # Skip the first target entry, then trim padding from both ends.
+        decoded = "".join(dataset.mapper(token) for token in target[1:])
+        label = decoded.strip(dataset.mapper.pad_token)
+        print(label)
+
+        output_path = SUPPORT_DIRNAME / f"{label}.png"
+        util.write_image(image.numpy(), str(output_path))
+
+
+if __name__ == "__main__":
+    create_emnist_lines_support_files()
diff --git a/tests/support/create_emnist_support_files.py b/tests/support/create_emnist_support_files.py
new file mode 100644
index 0000000..f9ff030
--- /dev/null
+++ b/tests/support/create_emnist_support_files.py
@@ -0,0 +1,30 @@
+"""Module for creating EMNIST test support files."""
+from pathlib import Path
+import shutil
+
+from text_recognizer.datasets import EmnistDataset
+from text_recognizer.util import write_image
+
+SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist"
+
+
+def create_emnist_support_files() -> None:
+    """Write three EMNIST samples as images for the CharacterPredictor test."""
+    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
+    SUPPORT_DIRNAME.mkdir()
+
+    dataset = EmnistDataset(train=False)
+    dataset.load_or_generate_data()
+
+    for sample_index in (5, 7, 9):
+        image, raw_label = dataset[sample_index]
+        # Squeeze axis 0 when the image has three dimensions.
+        image = image.squeeze(0) if len(image.shape) == 3 else image
+        label = dataset.mapper(int(raw_label))
+        print(sample_index, label)
+        target_path = SUPPORT_DIRNAME / f"{label}.png"
+        write_image(image.numpy(), str(target_path))
+
+
+if __name__ == "__main__":
+    create_emnist_support_files()
diff --git a/tests/support/create_iam_lines_support_files.py b/tests/support/create_iam_lines_support_files.py
new file mode 100644
index 0000000..50f9e3d
--- /dev/null
+++ 
b/tests/support/create_iam_lines_support_files.py
@@ -0,0 +1,50 @@
+"""Module for creating IAM Lines test support files."""
+# flake8: noqa
+from pathlib import Path
+import shutil
+
+import numpy as np
+
+from text_recognizer.datasets import IamLinesDataset
+import text_recognizer.util as util
+
+
+SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines"
+
+
+def create_iam_lines_support_files() -> None:
+    """Regenerate the IAM Lines sample images in SUPPORT_DIRNAME.
+
+    The directory is removed and recreated, then dataset items 0, 1 and 3
+    are each written to a PNG file named after their decoded label.
+    """
+    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
+    SUPPORT_DIRNAME.mkdir()
+
+    # TODO: maybe have to add args to dataset.
+    tokens = {"init_token": "", "pad_token": "_", "eos_token": ""}
+    dataset = IamLinesDataset(
+        **tokens,
+        transform=[{"type": "ToTensor", "args": {}}],
+        target_transform=[{"type": "AddTokens", "args": dict(tokens)}],
+    )
+    dataset.load_or_generate_data()
+
+    for index in [0, 1, 3]:
+        image, target = dataset[index]
+        if len(image.shape) == 3:
+            image = image.squeeze(0)
+        print(image.sum(), image.dtype)
+
+        # Skip the first target entry, then trim padding from both ends.
+        text = "".join(dataset.mapper(token) for token in target[1:])
+        label = text.strip(dataset.mapper.pad_token)
+        print(label)
+        util.write_image(image.numpy(), str(SUPPORT_DIRNAME / f"{label}.png"))
+
+
+# Backward-compatible alias for the function's previous (misleading) name.
+create_emnist_lines_support_files = create_iam_lines_support_files
+
+if __name__ == "__main__":
+    create_iam_lines_support_files()
diff --git a/tests/support/emnist_lines/Knox Ky.png b/tests/support/emnist_lines/Knox Ky.png
new file mode 100644
index 0000000..b7d0618
Binary files /dev/null and b/tests/support/emnist_lines/Knox Ky.png differ
diff --git a/tests/support/emnist_lines/ancillary beliefs and.png b/tests/support/emnist_lines/ancillary beliefs and.png
new file mode 100644
index 0000000..14a8cf3
Binary files /dev/null and b/tests/support/emnist_lines/ancillary beliefs and.png differ
diff --git a/tests/support/emnist_lines/they.png b/tests/support/emnist_lines/they.png
new file mode 100644
index 0000000..7f05951
Binary files /dev/null and b/tests/support/emnist_lines/they.png differ
diff --git 
a/tests/support/iam_lines/He rose from his breakfast-nook bench.png b/tests/support/iam_lines/He rose from his breakfast-nook bench.png new file mode 100644 index 0000000..6eeb642 Binary files /dev/null and b/tests/support/iam_lines/He rose from his breakfast-nook bench.png differ diff --git a/tests/support/iam_lines/and came into the livingroom, where.png b/tests/support/iam_lines/and came into the livingroom, where.png new file mode 100644 index 0000000..4974cf8 Binary files /dev/null and b/tests/support/iam_lines/and came into the livingroom, where.png differ diff --git a/tests/support/iam_lines/his entrance. He came, almost falling.png b/tests/support/iam_lines/his entrance. He came, almost falling.png new file mode 100644 index 0000000..a731245 Binary files /dev/null and b/tests/support/iam_lines/his entrance. He came, almost falling.png differ diff --git a/tests/support/iam_paragraphs/a01-000u.jpg b/tests/support/iam_paragraphs/a01-000u.jpg new file mode 100644 index 0000000..d9753b6 Binary files /dev/null and b/tests/support/iam_paragraphs/a01-000u.jpg differ -- cgit v1.2.3-70-g09d2