From 75909723fa2b1f6245d5c5422e4f2e88b8a26052 Mon Sep 17 00:00:00 2001 From: aktersnurra Date: Sun, 15 Nov 2020 17:40:44 +0100 Subject: Able to generate support files for lines datasets. --- .../support/create_emnist_lines_support_files.py | 30 ++++++++++++++------- .../support/create_iam_lines_support_files.py | 27 ++++++++++++------- .../tests/support/emnist_lines/Knox Ky.png | Bin 0 -> 2301 bytes .../emnist_lines/ancillary beliefs and.png | Bin 0 -> 5424 bytes .../tests/support/emnist_lines/they.png | Bin 0 -> 1391 bytes .../He rose from his breakfast-nook bench.png | Bin 0 -> 5170 bytes .../and came into the livingroom, where.png | Bin 0 -> 3617 bytes .../his entrance. He came, almost falling.png | Bin 0 -> 3923 bytes 8 files changed, 38 insertions(+), 19 deletions(-) create mode 100644 src/text_recognizer/tests/support/emnist_lines/Knox Ky.png create mode 100644 src/text_recognizer/tests/support/emnist_lines/ancillary beliefs and.png create mode 100644 src/text_recognizer/tests/support/emnist_lines/they.png create mode 100644 src/text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png create mode 100644 src/text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png create mode 100644 src/text_recognizer/tests/support/iam_lines/his entrance. He came, almost falling.png (limited to 'src/text_recognizer/tests') diff --git a/src/text_recognizer/tests/support/create_emnist_lines_support_files.py b/src/text_recognizer/tests/support/create_emnist_lines_support_files.py index 4496e40..9abe143 100644 --- a/src/text_recognizer/tests/support/create_emnist_lines_support_files.py +++ b/src/text_recognizer/tests/support/create_emnist_lines_support_files.py @@ -1,4 +1,6 @@ """Module for creating EMNIST Lines test support files.""" +# flake8: noqa: S106 + from pathlib import Path import shutil @@ -17,23 +19,31 @@ def create_emnist_lines_support_files() -> None: SUPPORT_DIRNAME.mkdir() # TODO: maybe have to add args to dataset. - dataset = EmnistLinesDataset() + dataset = EmnistLinesDataset( + init_token="", + pad_token="_", + eos_token="", + transform=[{"type": "ToTensor", "args": {}}], + target_transform=[ + { + "type": "AddTokens", + "args": {"init_token": "", "pad_token": "_", "eos_token": ""}, + } + ], + ) # nosec: S106 dataset.load_or_generate_data() - for index in [0, 1, 3]: + for index in [5, 7, 9]: image, target = dataset[index] + if len(image.shape) == 3: + image = image.squeeze(0) print(image.sum(), image.dtype) - label = ( - "".join( - dataset.mapper[label] - for label in np.argmax(target[1:], dim=-1).flatten() - ) - .stip() - .strip(dataset.mapper.pad_token) + label = "".join(dataset.mapper(label) for label in target[1:]).strip( + dataset.mapper.pad_token ) - print(label) + image = image.numpy() util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) diff --git a/src/text_recognizer/tests/support/create_iam_lines_support_files.py b/src/text_recognizer/tests/support/create_iam_lines_support_files.py index bb568ee..50f9e3d 100644 --- a/src/text_recognizer/tests/support/create_iam_lines_support_files.py +++ b/src/text_recognizer/tests/support/create_iam_lines_support_files.py @@ -1,4 +1,5 @@ """Module for creating IAM Lines test support files.""" +# flake8: noqa from pathlib import Path import shutil @@ -17,23 +18,31 @@ def create_emnist_lines_support_files() -> None: SUPPORT_DIRNAME.mkdir() # TODO: maybe have to add args to dataset. - dataset = IamLinesDataset() + dataset = IamLinesDataset( + init_token="", + pad_token="_", + eos_token="", + transform=[{"type": "ToTensor", "args": {}}], + target_transform=[ + { + "type": "AddTokens", + "args": {"init_token": "", "pad_token": "_", "eos_token": ""}, + } + ], + ) dataset.load_or_generate_data() for index in [0, 1, 3]: image, target = dataset[index] + if len(image.shape) == 3: + image = image.squeeze(0) print(image.sum(), image.dtype) - label = ( - "".join( - dataset.mapper[label] - for label in np.argmax(target[1:], dim=-1).flatten() - ) - .stip() - .strip(dataset.mapper.pad_token) + label = "".join(dataset.mapper(label) for label in target[1:]).strip( + dataset.mapper.pad_token ) - print(label) + image = image.numpy() util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) diff --git a/src/text_recognizer/tests/support/emnist_lines/Knox Ky.png b/src/text_recognizer/tests/support/emnist_lines/Knox Ky.png new file mode 100644 index 0000000..b7d0618 Binary files /dev/null and b/src/text_recognizer/tests/support/emnist_lines/Knox Ky.png differ diff --git a/src/text_recognizer/tests/support/emnist_lines/ancillary beliefs and.png b/src/text_recognizer/tests/support/emnist_lines/ancillary beliefs and.png new file mode 100644 index 0000000..14a8cf3 Binary files /dev/null and b/src/text_recognizer/tests/support/emnist_lines/ancillary beliefs and.png differ diff --git a/src/text_recognizer/tests/support/emnist_lines/they.png b/src/text_recognizer/tests/support/emnist_lines/they.png new file mode 100644 index 0000000..7f05951 Binary files /dev/null and b/src/text_recognizer/tests/support/emnist_lines/they.png differ diff --git a/src/text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png b/src/text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png new file mode 100644 index 0000000..6eeb642 Binary files /dev/null and b/src/text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png differ diff --git a/src/text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png b/src/text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png new file mode 100644 index 0000000..4974cf8 Binary files /dev/null and b/src/text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png differ diff --git a/src/text_recognizer/tests/support/iam_lines/his entrance. He came, almost falling.png b/src/text_recognizer/tests/support/iam_lines/his entrance. He came, almost falling.png new file mode 100644 index 0000000..a731245 Binary files /dev/null and b/src/text_recognizer/tests/support/iam_lines/his entrance. He came, almost falling.png differ -- cgit v1.2.3-70-g09d2