path: root/tests/support/create_emnist_lines_support_files.py
"""Module for creating EMNIST Lines test support files."""
# flake8: noqa: S106

from pathlib import Path
import shutil

import numpy as np

from text_recognizer.datasets import EmnistLinesDataset
import text_recognizer.util as util


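# Directory where the generated test support images are written.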
SUPPORT_DIRNAME = Path(__file__).parent.resolve() / "emnist_lines"


def create_emnist_lines_support_files() -> None:
    """Create EMNIST Lines test images."""
    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
    SUPPORT_DIRNAME.mkdir()

    # TODO: may need to pass additional args to the dataset.
    dataset = EmnistLinesDataset(
        init_token="<sos>",
        pad_token="_",
        eos_token="<eos>",
        transform=[{"type": "ToTensor", "args": {}}],
        target_transform=[
            {
                "type": "AddTokens",
                "args": {"init_token": "<sos>", "pad_token": "_", "eos_token": "<eos>"},
            }
        ],
    )  # nosec: S106
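    # Load existing data if available, otherwise generate it.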
    dataset.load_or_generate_data()

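    # Write a few sample line images as test fixtures, named by their decoded labels.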
    for index in [5, 7, 9]:
        image, target = dataset[index]
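        # Collapse a singleton channel dimension, e.g. (1, H, W) -> (H, W).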
        if len(image.shape) == 3:
            image = image.squeeze(0)
        print(image.sum(), image.dtype)

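        # Decode the target indices to characters, skipping the init token and stripping padding.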
        label = "".join(dataset.mapper(label) for label in target[1:]).strip(
            dataset.mapper.pad_token
        )
        print(label)
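        # Convert the tensor to a NumPy array and save it as "<label>.png" in the support directory.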
        image = image.numpy()
        util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png"))


if __name__ == "__main__":
    create_emnist_lines_support_files()