summaryrefslogtreecommitdiff
path: root/tests/support/create_iam_lines_support_files.py
blob: 50f9e3d53ff0056a6617965b95c89d1fa98d7d15 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""Module for creating IAM Lines test support files."""
# flake8: noqa
from pathlib import Path
import shutil

import numpy as np

from text_recognizer.datasets import IamLinesDataset
import text_recognizer.util as util


# Output directory for the generated images: an "iam_lines" folder placed
# next to this script (resolved to an absolute path).
SUPPORT_DIRNAME = Path(__file__).parent.resolve() / "iam_lines"


def create_emnist_lines_support_files() -> None:
    """Create IAM Lines test images.

    Wipes and recreates ``SUPPORT_DIRNAME``, loads the IAM Lines dataset and
    writes the samples at indices 0, 1 and 3 to ``<decoded label>.png`` in
    that directory. The pixel sum, dtype and decoded label of every sample
    are printed along the way.

    NOTE: despite the ``emnist`` in its name, this function operates on
    ``IamLinesDataset``; the name is kept so existing callers keep working.
    """
    # Start from an empty output directory on every run.
    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
    SUPPORT_DIRNAME.mkdir()

    # The same special tokens are handed to the dataset itself and to its
    # AddTokens target transform.
    special_tokens = {"init_token": "<sos>", "pad_token": "_", "eos_token": "<eos>"}

    # TODO: maybe have to add args to dataset.
    dataset = IamLinesDataset(
        transform=[{"type": "ToTensor", "args": {}}],
        target_transform=[{"type": "AddTokens", "args": dict(special_tokens)}],
        **special_tokens,
    )
    dataset.load_or_generate_data()

    for sample_index in (0, 1, 3):
        image, target = dataset[sample_index]

        # Drop the leading axis of a 3-d image so a 2-d array is written
        # (presumably a single-channel axis added by ToTensor -- confirm).
        if len(image.shape) == 3:
            image = image.squeeze(0)
        print(image.sum(), image.dtype)

        # Skip the first target element (presumably the init token added by
        # AddTokens -- confirm), map the rest to characters and trim the
        # padding character from both ends.
        characters = [dataset.mapper(token) for token in target[1:]]
        text = "".join(characters).strip(dataset.mapper.pad_token)
        print(text)

        pixels = image.numpy()
        destination = SUPPORT_DIRNAME / f"{text}.png"
        util.write_image(pixels, str(destination))


# Regenerate the support images only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    create_emnist_lines_support_files()