diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-10-10 18:04:50 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-10-10 18:04:50 +0200 |
commit | 8291a87c64f9a5f18caec82201bea15579b49730 (patch) | |
tree | 1c8bb3e07a3bd06086e182dd320f8408829ba81c | |
parent | 30e3ae483c846418b04ed48f014a4af2cf9a0771 (diff) |
Move data utils to submodules
-rw-r--r-- | text_recognizer/data/mappings/base_mapping.py (renamed from text_recognizer/data/base_mapping.py) | 0 | ||||
-rw-r--r-- | text_recognizer/data/mappings/emnist_essentials.json (renamed from text_recognizer/data/emnist_essentials.json) | 0 | ||||
-rw-r--r-- | text_recognizer/data/mappings/emnist_mapping.py (renamed from text_recognizer/data/emnist_mapping.py) | 2 | ||||
-rw-r--r-- | text_recognizer/data/mappings/word_piece_mapping.py (renamed from text_recognizer/data/word_piece_mapping.py) | 8 | ||||
-rw-r--r-- | text_recognizer/data/transforms/word_piece.py (renamed from text_recognizer/data/transforms.py) | 7 | ||||
-rw-r--r-- | text_recognizer/data/utils/build_transitions.py (renamed from text_recognizer/data/build_transitions.py) | 0 | ||||
-rw-r--r-- | text_recognizer/data/utils/download_utils.py (renamed from text_recognizer/data/download_utils.py) | 0 | ||||
-rw-r--r-- | text_recognizer/data/utils/iam_preprocessor.py (renamed from text_recognizer/data/iam_preprocessor.py) | 2 | ||||
-rw-r--r-- | text_recognizer/data/utils/image_utils.py (renamed from text_recognizer/data/image_utils.py) | 0 | ||||
-rw-r--r-- | text_recognizer/data/utils/make_wordpieces.py (renamed from text_recognizer/data/make_wordpieces.py) | 0 | ||||
-rw-r--r-- | text_recognizer/data/utils/sentence_generator.py (renamed from text_recognizer/data/sentence_generator.py) | 0 |
11 files changed, 8 insertions, 11 deletions
diff --git a/text_recognizer/data/base_mapping.py b/text_recognizer/data/mappings/base_mapping.py index 572ac95..572ac95 100644 --- a/text_recognizer/data/base_mapping.py +++ b/text_recognizer/data/mappings/base_mapping.py diff --git a/text_recognizer/data/emnist_essentials.json b/text_recognizer/data/mappings/emnist_essentials.json index c412425..c412425 100644 --- a/text_recognizer/data/emnist_essentials.json +++ b/text_recognizer/data/mappings/emnist_essentials.json diff --git a/text_recognizer/data/emnist_mapping.py b/text_recognizer/data/mappings/emnist_mapping.py index b2165d2..3eed3d8 100644 --- a/text_recognizer/data/emnist_mapping.py +++ b/text_recognizer/data/mappings/emnist_mapping.py @@ -4,7 +4,7 @@ from typing import List, Optional, Set, Union import torch from torch import Tensor -from text_recognizer.data.base_mapping import AbstractMapping +from text_recognizer.data.mappings.base_mapping import AbstractMapping from text_recognizer.data.emnist import emnist_mapping diff --git a/text_recognizer/data/word_piece_mapping.py b/text_recognizer/data/mappings/word_piece_mapping.py index dc56942..6f1790e 100644 --- a/text_recognizer/data/word_piece_mapping.py +++ b/text_recognizer/data/mappings/word_piece_mapping.py @@ -6,8 +6,8 @@ from loguru import logger as log import torch from torch import Tensor -from text_recognizer.data.emnist_mapping import EmnistMapping -from text_recognizer.data.iam_preprocessor import Preprocessor +from text_recognizer.data.mappings.emnist_mapping import EmnistMapping +from text_recognizer.data.utils.iam_preprocessor import Preprocessor class WordPieceMapping(EmnistMapping): @@ -27,7 +27,7 @@ class WordPieceMapping(EmnistMapping): super().__init__(extra_symbols=extra_symbols) self.data_dir = ( ( - Path(__file__).resolve().parents[2] + Path(__file__).resolve().parents[3] / "data" / "downloaded" / "iam" @@ -41,7 +41,7 @@ class WordPieceMapping(EmnistMapping): raise RuntimeError(f"Could not locate iamdb directory at {self.data_dir}") processed_path = ( - Path(__file__).resolve().parents[2] / "data" / "processed" / "iam_lines" + Path(__file__).resolve().parents[3] / "data" / "processed" / "iam_lines" ) tokens_path = processed_path / tokens diff --git a/text_recognizer/data/transforms.py b/text_recognizer/data/transforms/word_piece.py index 7f3e0d1..6bf5472 100644 --- a/text_recognizer/data/transforms.py +++ b/text_recognizer/data/transforms/word_piece.py @@ -1,12 +1,11 @@ -"""Transforms for PyTorch datasets.""" +"""Target transform for word pieces.""" from pathlib import Path -from typing import Optional, Union, Type, Set +from typing import Optional, Union, Set import torch from torch import Tensor -from text_recognizer.data.base_mapping import AbstractMapping -from text_recognizer.data.word_piece_mapping import WordPieceMapping +from text_recognizer.data.mappings.word_piece_mapping import WordPieceMapping class WordPiece: diff --git a/text_recognizer/data/build_transitions.py b/text_recognizer/data/utils/build_transitions.py index 0f987ca..0f987ca 100644 --- a/text_recognizer/data/build_transitions.py +++ b/text_recognizer/data/utils/build_transitions.py diff --git a/text_recognizer/data/download_utils.py b/text_recognizer/data/utils/download_utils.py index a5a5360..a5a5360 100644 --- a/text_recognizer/data/download_utils.py +++ b/text_recognizer/data/utils/download_utils.py diff --git a/text_recognizer/data/iam_preprocessor.py b/text_recognizer/data/utils/iam_preprocessor.py index 700944e..60ecff1 100644 --- a/text_recognizer/data/iam_preprocessor.py +++ b/text_recognizer/data/utils/iam_preprocessor.py @@ -45,8 +45,6 @@ def load_metadata( class Preprocessor: """A preprocessor for the IAM dataset.""" - # TODO: attrs - def __init__( self, data_dir: Union[str, Path], diff --git a/text_recognizer/data/image_utils.py b/text_recognizer/data/utils/image_utils.py index c2b8915..c2b8915 100644 --- a/text_recognizer/data/image_utils.py +++ b/text_recognizer/data/utils/image_utils.py diff --git a/text_recognizer/data/make_wordpieces.py b/text_recognizer/data/utils/make_wordpieces.py index 8e53815..8e53815 100644 --- a/text_recognizer/data/make_wordpieces.py +++ b/text_recognizer/data/utils/make_wordpieces.py diff --git a/text_recognizer/data/sentence_generator.py b/text_recognizer/data/utils/sentence_generator.py index 8567e6d..8567e6d 100644 --- a/text_recognizer/data/sentence_generator.py +++ b/text_recognizer/data/utils/sentence_generator.py |