summary | refs | log | tree | commit | diff
path: root/text_recognizer/data
diff options
context:
space:
mode:
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- text_recognizer/data/mappings/base_mapping.py (renamed from text_recognizer/data/base_mapping.py) | 0
-rw-r--r-- text_recognizer/data/mappings/emnist_essentials.json (renamed from text_recognizer/data/emnist_essentials.json) | 0
-rw-r--r-- text_recognizer/data/mappings/emnist_mapping.py (renamed from text_recognizer/data/emnist_mapping.py) | 2
-rw-r--r-- text_recognizer/data/mappings/word_piece_mapping.py (renamed from text_recognizer/data/word_piece_mapping.py) | 8
-rw-r--r-- text_recognizer/data/transforms/word_piece.py (renamed from text_recognizer/data/transforms.py) | 7
-rw-r--r-- text_recognizer/data/utils/build_transitions.py (renamed from text_recognizer/data/build_transitions.py) | 0
-rw-r--r-- text_recognizer/data/utils/download_utils.py (renamed from text_recognizer/data/download_utils.py) | 0
-rw-r--r-- text_recognizer/data/utils/iam_preprocessor.py (renamed from text_recognizer/data/iam_preprocessor.py) | 2
-rw-r--r-- text_recognizer/data/utils/image_utils.py (renamed from text_recognizer/data/image_utils.py) | 0
-rw-r--r-- text_recognizer/data/utils/make_wordpieces.py (renamed from text_recognizer/data/make_wordpieces.py) | 0
-rw-r--r-- text_recognizer/data/utils/sentence_generator.py (renamed from text_recognizer/data/sentence_generator.py) | 0
11 files changed, 8 insertions, 11 deletions
diff --git a/text_recognizer/data/base_mapping.py b/text_recognizer/data/mappings/base_mapping.py
index 572ac95..572ac95 100644
--- a/text_recognizer/data/base_mapping.py
+++ b/text_recognizer/data/mappings/base_mapping.py
diff --git a/text_recognizer/data/emnist_essentials.json b/text_recognizer/data/mappings/emnist_essentials.json
index c412425..c412425 100644
--- a/text_recognizer/data/emnist_essentials.json
+++ b/text_recognizer/data/mappings/emnist_essentials.json
diff --git a/text_recognizer/data/emnist_mapping.py b/text_recognizer/data/mappings/emnist_mapping.py
index b2165d2..3eed3d8 100644
--- a/text_recognizer/data/emnist_mapping.py
+++ b/text_recognizer/data/mappings/emnist_mapping.py
@@ -4,7 +4,7 @@ from typing import List, Optional, Set, Union
import torch
from torch import Tensor
-from text_recognizer.data.base_mapping import AbstractMapping
+from text_recognizer.data.mappings.base_mapping import AbstractMapping
from text_recognizer.data.emnist import emnist_mapping
diff --git a/text_recognizer/data/word_piece_mapping.py b/text_recognizer/data/mappings/word_piece_mapping.py
index dc56942..6f1790e 100644
--- a/text_recognizer/data/word_piece_mapping.py
+++ b/text_recognizer/data/mappings/word_piece_mapping.py
@@ -6,8 +6,8 @@ from loguru import logger as log
import torch
from torch import Tensor
-from text_recognizer.data.emnist_mapping import EmnistMapping
-from text_recognizer.data.iam_preprocessor import Preprocessor
+from text_recognizer.data.mappings.emnist_mapping import EmnistMapping
+from text_recognizer.data.utils.iam_preprocessor import Preprocessor
class WordPieceMapping(EmnistMapping):
@@ -27,7 +27,7 @@ class WordPieceMapping(EmnistMapping):
super().__init__(extra_symbols=extra_symbols)
self.data_dir = (
(
- Path(__file__).resolve().parents[2]
+ Path(__file__).resolve().parents[3]
/ "data"
/ "downloaded"
/ "iam"
@@ -41,7 +41,7 @@ class WordPieceMapping(EmnistMapping):
raise RuntimeError(f"Could not locate iamdb directory at {self.data_dir}")
processed_path = (
- Path(__file__).resolve().parents[2] / "data" / "processed" / "iam_lines"
+ Path(__file__).resolve().parents[3] / "data" / "processed" / "iam_lines"
)
tokens_path = processed_path / tokens
diff --git a/text_recognizer/data/transforms.py b/text_recognizer/data/transforms/word_piece.py
index 7f3e0d1..6bf5472 100644
--- a/text_recognizer/data/transforms.py
+++ b/text_recognizer/data/transforms/word_piece.py
@@ -1,12 +1,11 @@
-"""Transforms for PyTorch datasets."""
+"""Target transform for word pieces."""
from pathlib import Path
-from typing import Optional, Union, Type, Set
+from typing import Optional, Union, Set
import torch
from torch import Tensor
-from text_recognizer.data.base_mapping import AbstractMapping
-from text_recognizer.data.word_piece_mapping import WordPieceMapping
+from text_recognizer.data.mappings.word_piece_mapping import WordPieceMapping
class WordPiece:
diff --git a/text_recognizer/data/build_transitions.py b/text_recognizer/data/utils/build_transitions.py
index 0f987ca..0f987ca 100644
--- a/text_recognizer/data/build_transitions.py
+++ b/text_recognizer/data/utils/build_transitions.py
diff --git a/text_recognizer/data/download_utils.py b/text_recognizer/data/utils/download_utils.py
index a5a5360..a5a5360 100644
--- a/text_recognizer/data/download_utils.py
+++ b/text_recognizer/data/utils/download_utils.py
diff --git a/text_recognizer/data/iam_preprocessor.py b/text_recognizer/data/utils/iam_preprocessor.py
index 700944e..60ecff1 100644
--- a/text_recognizer/data/iam_preprocessor.py
+++ b/text_recognizer/data/utils/iam_preprocessor.py
@@ -45,8 +45,6 @@ def load_metadata(
class Preprocessor:
"""A preprocessor for the IAM dataset."""
- # TODO: attrs
-
def __init__(
self,
data_dir: Union[str, Path],
diff --git a/text_recognizer/data/image_utils.py b/text_recognizer/data/utils/image_utils.py
index c2b8915..c2b8915 100644
--- a/text_recognizer/data/image_utils.py
+++ b/text_recognizer/data/utils/image_utils.py
diff --git a/text_recognizer/data/make_wordpieces.py b/text_recognizer/data/utils/make_wordpieces.py
index 8e53815..8e53815 100644
--- a/text_recognizer/data/make_wordpieces.py
+++ b/text_recognizer/data/utils/make_wordpieces.py
diff --git a/text_recognizer/data/sentence_generator.py b/text_recognizer/data/utils/sentence_generator.py
index 8567e6d..8567e6d 100644
--- a/text_recognizer/data/sentence_generator.py
+++ b/text_recognizer/data/utils/sentence_generator.py