summary | refs | log | tree | commit | diff
path: root/text_recognizer/data
diff options
context:
space:
mode:
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- text_recognizer/data/iam_extended_paragraphs.py 6
-rw-r--r-- text_recognizer/data/iam_paragraphs.py 9
2 files changed, 6 insertions, 9 deletions
diff --git a/text_recognizer/data/iam_extended_paragraphs.py b/text_recognizer/data/iam_extended_paragraphs.py
index 23e424d..0e97801 100644
--- a/text_recognizer/data/iam_extended_paragraphs.py
+++ b/text_recognizer/data/iam_extended_paragraphs.py
@@ -16,6 +16,7 @@ class IAMExtendedParagraphs(BaseDataModule):
augment: bool = attr.ib(default=True)
train_fraction: float = attr.ib(default=0.8)
word_pieces: bool = attr.ib(default=False)
+ num_classes: int = attr.ib(init=False)
def __attrs_post_init__(self) -> None:
self.iam_paragraphs = IAMParagraphs(
@@ -35,8 +36,7 @@ class IAMExtendedParagraphs(BaseDataModule):
self.dims = self.iam_paragraphs.dims
self.output_dims = self.iam_paragraphs.output_dims
- self.mapping = self.iam_paragraphs.mapping
- self.inverse_mapping = self.iam_paragraphs.inverse_mapping
+ self.num_classes = self.iam_paragraphs.num_classes
def prepare_data(self) -> None:
"""Prepares the paragraphs data."""
@@ -58,7 +58,7 @@ class IAMExtendedParagraphs(BaseDataModule):
"""Returns info about the dataset."""
basic = (
"IAM Original and Synthetic Paragraphs Dataset\n" # pylint: disable=no-member
- f"Num classes: {len(self.mapping)}\n"
+ f"Num classes: {len(self.num_classes)}\n"
f"Dims: {self.dims}\n"
f"Output dims: {self.output_dims}\n"
)
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 82058e0..7ba1077 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -38,20 +38,17 @@ MAX_LABEL_LENGTH = 682
class IAMParagraphs(BaseDataModule):
"""IAM handwriting database paragraphs."""
+ num_classes: int = attr.ib()
augment: bool = attr.ib(default=True)
train_fraction: float = attr.ib(default=0.8)
- word_pieces: bool = attr.ib(default=False)
dims: Tuple[int, int, int] = attr.ib(
init=False, default=(1, IMAGE_HEIGHT, IMAGE_WIDTH)
)
output_dims: Tuple[int, int] = attr.ib(init=False, default=(MAX_LABEL_LENGTH, 1))
+ inverse_mapping: Dict[str, int] = attr.ib(init=False)
def __attrs_post_init__(self) -> None:
- self.mapping, self.inverse_mapping, _ = emnist_mapping(
- extra_symbols=[NEW_LINE_TOKEN]
- )
- if self.word_pieces:
- self.mapping = WordPieceMapping()
+ _, self.inverse_mapping, _ = emnist_mapping(extra_symbols=[NEW_LINE_TOKEN])
def prepare_data(self) -> None:
"""Create data for training/testing."""