author     Gustaf Rydholm <gustaf.rydholm@gmail.com>   2022-09-27 23:11:06 +0200
committer  Gustaf Rydholm <gustaf.rydholm@gmail.com>   2022-09-27 23:11:06 +0200
commit     9c7dbb9ca70858b870f74ecf595d3169f0cbc711 (patch)
tree       c342e2c004bb75571a380ef2805049a8fcec3fcc /text_recognizer/data/iam_paragraphs.py
parent     9b8e14d89f0ef2508ed11f994f73af624155fe1d (diff)
Rename mapping to tokenizer
Diffstat (limited to 'text_recognizer/data/iam_paragraphs.py')
-rw-r--r--   text_recognizer/data/iam_paragraphs.py   12
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index fe1f15c..a078c7d 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -17,7 +17,7 @@ from text_recognizer.data.base_dataset import (
)
from text_recognizer.data.iam import IAM
from text_recognizer.data.transforms.pad import Pad
-from text_recognizer.data.mappings import EmnistMapping
+from text_recognizer.data.tokenizer import Tokenizer
from text_recognizer.data.stems.paragraph import ParagraphStem
import text_recognizer.metadata.iam_paragraphs as metadata
@@ -27,7 +27,7 @@ class IAMParagraphs(BaseDataModule):
    def __init__(
        self,
-        mapping: EmnistMapping,
+        tokenizer: Tokenizer,
        transform: Optional[Callable] = None,
        test_transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
@@ -37,7 +37,7 @@ class IAMParagraphs(BaseDataModule):
        pin_memory: bool = True,
    ) -> None:
        super().__init__(
-            mapping,
+            tokenizer,
            transform,
            test_transform,
            target_transform,
@@ -56,7 +56,7 @@ class IAMParagraphs(BaseDataModule):
        log.info("Cropping IAM paragraph regions and saving them along with labels...")
-        iam = IAM(mapping=EmnistMapping(extra_symbols={metadata.NEW_LINE_TOKEN}))
+        iam = IAM(tokenizer=self.tokenizer)
        iam.prepare_data()
        properties = {}
@@ -88,7 +88,7 @@ class IAMParagraphs(BaseDataModule):
            data = [resize_image(crop, metadata.IMAGE_SCALE_FACTOR) for crop in crops]
            targets = convert_strings_to_labels(
                strings=labels,
-                mapping=self.mapping.inverse_mapping,
+                mapping=self.tokenizer.inverse_mapping,
                length=self.output_dims[0],
            )
            return BaseDataset(
@@ -122,7 +122,7 @@ class IAMParagraphs(BaseDataModule):
"""Return information about the dataset."""
basic = (
"IAM Paragraphs Dataset\n"
- f"Num classes: {len(self.mapping)}\n"
+ f"Num classes: {len(self.tokenizer)}\n"
f"Input dims: {self.dims}\n"
f"Output dims: {self.output_dims}\n"
)
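
Taken together, the hunks are a mechanical rename of the datamodule's character-mapping dependency: the constructor parameter, the super().__init__() call, the IAM wiring in prepare_data, the label-to-target conversion, and the class-count report all move from mapping/EmnistMapping to tokenizer/Tokenizer, with the datamodule now handed a ready-made tokenizer instead of building an EmnistMapping itself. Below is a minimal sketch of what the patched code actually requires from the new argument; the Protocol and the commented-out wiring are illustrative assumptions, since the Tokenizer definition itself is not part of this patch.

from typing import Dict, Protocol


class SupportsTokenization(Protocol):
    """Surface of ``tokenizer`` that this diff exercises.

    The patched code reads ``tokenizer.inverse_mapping`` when converting
    label strings to integer targets and calls ``len(tokenizer)`` to report
    the number of classes; nothing else is touched in this file.
    """

    inverse_mapping: Dict[str, int]

    def __len__(self) -> int:
        ...


# Hypothetical wiring -- the Tokenizer constructor and its handling of the
# new-line token are not shown in this diff, so these calls are placeholders.
# from text_recognizer.data.tokenizer import Tokenizer
# from text_recognizer.data.iam_paragraphs import IAMParagraphs
#
# tokenizer = Tokenizer()                           # assumed construction
# datamodule = IAMParagraphs(tokenizer=tokenizer)
# datamodule.prepare_data()                         # now calls IAM(tokenizer=...)

Note that prepare_data previously built EmnistMapping(extra_symbols={metadata.NEW_LINE_TOKEN}) inline; after this change, responsibility for including the new-line token presumably moves to whoever constructs the Tokenizer that gets passed in.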