diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-27 23:11:06 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-27 23:11:06 +0200 |
commit | 9c7dbb9ca70858b870f74ecf595d3169f0cbc711 (patch) | |
tree | c342e2c004bb75571a380ef2805049a8fcec3fcc /text_recognizer/data/iam_paragraphs.py | |
parent | 9b8e14d89f0ef2508ed11f994f73af624155fe1d (diff) |
Rename mapping to tokenizer
Diffstat (limited to 'text_recognizer/data/iam_paragraphs.py')
-rw-r--r-- | text_recognizer/data/iam_paragraphs.py | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py index fe1f15c..a078c7d 100644 --- a/text_recognizer/data/iam_paragraphs.py +++ b/text_recognizer/data/iam_paragraphs.py @@ -17,7 +17,7 @@ from text_recognizer.data.base_dataset import ( ) from text_recognizer.data.iam import IAM from text_recognizer.data.transforms.pad import Pad -from text_recognizer.data.mappings import EmnistMapping +from text_recognizer.data.tokenizer import Tokenizer from text_recognizer.data.stems.paragraph import ParagraphStem import text_recognizer.metadata.iam_paragraphs as metadata @@ -27,7 +27,7 @@ class IAMParagraphs(BaseDataModule): def __init__( self, - mapping: EmnistMapping, + tokenizer: Tokenizer, transform: Optional[Callable] = None, test_transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, @@ -37,7 +37,7 @@ class IAMParagraphs(BaseDataModule): pin_memory: bool = True, ) -> None: super().__init__( - mapping, + tokenizer, transform, test_transform, target_transform, @@ -56,7 +56,7 @@ class IAMParagraphs(BaseDataModule): log.info("Cropping IAM paragraph regions and saving them along with labels...") - iam = IAM(mapping=EmnistMapping(extra_symbols={metadata.NEW_LINE_TOKEN})) + iam = IAM(tokenizer=self.tokenizer) iam.prepare_data() properties = {} @@ -88,7 +88,7 @@ class IAMParagraphs(BaseDataModule): data = [resize_image(crop, metadata.IMAGE_SCALE_FACTOR) for crop in crops] targets = convert_strings_to_labels( strings=labels, - mapping=self.mapping.inverse_mapping, + mapping=self.tokenizer.inverse_mapping, length=self.output_dims[0], ) return BaseDataset( @@ -122,7 +122,7 @@ class IAMParagraphs(BaseDataModule): """Return information about the dataset.""" basic = ( "IAM Paragraphs Dataset\n" - f"Num classes: {len(self.mapping)}\n" + f"Num classes: {len(self.tokenizer)}\n" f"Input dims: {self.dims}\n" f"Output dims: {self.output_dims}\n" ) |