diff options
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- | text_recognizer/data/iam_paragraphs.py | 5 | ||||
-rw-r--r-- | text_recognizer/data/iam_synthetic_paragraphs.py | 8 | ||||
-rw-r--r-- | text_recognizer/data/transforms/word_piece.py | 4 |
3 files changed, 14 insertions, 3 deletions
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py index 033b93e..a8d5e0d 100644 --- a/text_recognizer/data/iam_paragraphs.py +++ b/text_recognizer/data/iam_paragraphs.py @@ -86,7 +86,10 @@ class IAMParagraphs(BaseDataModule): length=self.output_dims[0], ) return BaseDataset( - data, targets, transform=transform, target_transform=target_transform, + data, + targets, + transform=transform, + target_transform=target_transform, ) log.info(f"Loading IAM paragraph regions and lines for {stage}...") diff --git a/text_recognizer/data/iam_synthetic_paragraphs.py b/text_recognizer/data/iam_synthetic_paragraphs.py index d906399..351e052 100644 --- a/text_recognizer/data/iam_synthetic_paragraphs.py +++ b/text_recognizer/data/iam_synthetic_paragraphs.py @@ -46,7 +46,13 @@ class IAMSyntheticParagraphs(IAMParagraphs): log.info("Preparing IAM lines for synthetic paragraphs dataset.") log.info("Cropping IAM line regions and loading labels.") - iam = IAM(mapping=EmnistMapping(extra_symbols={NEW_LINE_TOKEN,})) + iam = IAM( + mapping=EmnistMapping( + extra_symbols={ + NEW_LINE_TOKEN, + } + ) + ) iam.prepare_data() crops_train, labels_train = line_crops_and_labels(iam, "train") diff --git a/text_recognizer/data/transforms/word_piece.py b/text_recognizer/data/transforms/word_piece.py index 69f0ce1..a15615d 100644 --- a/text_recognizer/data/transforms/word_piece.py +++ b/text_recognizer/data/transforms/word_piece.py @@ -19,7 +19,9 @@ class WordPiece: use_words: bool = False, prepend_wordsep: bool = False, special_tokens: Set[str] = {"<s>", "<e>", "<p>"}, - extra_symbols: Optional[Set[str]] = {"\n",}, + extra_symbols: Optional[Set[str]] = { + "\n", + }, max_len: int = 451, ) -> None: self.mapping = WordPieceMapping( |