summary | refs | log | tree | commit | diff
path: root/text_recognizer/data
diff options
context:
space:
mode:
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- text_recognizer/data/iam_paragraphs.py | 5
-rw-r--r-- text_recognizer/data/iam_synthetic_paragraphs.py | 8
-rw-r--r-- text_recognizer/data/transforms/word_piece.py | 4
3 files changed, 14 insertions, 3 deletions
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 033b93e..a8d5e0d 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -86,7 +86,10 @@ class IAMParagraphs(BaseDataModule):
length=self.output_dims[0],
)
return BaseDataset(
- data, targets, transform=transform, target_transform=target_transform,
+ data,
+ targets,
+ transform=transform,
+ target_transform=target_transform,
)
log.info(f"Loading IAM paragraph regions and lines for {stage}...")
diff --git a/text_recognizer/data/iam_synthetic_paragraphs.py b/text_recognizer/data/iam_synthetic_paragraphs.py
index d906399..351e052 100644
--- a/text_recognizer/data/iam_synthetic_paragraphs.py
+++ b/text_recognizer/data/iam_synthetic_paragraphs.py
@@ -46,7 +46,13 @@ class IAMSyntheticParagraphs(IAMParagraphs):
log.info("Preparing IAM lines for synthetic paragraphs dataset.")
log.info("Cropping IAM line regions and loading labels.")
- iam = IAM(mapping=EmnistMapping(extra_symbols={NEW_LINE_TOKEN,}))
+ iam = IAM(
+ mapping=EmnistMapping(
+ extra_symbols={
+ NEW_LINE_TOKEN,
+ }
+ )
+ )
iam.prepare_data()
crops_train, labels_train = line_crops_and_labels(iam, "train")
diff --git a/text_recognizer/data/transforms/word_piece.py b/text_recognizer/data/transforms/word_piece.py
index 69f0ce1..a15615d 100644
--- a/text_recognizer/data/transforms/word_piece.py
+++ b/text_recognizer/data/transforms/word_piece.py
@@ -19,7 +19,9 @@ class WordPiece:
use_words: bool = False,
prepend_wordsep: bool = False,
special_tokens: Set[str] = {"<s>", "<e>", "<p>"},
- extra_symbols: Optional[Set[str]] = {"\n",},
+ extra_symbols: Optional[Set[str]] = {
+ "\n",
+ },
max_len: int = 451,
) -> None:
self.mapping = WordPieceMapping(