summary | refs | log | tree | commit | diff
path: root/text_recognizer/data
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-10-03 00:32:55 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-10-03 00:32:55 +0200
commit e6a5ef0330fb35f5c1c79872d57c3293127d4aa7 (patch)
tree 1cd64ecbbb63b1934bf6017474295b82e2d3e22a /text_recognizer/data
parent 790c6321941226cd650ff5fa1729dd8c19966f4f (diff)
Add length as variable to WordPiece transform
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- text_recognizer/data/iam_paragraphs.py 7
1 files changed, 5 insertions, 2 deletions
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 74b6165..254c7f5 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -31,6 +31,7 @@ IMAGE_SCALE_FACTOR = 2
IMAGE_HEIGHT = 1152 // IMAGE_SCALE_FACTOR
IMAGE_WIDTH = 1280 // IMAGE_SCALE_FACTOR
MAX_LABEL_LENGTH = 682
+MAX_WORD_PIECE_LENGTH = 451
@attr.s(auto_attribs=True, repr=False)
@@ -299,9 +300,11 @@ def get_transform(
return T.Compose(transforms_list)
-def get_target_transform(word_pieces: bool) -> Optional[T.Compose]:
+def get_target_transform(
+ word_pieces: bool, max_len: int = MAX_WORD_PIECE_LENGTH
+) -> Optional[T.Compose]:
"""Transform emnist characters to word pieces."""
- return T.Compose([WordPiece()]) if word_pieces else None
+ return T.Compose([WordPiece(max_len=max_len)]) if word_pieces else None
def _labels_filename(split: str) -> Path: