diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-10-03 00:32:55 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-10-03 00:32:55 +0200 |
commit | e6a5ef0330fb35f5c1c79872d57c3293127d4aa7 (patch) | |
tree | 1cd64ecbbb63b1934bf6017474295b82e2d3e22a /text_recognizer/data | |
parent | 790c6321941226cd650ff5fa1729dd8c19966f4f (diff) |
Add length as variable to WordPiece transform
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- | text_recognizer/data/iam_paragraphs.py | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 74b6165..254c7f5 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -31,6 +31,7 @@ IMAGE_SCALE_FACTOR = 2
 IMAGE_HEIGHT = 1152 // IMAGE_SCALE_FACTOR
 IMAGE_WIDTH = 1280 // IMAGE_SCALE_FACTOR
 MAX_LABEL_LENGTH = 682
+MAX_WORD_PIECE_LENGTH = 451
 
 
 @attr.s(auto_attribs=True, repr=False)
@@ -299,9 +300,11 @@ def get_transform(
     return T.Compose(transforms_list)
 
 
-def get_target_transform(word_pieces: bool) -> Optional[T.Compose]:
+def get_target_transform(
+    word_pieces: bool, max_len: int = MAX_WORD_PIECE_LENGTH
+) -> Optional[T.Compose]:
     """Transform emnist characters to word pieces."""
-    return T.Compose([WordPiece()]) if word_pieces else None
+    return T.Compose([WordPiece(max_len=max_len)]) if word_pieces else None
 
 
 def _labels_filename(split: str) -> Path: