From e6a5ef0330fb35f5c1c79872d57c3293127d4aa7 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Sun, 3 Oct 2021 00:32:55 +0200
Subject: Add length as variable to WordPiece transform

---
 text_recognizer/data/iam_paragraphs.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'text_recognizer/data')

diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 74b6165..254c7f5 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -31,6 +31,7 @@ IMAGE_SCALE_FACTOR = 2
 IMAGE_HEIGHT = 1152 // IMAGE_SCALE_FACTOR
 IMAGE_WIDTH = 1280 // IMAGE_SCALE_FACTOR
 MAX_LABEL_LENGTH = 682
+MAX_WORD_PIECE_LENGTH = 451
 
 
 @attr.s(auto_attribs=True, repr=False)
@@ -299,9 +300,11 @@ def get_transform(
     return T.Compose(transforms_list)
 
 
-def get_target_transform(word_pieces: bool) -> Optional[T.Compose]:
+def get_target_transform(
+    word_pieces: bool, max_len: int = MAX_WORD_PIECE_LENGTH
+) -> Optional[T.Compose]:
     """Transform emnist characters to word pieces."""
-    return T.Compose([WordPiece()]) if word_pieces else None
+    return T.Compose([WordPiece(max_len=max_len)]) if word_pieces else None
 
 
 def _labels_filename(split: str) -> Path:
-- 
cgit v1.2.3-70-g09d2