From bd4bd443f339e95007bfdabf3e060db720f4d4b9 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Tue, 3 Aug 2021 18:18:48 +0200
Subject: Training working, multiple bug fixes

---
 text_recognizer/data/transforms.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'text_recognizer/data/transforms.py')

diff --git a/text_recognizer/data/transforms.py b/text_recognizer/data/transforms.py
index 3b1b929..047496f 100644
--- a/text_recognizer/data/transforms.py
+++ b/text_recognizer/data/transforms.py
@@ -1,11 +1,11 @@
 """Transforms for PyTorch datasets."""
 from pathlib import Path
-from typing import Optional, Union, Sequence
+from typing import Optional, Union, Set
 
 import torch
 from torch import Tensor
 
-from text_recognizer.data.mappings import WordPieceMapping
+from text_recognizer.data.word_piece_mapping import WordPieceMapping
 
 
 class WordPiece:
@@ -19,8 +19,8 @@ class WordPiece:
         data_dir: Optional[Union[str, Path]] = None,
         use_words: bool = False,
         prepend_wordsep: bool = False,
-        special_tokens: Sequence[str] = ("<s>", "<e>", "<p>"),
-        extra_symbols: Optional[Sequence[str]] = ("\n",),
+        special_tokens: Set[str] = {"<s>", "<e>", "<p>"},
+        extra_symbols: Optional[Set[str]] = {"\n",},
         max_len: int = 451,
     ) -> None:
         self.mapping = WordPieceMapping(
-- 
cgit v1.2.3-70-g09d2