diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-04-04 23:08:16 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-04-04 23:08:16 +0200 |
commit | 2d4714fcfeb8914f240a0d36d938b434e82f191b (patch) | |
tree | 32e7b3446332cee4685ec90870210c51f9f1279f /text_recognizer/data | |
parent | 5dc8a7097ab6b4f39f0a3add408e3fd0f131f85b (diff) |
Add new transformer network
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- | text_recognizer/data/emnist.py | 4 | ++-- |
-rw-r--r-- | text_recognizer/data/emnist_essentials.json | 2 | +- |
2 files changed, 3 insertions, 3 deletions
diff --git a/text_recognizer/data/emnist.py b/text_recognizer/data/emnist.py
index eda490a..12adaab 100644
--- a/text_recognizer/data/emnist.py
+++ b/text_recognizer/data/emnist.py
@@ -96,7 +96,7 @@ class EMNIST(BaseDataModule):
 
 
 def emnist_mapping(
-    extra_symbols: Optional[List[str]],
+    extra_symbols: Optional[Sequence[str]],
 ) -> Tuple[List, Dict[str, int], List[int]]:
     """Return the EMNIST mapping."""
     if not ESSENTIALS_FILENAME.exists():
@@ -209,7 +209,7 @@ def _augment_emnist_characters(characters: Sequence[str]) -> Sequence[str]:
     # - End token at index 2
     # - Padding token at index 3
     # Note: Do not forget to update NUM_SPECIAL_TOKENS if changing this!
-    return ["<b>", "<s>", "</s>", "<p>", *characters, *iam_characters]
+    return ["<b>", "<s>", "<e>", "<p>", *characters, *iam_characters]
 
 
 def download_emnist() -> None:
diff --git a/text_recognizer/data/emnist_essentials.json b/text_recognizer/data/emnist_essentials.json
index 3f46a73..956c28d 100644
--- a/text_recognizer/data/emnist_essentials.json
+++ b/text_recognizer/data/emnist_essentials.json
@@ -1 +1 @@
-{"characters": ["<b>", "<s>", "</s>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}
+{"characters": ["<b>", "<s>", "<e>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}