Add new transformer network

author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-04-04 23:08:16 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-04-04 23:08:16 +0200
commit: 2d4714fcfeb8914f240a0d36d938b434e82f191b (patch)
tree: 32e7b3446332cee4685ec90870210c51f9f1279f /text_recognizer/data
parent: 5dc8a7097ab6b4f39f0a3add408e3fd0f131f85b (diff)
2 files changed, 3 insertions, 3 deletions
diff --git a/text_recognizer/data/emnist.py b/text_recognizer/data/emnist.py
index eda490a..12adaab 100644
--- a/text_recognizer/data/emnist.py
+++ b/text_recognizer/data/emnist.py
@@ -96,7 +96,7 @@ class EMNIST(BaseDataModule):
 
 
 def emnist_mapping(
-    extra_symbols: Optional[List[str]],
+    extra_symbols: Optional[Sequence[str]],
 ) -> Tuple[List, Dict[str, int], List[int]]:
     """Return the EMNIST mapping."""
     if not ESSENTIALS_FILENAME.exists():
@@ -209,7 +209,7 @@ def _augment_emnist_characters(characters: Sequence[str]) -> Sequence[str]:
     # - End token at index 2
     # - Padding token at index 3
     # Note: Do not forget to update NUM_SPECIAL_TOKENS if changing this!
-    return ["<b>", "<s>", "</s>", "<p>", *characters, *iam_characters]
+    return ["<b>", "<s>", "<e>", "<p>", *characters, *iam_characters]
 
 
 def download_emnist() -> None:
diff --git a/text_recognizer/data/emnist_essentials.json b/text_recognizer/data/emnist_essentials.json
index 3f46a73..956c28d 100644
--- a/text_recognizer/data/emnist_essentials.json
+++ b/text_recognizer/data/emnist_essentials.json
@@ -1 +1 @@
-{"characters": ["<b>", "<s>", "</s>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}
+{"characters": ["<b>", "<s>", "<e>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}
author	Gustaf Rydholm <gustaf.rydholm@gmail.com>	2021-04-04 23:08:16 +0200
committer	Gustaf Rydholm <gustaf.rydholm@gmail.com>	2021-04-04 23:08:16 +0200
commit	2d4714fcfeb8914f240a0d36d938b434e82f191b (patch)
tree	32e7b3446332cee4685ec90870210c51f9f1279f /text_recognizer/data
parent	5dc8a7097ab6b4f39f0a3add408e3fd0f131f85b (diff)