summary | refs | log | tree | commit | diff
path: root/text_recognizer/data
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-04-04 23:08:16 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-04-04 23:08:16 +0200
commit: 2d4714fcfeb8914f240a0d36d938b434e82f191b (patch)
tree: 32e7b3446332cee4685ec90870210c51f9f1279f /text_recognizer/data
parent: 5dc8a7097ab6b4f39f0a3add408e3fd0f131f85b (diff)
Add new transformer network
Diffstat (limited to 'text_recognizer/data')
-rw-r--r-- text_recognizer/data/emnist.py 4
-rw-r--r-- text_recognizer/data/emnist_essentials.json 2
2 files changed, 3 insertions, 3 deletions
diff --git a/text_recognizer/data/emnist.py b/text_recognizer/data/emnist.py
index eda490a..12adaab 100644
--- a/text_recognizer/data/emnist.py
+++ b/text_recognizer/data/emnist.py
@@ -96,7 +96,7 @@ class EMNIST(BaseDataModule):
def emnist_mapping(
- extra_symbols: Optional[List[str]],
+ extra_symbols: Optional[Sequence[str]],
) -> Tuple[List, Dict[str, int], List[int]]:
"""Return the EMNIST mapping."""
if not ESSENTIALS_FILENAME.exists():
@@ -209,7 +209,7 @@ def _augment_emnist_characters(characters: Sequence[str]) -> Sequence[str]:
# - End token at index 2
# - Padding token at index 3
# Note: Do not forget to update NUM_SPECIAL_TOKENS if changing this!
- return ["<b>", "<s>", "</s>", "<p>", *characters, *iam_characters]
+ return ["<b>", "<s>", "<e>", "<p>", *characters, *iam_characters]
def download_emnist() -> None:
diff --git a/text_recognizer/data/emnist_essentials.json b/text_recognizer/data/emnist_essentials.json
index 3f46a73..956c28d 100644
--- a/text_recognizer/data/emnist_essentials.json
+++ b/text_recognizer/data/emnist_essentials.json
@@ -1 +1 @@
-{"characters": ["<b>", "<s>", "</s>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}
+{"characters": ["<b>", "<s>", "<e>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}