summaryrefslogtreecommitdiff
path: root/text_recognizer/data/base_dataset.py
diff options
context:
space:
mode:
authorGustaf Rydholm <gustaf.rydholm@gmail.com>2021-04-08 23:38:03 +0200
committerGustaf Rydholm <gustaf.rydholm@gmail.com>2021-04-08 23:38:03 +0200
commite388cd95c77d37a51324cff9d84a809421bf97d3 (patch)
treed585545f85d03ea8a6907daba254821fddeb1589 /text_recognizer/data/base_dataset.py
parentf4629a0d4149d5870c9fd8ce83ff5d391bd7ddd3 (diff)
Bug fixes word pieces
Diffstat (limited to 'text_recognizer/data/base_dataset.py')
-rw-r--r--text_recognizer/data/base_dataset.py2
1 files changed, 1 insertions, 1 deletions
diff --git a/text_recognizer/data/base_dataset.py b/text_recognizer/data/base_dataset.py
index d00daaf..8d644d4 100644
--- a/text_recognizer/data/base_dataset.py
+++ b/text_recognizer/data/base_dataset.py
@@ -67,7 +67,7 @@ def convert_strings_to_labels(
labels = torch.ones((len(strings), length), dtype=torch.long) * mapping["<p>"]
for i, string in enumerate(strings):
tokens = list(string)
- tokens = ["<s>", *tokens, "</s>"]
+ tokens = ["<s>", *tokens, "<e>"]
for j, token in enumerate(tokens):
labels[i, j] = mapping[token]
return labels