summary | refs | log | tree | commit | diff
path: root/training/conf/mapping/word_piece.yaml
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com>, 2021-08-02 21:13:48 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com>, 2021-08-02 21:13:48 +0200
commit: 75801019981492eedf9280cb352eea3d8e99b65f (patch)
tree: 6521cc4134459e42591b2375f70acd348741474e /training/conf/mapping/word_piece.yaml
parent: e5eca28438cd17d436359f2c6eee0bb9e55d2a8b (diff)
Fix log import, fix mapping in datamodules, fix nn modules can be hashed
Diffstat (limited to 'training/conf/mapping/word_piece.yaml')
-rw-r--r-- training/conf/mapping/word_piece.yaml | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/training/conf/mapping/word_piece.yaml b/training/conf/mapping/word_piece.yaml
new file mode 100644
index 0000000..3792523
--- /dev/null
+++ b/training/conf/mapping/word_piece.yaml
@@ -0,0 +1,9 @@
+_target_: text_recognizer.data.mappings.WordPieceMapping
+num_features: 1000
+tokens: iamdb_1kwp_tokens_1000.txt
+lexicon: iamdb_1kwp_lex_1000.txt
+data_dir: null
+use_words: false
+prepend_wordsep: false
+special_tokens: [ <s>, <e>, <p> ]
+extra_symbols: [ \n ]