From 905eeeb4c3c0ba54b5414eb8f435e2e9870b7307 Mon Sep 17 00:00:00 2001 From: aktersnurra Date: Wed, 24 Feb 2021 22:00:29 +0100 Subject: updates --- src/tasks/make_wordpieces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/tasks/make_wordpieces.py') diff --git a/src/tasks/make_wordpieces.py b/src/tasks/make_wordpieces.py index f605920..2ac0e2c 100644 --- a/src/tasks/make_wordpieces.py +++ b/src/tasks/make_wordpieces.py @@ -30,7 +30,7 @@ def iamdb_pieces( user_symbols=["/"], # added so token is in the output set ) - vocab = sorted(set(w for t in text for w in t.split("_") if w)) + vocab = sorted(set(w for t in text for w in t.split("▁") if w)) if "move" not in vocab: raise RuntimeError("`MOVE` not in vocab") -- cgit v1.2.3-70-g09d2