From 7e8e54e84c63171e748bbf09516fd517e6821ace Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Sat, 20 Mar 2021 18:09:06 +0100 Subject: Inital commit for refactoring to lightning --- notebooks/07-look-at-lexicon.ipynb | 1119 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1119 insertions(+) create mode 100644 notebooks/07-look-at-lexicon.ipynb (limited to 'notebooks/07-look-at-lexicon.ipynb') diff --git a/notebooks/07-look-at-lexicon.ipynb b/notebooks/07-look-at-lexicon.ipynb new file mode 100644 index 0000000..b7a5a0e --- /dev/null +++ b/notebooks/07-look-at-lexicon.ipynb @@ -0,0 +1,1119 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "from pathlib import Path\n", + "import numpy as np\n", + "from PIL import Image\n", + "import torch.nn.functional as F\n", + "import torch\n", + "from torch import nn\n", + "from torchsummary import summary\n", + "from importlib.util import find_spec\n", + "if find_spec(\"text_recognizer\") is None:\n", + " import sys\n", + " sys.path.append('..')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "path = Path(\"../\").resolve().parent / \"data\" / \"processed\" / \"iam_lines\" / \"iamdb_1kwp_lex_1000.txt\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/data/processed/iam_lines/iamdb_1kwp_lex_1000.txt')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "with open(path, \"r\") as f:\n", + " lex = (line.strip().split() for line in f)\n", + " lex = {line[0]: line[1:] for line in lex}\n", + " #print(len(lex))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'!': ['▁', '!'],\n", + " '\"': ['▁', '\"'],\n", + " '&': ['▁', '&'],\n", + " \"'\": ['▁', \"'\"],\n", + " \"'30s\": ['▁', \"'\", '3', '0', 's'],\n", + " \"'61\": ['▁', \"'\", '6', '1'],\n", + " \"'d\": ['▁', \"'\", 'd'],\n", + " \"'ll\": ['▁', \"'\", 'll'],\n", + " \"'m\": ['▁', \"'\", 'm'],\n", + " \"'re\": ['▁', \"'\", 're'],\n", + " \"'s\": ['▁', \"'\", 's'],\n", + " \"'ve\": ['▁', \"'\", 've'],\n", + " '(': ['▁', '('],\n", + " ')': ['▁', ')'],\n", + " '*': ['▁', '*'],\n", + " '+2.8': ['▁', '+', '2', '.', '8'],\n", + " '+3.6': ['▁', '+', '3', '.', '6'],\n", + " ',': ['▁', ','],\n", + " '-': ['▁', '-'],\n", + " '-2.6': ['▁', '-', '2', '.', '6'],\n", + " '-5.4': ['▁', '-', '5', '.', '4'],\n", + " '.': ['▁', '.'],\n", + " '...': ['▁', '.', '.', '.'],\n", + " '0m': ['▁', '0', 'm'],\n", + " '1': ['▁', '1'],\n", + " '1,157': ['▁', '1', ',', '1', '5', '7'],\n", + " '1,400': ['▁', '1', ',', '4', '0', '0'],\n", + " '1,500': ['▁', '1', ',', '5', '0', '0'],\n", + " '1-2': ['▁', '1', '-', '2'],\n", + " '1.8': ['▁', '1', '.', '8'],\n", + " '1/2': ['▁', '1', '/', '2'],\n", + " '1/2-in.-long': ['▁', '1', '/', '2', '-', 'in', '.', '-', 'long'],\n", + " '1/4': ['▁', '1', '/', '4'],\n", + " '10': ['▁', '10'],\n", + " '10,000': ['▁', '10', ',', '0', '0', '0'],\n", + " '100': ['▁', '10', '0'],\n", + " '100,000,000': ['▁', '10', '0', ',', '0', '00,000'],\n", + " '104': ['▁', '10', '4'],\n", + " '11': ['▁', '1', '1'],\n", + " '12': ['▁', '1', '2'],\n", + " '12,000-word': ['▁', '1', '2', ',', '0', '0', '0', '-', 'word'],\n", + " '125': ['▁', '1', '2', '5'],\n", + " '13': ['▁', '1', '3'],\n", + " '13,000': ['▁', '1', '3', ',', '0', '0', '0'],\n", + " '14': ['▁', '1', '4'],\n", + " '15': ['▁', '1', '5'],\n", + " '15,000,000': ['▁', '1', '5', ',', '0', '00,000'],\n", + " '15-17': ['▁', '1', '5', '-', '1', '7'],\n", + " '15-nation': ['▁', '1', '5', '-', 'n', 'ation'],\n", + " '15-year-olds': ['▁', '1', '5', '-', 'year', '-', 'old', 's'],\n", + " '150,000,000': ['▁', '1', '5', '0', ',', '0', '00,000'],\n", + " '16': ['▁', '1', '6'],\n", + " '16,000': ['▁', '1', '6', ',', '0', '0', '0'],\n", + " '160': ['▁', '1', '6', '0'],\n", + " '163,000,000': ['▁', '1', '6', '3', ',', '0', '00,000'],\n", + " '167': ['▁', '1', '6', '7'],\n", + " '17': ['▁', '1', '7'],\n", + " '18': ['▁', '1', '8'],\n", + " '18.1': ['▁', '1', '8', '.', '1'],\n", + " '1830': ['▁', '1', '8', '3', '0'],\n", + " \"1830's\": ['▁', '1', '8', '3', '0', \"'\", 's'],\n", + " '1834': ['▁', '1', '8', '3', '4'],\n", + " '1897': ['▁', '1', '8', '9', '7'],\n", + " '19': ['▁', '1', '9'],\n", + " '19.5': ['▁', '1', '9', '.', '5'],\n", + " '1910': ['▁', '1', '9', '10'],\n", + " '1913': ['▁', '1', '9', '1', '3'],\n", + " '1914': ['▁', '1', '9', '1', '4'],\n", + " '1914-18': ['▁', '1', '9', '1', '4', '-', '1', '8'],\n", + " '1918': ['▁', '1', '9', '1', '8'],\n", + " '1920': ['▁', '1', '9', '2', '0'],\n", + " '1930': ['▁', '1', '9', '3', '0'],\n", + " '1931': ['▁', '1', '9', '3', '1'],\n", + " '1932': ['▁', '1', '9', '3', '2'],\n", + " '1934': ['▁', '1', '9', '3', '4'],\n", + " '1936': ['▁', '1', '9', '3', '6'],\n", + " '1939': ['▁', '1', '9', '3', '9'],\n", + " '1943': ['▁', '1', '9', '4', '3'],\n", + " '1944': ['▁', '1', '9', '4', '4'],\n", + " '1950': ['▁', '1', '9', '5', '0'],\n", + " '1951': ['▁', '1', '9', '5', '1'],\n", + " '1952': ['▁', '1', '9', '5', '2'],\n", + " '1953': ['▁', '1', '9', '5', '3'],\n", + " '1954': ['▁', '1', '9', '5', '4'],\n", + " '1956': ['▁', '1', '9', '5', '6'],\n", + " '1957': ['▁', '1', '9', '5', '7'],\n", + " '1958': ['▁', '1', '9', '5', '8'],\n", + " '1959': ['▁', '1', '9', '5', '9'],\n", + " '1960': ['▁', '1960'],\n", + " '1960s': ['▁', '1960', 's'],\n", + " '1961': ['▁', '1', '9', '6', '1'],\n", + " '1963': ['▁', '1', '9', '6', '3'],\n", + " '19th': ['▁', '1', '9', 'th'],\n", + " '1superceded': ['▁', '1', 'superceded'],\n", + " \"1tho'\": ['▁', '1', 'tho', \"'\"],\n", + " '2': ['▁', '2'],\n", + " '2,000': ['▁', '2', ',', '0', '0', '0'],\n", + " '2,415,000,000': ['▁', '2', ',', '4', '1', '5', ',', '0', '00,000'],\n", + " '20': ['▁', '2', '0'],\n", + " '20-month-old': ['▁', '2', '0', '-', 'month', '-', 'old'],\n", + " '200': ['▁', '2', '0', '0'],\n", + " '20th-century': ['▁', '2', '0', 'th', '-', 'cent', 'ur', 'y'],\n", + " '21': ['▁', '2', '1'],\n", + " '210million': ['▁', '2', '10', 'million'],\n", + " '22': ['▁', '2', '2'],\n", + " '23.1': ['▁', '2', '3', '.', '1'],\n", + " '24': ['▁', '2', '4'],\n", + " '24-strong': ['▁', '2', '4', '-', 'strong'],\n", + " '25': ['▁', '2', '5'],\n", + " '27': ['▁', '2', '7'],\n", + " '28.5': ['▁', '2', '8', '.', '5'],\n", + " '280,000': ['▁', '2', '8', '0', ',', '0', '0', '0'],\n", + " '287': ['▁', '2', '8', '7'],\n", + " '288': ['▁', '2', '8', '8'],\n", + " '2bhoys': ['▁', '2', 'b', 'ho', 'y', 's'],\n", + " '2ole': ['▁', '2', 'o', 'le'],\n", + " '2pianna': ['▁', '2', 'p', 'i', 'an', 'n', 'a'],\n", + " '2skint': ['▁', '2', 's', 'k', 'in', 't'],\n", + " '3': ['▁', '3'],\n", + " '3,000': ['▁', '3', ',', '0', '0', '0'],\n", + " '3.6': ['▁', '3', '.', '6'],\n", + " '3/0': ['▁', '3', '/', '0'],\n", + " '3/4': ['▁', '3', '/', '4'],\n", + " '30': ['▁', '3', '0'],\n", + " '30-day': ['▁', '3', '0', '-', 'day'],\n", + " '30-minute': ['▁', '3', '0', '-', 'minute'],\n", + " '300,000': ['▁', '3', '00,000'],\n", + " '32': ['▁', '3', '2'],\n", + " '33': ['▁', '3', '3'],\n", + " '34': ['▁', '3', '4'],\n", + " '35': ['▁', '3', '5'],\n", + " '357million': ['▁', '3', '5', '7', 'million'],\n", + " '36': ['▁', '3', '6'],\n", + " '37,000,000': ['▁', '3', '7', ',', '0', '00,000'],\n", + " '37.2': ['▁', '3', '7', '.', '2'],\n", + " '38': ['▁', '3', '8'],\n", + " '4': ['▁', '4'],\n", + " '4.8': ['▁', '4', '.', '8'],\n", + " '40': ['▁', '4', '0'],\n", + " '400': ['▁', '4', '0', '0'],\n", + " '400,000': ['▁', '4', '00,000'],\n", + " '420000': ['▁', '4', '2', '0', '0', '0', '0'],\n", + " '43': ['▁', '4', '3'],\n", + " '450': ['▁', '4', '5', '0'],\n", + " '5': ['▁', '5'],\n", + " '5,000': ['▁', '5', ',', '0', '0', '0'],\n", + " '5.30': ['▁', '5', '.', '3', '0'],\n", + " '5/8': ['▁', '5', '/', '8'],\n", + " '50': ['▁', '5', '0'],\n", + " '50,000': ['▁', '5', '0', ',', '0', '0', '0'],\n", + " '500': ['▁', '5', '0', '0'],\n", + " '53-year-old': ['▁', '5', '3', '-', 'year', '-', 'old'],\n", + " '55': ['▁', '5', '5'],\n", + " '550,000': ['▁', '5', '5', '0', ',', '0', '0', '0'],\n", + " '58': ['▁', '5', '8'],\n", + " '6': ['▁', '6'],\n", + " '6,000': ['▁', '6', ',', '0', '0', '0'],\n", + " '60': ['▁', '6', '0'],\n", + " '600': ['▁', '6', '0', '0'],\n", + " '600,000': ['▁', '6', '00,000'],\n", + " '61-year-old': ['▁', '6', '1', '-', 'year', '-', 'old'],\n", + " '68': ['▁', '6', '8'],\n", + " '6al': ['▁', '6', 'al'],\n", + " '6tic': ['▁', '6', 'tic'],\n", + " '7.30': ['▁', '7', '.', '3', '0'],\n", + " '7.42': ['▁', '7', '.', '4', '2'],\n", + " '70': ['▁', '7', '0'],\n", + " '70,000,000': ['▁', '7', '0', ',', '0', '00,000'],\n", + " '707': ['▁', '7', '0', '7'],\n", + " '73': ['▁', '7', '3'],\n", + " '750': ['▁', '7', '5', '0'],\n", + " '8': ['▁', '8'],\n", + " '8,000,000': ['▁', '8', ',', '0', '00,000'],\n", + " '8.25': ['▁', '8', '.', '2', '5'],\n", + " '8.4': ['▁', '8', '.', '4'],\n", + " '80': ['▁', '8', '0'],\n", + " '800': ['▁', '8', '0', '0'],\n", + " '800,000': ['▁', '8', '00,000'],\n", + " '86': ['▁', '8', '6'],\n", + " '88': ['▁', '8', '8'],\n", + " '88-year-old': ['▁', '8', '8', '-', 'year', '-', 'old'],\n", + " '89': ['▁', '8', '9'],\n", + " '89-year-old': ['▁', '8', '9', '-', 'year', '-', 'old'],\n", + " '9.30': ['▁', '9', '.', '3', '0'],\n", + " '9.40': ['▁', '9', '.', '4', '0'],\n", + " '90-day': ['▁', '9', '0', '-', 'day'],\n", + " '90-minute': ['▁', '9', '0', '-', 'minute'],\n", + " '91': ['▁', '9', '1'],\n", + " '950': ['▁', '9', '5', '0'],\n", + " '97.5': ['▁', '9', '7', '.', '5'],\n", + " ':': ['▁', ':'],\n", + " ';': ['▁', ';'],\n", + " '?': ['▁', '?'],\n", + " 'a': ['▁', 'a'],\n", + " 'abandon': ['▁', 'a', 'b', 'and', 'on'],\n", + " 'abandoned': ['▁', 'a', 'b', 'and', 'on', 'ed'],\n", + " 'abandoning': ['▁', 'a', 'b', 'and', 'on', 'ing'],\n", + " 'abashed': ['▁', 'a', 'bas', 'he', 'd'],\n", + " 'ability': ['▁', 'a', 'b', 'il', 'ity'],\n", + " 'able': ['▁', 'able'],\n", + " 'able-bodied': ['▁', 'able', '-', 'bo', 'die', 'd'],\n", + " 'abolish': ['▁', 'a', 'bo', 'l', 'ish'],\n", + " 'abolished': ['▁', 'a', 'bo', 'l', 'ish', 'ed'],\n", + " 'abolition': ['▁', 'a', 'bo', 'li', 'tion'],\n", + " 'abortion': ['▁', 'a', 'b', 'or', 'tion'],\n", + " 'abou': ['▁', 'a', 'bo', 'u'],\n", + " 'about': ['▁', 'about'],\n", + " 'about-': ['▁', 'about', '-'],\n", + " 'above': ['▁', 'a', 'bo', 've'],\n", + " 'abreast': ['▁', 'a', 'br', 'east'],\n", + " 'abroad': ['▁', 'a', 'b', 'ro', 'ad'],\n", + " 'absence': ['▁', 'a', 'b', 's', 'ence'],\n", + " 'absent': ['▁', 'a', 'b', 's', 'ent'],\n", + " 'absolutely': ['▁', 'a', 'b', 'solut', 'e', 'ly'],\n", + " 'abstraction': ['▁', 'a', 'b', 's', 'tr', 'action'],\n", + " 'abundance': ['▁', 'a', 'b', 'un', 'd', 'ance'],\n", + " 'ac-': ['▁', 'ac', '-'],\n", + " 'academic': ['▁', 'ac', 'a', 'de', 'm', 'ic'],\n", + " 'accent': ['▁', 'ac', 'cent'],\n", + " 'accents': ['▁', 'ac', 'cent', 's'],\n", + " 'accept': ['▁', 'accept'],\n", + " 'acceptable': ['▁', 'accept', 'able'],\n", + " 'accepted': ['▁', 'accept', 'ed'],\n", + " 'accepting': ['▁', 'accept', 'ing'],\n", + " 'accessories': ['▁', 'ac', 'ce', 's', 'so', 'ries'],\n", + " 'accident': ['▁', 'ac', 'c', 'id', 'ent'],\n", + " 'accidental': ['▁', 'ac', 'c', 'id', 'ent', 'al'],\n", + " 'accommodate': ['▁', 'ac', 'com', 'mo', 'date'],\n", + " 'accommodation': ['▁', 'ac', 'com', 'mo', 'd', 'ation'],\n", + " 'accompanied': ['▁', 'ac', 'com', 'pan', 'i', 'ed'],\n", + " 'accompanist': ['▁', 'ac', 'com', 'pan', 'is', 't'],\n", + " 'accompany': ['▁', 'ac', 'com', 'p', 'any'],\n", + " 'accomplished': ['▁', 'ac', 'com', 'p', 'l', 'ish', 'ed'],\n", + " 'accomplishments': ['▁', 'ac', 'com', 'p', 'l', 'ish', 'ment', 's'],\n", + " 'according': ['▁', 'ac', 'c', 'or', 'd', 'ing'],\n", + " 'account': ['▁', 'ac', 'count'],\n", + " 'accountancy': ['▁', 'ac', 'count', 'an', 'c', 'y'],\n", + " 'accra': ['▁', 'ac', 'c', 'ra'],\n", + " \"accra's\": ['▁', 'ac', 'c', 'ra', \"'\", 's'],\n", + " 'accuracy': ['▁', 'ac', 'cur', 'ac', 'y'],\n", + " 'accurate': ['▁', 'ac', 'cur', 'ate'],\n", + " 'accurately': ['▁', 'ac', 'cur', 'ate', 'ly'],\n", + " 'accused': ['▁', 'ac', 'c', 'used'],\n", + " 'achieved': ['▁', 'a', 'ch', 'i', 'e', 'v', 'ed'],\n", + " 'achievement': ['▁', 'a', 'ch', 'i', 'e', 've', 'ment'],\n", + " 'acquaintance': ['▁', 'ac', 'q', 'u', 'a', 'in', 't', 'ance'],\n", + " 'acquaintances': ['▁', 'ac', 'q', 'u', 'a', 'in', 't', 'ance', 's'],\n", + " 'acres': ['▁', 'ac', 're', 's'],\n", + " 'across': ['▁', 'a', 'cross'],\n", + " 'act': ['▁', 'act'],\n", + " 'acting': ['▁', 'act', 'ing'],\n", + " 'action': ['▁', 'action'],\n", + " 'actions': ['▁', 'action', 's'],\n", + " 'active': ['▁', 'act', 'ive'],\n", + " 'activists': ['▁', 'act', 'i', 'vi', 'st', 's'],\n", + " 'activities': ['▁', 'act', 'i', 'v', 'it', 'ies'],\n", + " 'activity': ['▁', 'act', 'i', 'v', 'ity'],\n", + " 'acton': ['▁', 'act', 'on'],\n", + " 'actor': ['▁', 'act', 'or'],\n", + " 'actress': ['▁', 'act', 're', 's', 's'],\n", + " 'acts': ['▁', 'act', 's'],\n", + " 'actual': ['▁', 'act', 'ual'],\n", + " 'actually': ['▁', 'act', 'ual', 'ly'],\n", + " 'adamafio': ['▁', 'ad', 'a', 'ma', 'f', 'i', 'o'],\n", + " 'adaptation': ['▁', 'ad', 'ap', 't', 'ation'],\n", + " 'adapted': ['▁', 'ad', 'ap', 'ted'],\n", + " 'adapting': ['▁', 'ad', 'ap', 't', 'ing'],\n", + " 'add': ['▁', 'ad', 'd'],\n", + " 'added': ['▁', 'ad', 'd', 'ed'],\n", + " 'adding': ['▁', 'adding'],\n", + " 'addition': ['▁', 'ad', 'd', 'it', 'ion'],\n", + " 'additions': ['▁', 'ad', 'd', 'it', 'ion', 's'],\n", + " 'address': ['▁', 'ad', 'dr', 'es', 's'],\n", + " 'addressed': ['▁', 'ad', 'dr', 'es', 's', 'ed'],\n", + " 'addresses': ['▁', 'ad', 'dr', 'es', 'se', 's'],\n", + " 'addressing': ['▁', 'ad', 'dr', 'es', 's', 'ing'],\n", + " 'adenauer': ['▁', 'adenauer'],\n", + " \"adenauer's\": ['▁', 'adenauer', \"'\", 's'],\n", + " 'adequate': ['▁', 'ad', 'equa', 'te'],\n", + " 'adhem': ['▁', 'ad', 'he', 'm'],\n", + " 'adjust': ['▁', 'ad', 'just'],\n", + " 'adjustment': ['▁', 'ad', 'just', 'ment'],\n", + " 'administration': ['▁', 'ad', 'ministr', 'ation'],\n", + " \"administration's\": ['▁', 'ad', 'ministr', 'ation', \"'\", 's'],\n", + " 'administrative': ['▁', 'ad', 'ministr', 'at', 'ive'],\n", + " 'admiralty': ['▁', 'ad', 'm', 'i', 'r', 'al', 'ty'],\n", + " 'admire': ['▁', 'ad', 'm', 'i', 're'],\n", + " 'admit': ['▁', 'ad', 'm', 'it'],\n", + " 'admitted': ['▁', 'ad', 'm', 'it', 'ted'],\n", + " 'admitting': ['▁', 'ad', 'm', 'it', 't', 'ing'],\n", + " 'adopted': ['▁', 'a', 'do', 'p', 'ted'],\n", + " 'adopting': ['▁', 'a', 'do', 'p', 't', 'ing'],\n", + " 'adoption': ['▁', 'a', 'do', 'p', 'tion'],\n", + " 'adult': ['▁', 'ad', 'ul', 't'],\n", + " 'advance': ['▁', 'ad', 'v', 'ance'],\n", + " 'advanced': ['▁', 'ad', 'v', 'ance', 'd'],\n", + " 'advancing': ['▁', 'ad', 'v', 'an', 'c', 'ing'],\n", + " 'advantage': ['▁', 'advantage'],\n", + " 'advantages': ['▁', 'advantage', 's'],\n", + " 'advertisement': ['▁', 'ad', 'ver', 't', 'is', 'e', 'ment'],\n", + " 'advertisements': ['▁', 'ad', 'ver', 't', 'is', 'ements'],\n", + " 'advice': ['▁', 'advi', 'ce'],\n", + " 'advisability': ['▁', 'advi', 's', 'a', 'b', 'il', 'ity'],\n", + " 'advise': ['▁', 'advise'],\n", + " 'advised': ['▁', 'advise', 'd'],\n", + " 'advisers': ['▁', 'advise', 'r', 's'],\n", + " 'advocate': ['▁', 'ad', 'v', 'o', 'c', 'ate'],\n", + " 'af-': ['▁', 'a', 'f', '-'],\n", + " 'affairs': ['▁', 'a', 'f', 'f', 'air', 's'],\n", + " 'affected': ['▁', 'a', 'f', 'fe', 'c', 'ted'],\n", + " 'affection': ['▁', 'a', 'f', 'fe', 'c', 'tion'],\n", + " 'affilia-': ['▁', 'a', 'f', 'f', 'il', 'i', 'a', '-'],\n", + " 'affiliations': ['▁', 'a', 'f', 'f', 'il', 'i', 'ation', 's'],\n", + " 'affluence': ['▁', 'a', 'f', 'f', 'l', 'u', 'ence'],\n", + " 'affluent': ['▁', 'a', 'f', 'f', 'l', 'u', 'ent'],\n", + " 'afford': ['▁', 'a', 'f', 'for', 'd'],\n", + " 'afraid': ['▁', 'a', 'fr', 'a', 'id'],\n", + " 'africa': ['▁', 'africa'],\n", + " \"africa's\": ['▁', 'africa', \"'\", 's'],\n", + " 'african': ['▁', 'african'],\n", + " 'africans': ['▁', 'african', 's'],\n", + " 'after': ['▁', 'after'],\n", + " 'afternoon': ['▁', 'after', 'no', 'on'],\n", + " 'afterwards': ['▁', 'after', 'ward', 's'],\n", + " 'again': ['▁', 'again'],\n", + " 'against': ['▁', 'against'],\n", + " 'age': ['▁', 'age'],\n", + " 'age-structure': ['▁', 'age', '-', 's', 'tru', 'c', 'ture'],\n", + " 'aged': ['▁', 'aged'],\n", + " 'ageing': ['▁', 'age', 'ing'],\n", + " 'agent': ['▁', 'a', 'g', 'ent'],\n", + " 'agents': ['▁', 'a', 'g', 'ent', 's'],\n", + " 'ages': ['▁', 'age', 's'],\n", + " 'agitation': ['▁', 'a', 'g', 'it', 'ation'],\n", + " 'ago': ['▁', 'a', 'go'],\n", + " 'agree': ['▁', 'agree'],\n", + " 'agreed': ['▁', 'agree', 'd'],\n", + " 'agreement': ['▁', 'agree', 'ment'],\n", + " 'agreements': ['▁', 'agree', 'ment', 's'],\n", + " 'agriculture': ['▁', 'a', 'gr', 'ic', 'ul', 'ture'],\n", + " 'ahead': ['▁', 'a', 'head'],\n", + " 'aid': ['▁', 'a', 'id'],\n", + " 'aide': ['▁', 'a', 'i', 'de'],\n", + " 'aided': ['▁', 'a', 'id', 'ed'],\n", + " 'aides': ['▁', 'a', 'id', 'es'],\n", + " 'aim': ['▁', 'a', 'im'],\n", + " 'aimed': ['▁', 'a', 'im', 'ed'],\n", + " 'aiming': ['▁', 'a', 'im', 'ing'],\n", + " 'air': ['▁', 'air'],\n", + " 'aircraft': ['▁', 'air', 'craft'],\n", + " 'aired': ['▁', 'air', 'ed'],\n", + " \"airliner's\": ['▁', 'air', 'line', 'r', \"'\", 's'],\n", + " 'airmen': ['▁', 'air', 'men'],\n", + " 'airport': ['▁', 'air', 'port'],\n", + " 'akin': ['▁', 'a', 'k', 'in'],\n", + " \"aladdin's\": ['▁', 'al', 'ad', 'd', 'in', \"'\", 's'],\n", + " 'alan': ['▁', 'al', 'an'],\n", + " 'alarm': ['▁', 'al', 'arm'],\n", + " 'alarmed': ['▁', 'al', 'arm', 'ed'],\n", + " 'alas': ['▁', 'al', 'as'],\n", + " 'alcoholic': ['▁', 'al', 'co', 'ho', 'li', 'c'],\n", + " 'algeria': ['▁', 'al', 'g', 'er', 'i', 'a'],\n", + " 'alike': ['▁', 'a', 'like'],\n", + " 'alive': ['▁', 'a', 'live'],\n", + " 'all': ['▁', 'all'],\n", + " 'all-regular': ['▁', 'all', '-', 'regular'],\n", + " 'alleged': ['▁', 'al', 'leg', 'ed'],\n", + " 'allen': ['▁', 'all', 'en'],\n", + " 'alleviation': ['▁', 'alleviation'],\n", + " 'alley': ['▁', 'al', 'le', 'y'],\n", + " 'alliance': ['▁', 'all', 'i', 'ance'],\n", + " 'alliances': ['▁', 'all', 'i', 'ance', 's'],\n", + " 'allied': ['▁', 'all', 'i', 'ed'],\n", + " 'allies': ['▁', 'all', 'ies'],\n", + " 'allow': ['▁', 'allow'],\n", + " 'allowance': ['▁', 'allow', 'ance'],\n", + " 'allowances': ['▁', 'allow', 'ance', 's'],\n", + " 'allowed': ['▁', 'allow', 'ed'],\n", + " 'allowing': ['▁', 'allow', 'ing'],\n", + " 'ally': ['▁', 'al', 'ly'],\n", + " 'almost': ['▁', 'al', 'most'],\n", + " 'alone': ['▁', 'al', 'one'],\n", + " 'along': ['▁', 'a', 'long'],\n", + " 'alongside': ['▁', 'a', 'long', 'side'],\n", + " 'aloud': ['▁', 'a', 'lo', 'ud'],\n", + " 'already': ['▁', 'al', 'read', 'y'],\n", + " 'also': ['▁', 'also'],\n", + " 'alter': ['▁', 'al', 'ter'],\n", + " 'alternative': ['▁', 'al', 'ter', 'n', 'at', 'ive'],\n", + " 'alternatively': ['▁', 'al', 'ter', 'n', 'at', 'ive', 'ly'],\n", + " 'alternatives': ['▁', 'al', 'ter', 'n', 'at', 'ive', 's'],\n", + " 'although': ['▁', 'al', 'though'],\n", + " 'altogether': ['▁', 'al', 'together'],\n", + " 'altos': ['▁', 'al', 'to', 's'],\n", + " 'always': ['▁', 'always'],\n", + " 'am': ['▁', 'am'],\n", + " 'amateur': ['▁', 'am', 'ate', 'ur'],\n", + " 'amazed': ['▁', 'a', 'ma', 'z', 'ed'],\n", + " 'amazing': ['▁', 'a', 'ma', 'z', 'ing'],\n", + " 'ambassador': ['▁', 'am', 'bas', 's', 'ad', 'or'],\n", + " 'amber': ['▁', 'a', 'mber'],\n", + " 'ambition': ['▁', 'am', 'b', 'it', 'ion'],\n", + " 'ambitious': ['▁', 'am', 'b', 'it', 'i', 'ous'],\n", + " 'ambulance': ['▁', 'am', 'b', 'ul', 'ance'],\n", + " 'ambulances': ['▁', 'am', 'b', 'ul', 'ance', 's'],\n", + " 'america': ['▁', 'america'],\n", + " \"america's\": ['▁', 'america', \"'\", 's'],\n", + " 'american': ['▁', 'american'],\n", + " 'american-born': ['▁', 'american', '-', 'b', 'or', 'n'],\n", + " 'americans': ['▁', 'american', 's'],\n", + " 'amid': ['▁', 'am', 'id'],\n", + " 'ammunition': ['▁', 'am', 'm', 'un', 'it', 'ion'],\n", + " 'among': ['▁', 'among'],\n", + " 'amount': ['▁', 'a', 'mo', 'un', 't'],\n", + " 'ample': ['▁', 'amp', 'le'],\n", + " 'amusement': ['▁', 'am', 'use', 'ment'],\n", + " 'amusing': ['▁', 'am', 'us', 'ing'],\n", + " 'an': ['▁', 'an'],\n", + " 'analogy': ['▁', 'an', 'a', 'lo', 'g', 'y'],\n", + " 'analysed': ['▁', 'an', 'a', 'ly', 's', 'ed'],\n", + " 'anchor': ['▁', 'an', 'ch', 'or'],\n", + " 'ancient': ['▁', 'an', 'c', 'i', 'ent'],\n", + " 'and': ['▁', 'and'],\n", + " 'andrei': ['▁', 'and', 're', 'i'],\n", + " 'andrew': ['▁', 'and', 're', 'w'],\n", + " 'anecdotal': ['▁', 'an', 'e', 'c', 'do', 't', 'al'],\n", + " 'angel': ['▁', 'ang', 'el'],\n", + " 'angeles': ['▁', 'ang', 'el', 'es'],\n", + " 'angelo': ['▁', 'ang', 'e', 'lo'],\n", + " 'anger': ['▁', 'ang', 'er'],\n", + " 'anglais': ['▁', 'ang', 'la', 'is'],\n", + " 'angle': ['▁', 'ang', 'le'],\n", + " 'anglesey': ['▁', 'anglesey'],\n", + " \"anglesey's\": ['▁', 'anglesey', \"'\", 's'],\n", + " 'anglesey-road': ['▁', 'anglesey', '-', 'ro', 'ad'],\n", + " 'angola': ['▁', 'an', 'go', 'la'],\n", + " 'angrily': ['▁', 'an', 'gr', 'i', 'ly'],\n", + " 'angry': ['▁', 'ang', 'ry'],\n", + " 'ann': ['▁', 'an', 'n'],\n", + " 'anna': ['▁', 'an', 'n', 'a'],\n", + " 'announced': ['▁', 'an', 'no', 'un', 'c', 'ed'],\n", + " 'announcement': ['▁', 'an', 'no', 'un', 'ce', 'ment'],\n", + " 'announcing': ['▁', 'an', 'no', 'un', 'c', 'ing'],\n", + " 'annoyed': ['▁', 'an', 'no', 'y', 'ed'],\n", + " 'annual': ['▁', 'an', 'n', 'ual'],\n", + " 'another': ['▁', 'another'],\n", + " 'answer': ['▁', 'answer'],\n", + " 'answered': ['▁', 'answer', 'ed'],\n", + " 'answering': ['▁', 'answer', 'ing'],\n", + " 'antagonism': ['▁', 'ant', 'a', 'g', 'on', 'is', 'm'],\n", + " 'anthony': ['▁', 'an', 'th', 'on', 'y'],\n", + " 'anti-apartheid': ['▁', 'ant', 'i', '-', 'a', 'part', 'he', 'id'],\n", + " 'anti-bomb': ['▁', 'ant', 'i', '-', 'bomb'],\n", + " 'anti-german': ['▁', 'ant', 'i', '-', 'german'],\n", + " 'anti-nato': ['▁', 'ant', 'i', '-', 'nato'],\n", + " 'anti-negro': ['▁', 'ant', 'i', '-', 'negro'],\n", + " 'anti-nuclear': ['▁', 'ant', 'i', '-', 'nuclear'],\n", + " 'anti-soviet': ['▁', 'ant', 'i', '-', 'soviet'],\n", + " 'anti-tory': ['▁', 'ant', 'i', '-', 'tory'],\n", + " 'anticipation': ['▁', 'an', 'tic', 'ip', 'ation'],\n", + " 'antonioni': ['▁', 'ant', 'on', 'ion', 'i'],\n", + " \"antonioni's\": ['▁', 'ant', 'on', 'ion', 'i', \"'\", 's'],\n", + " 'any': ['▁', 'any'],\n", + " 'any-': ['▁', 'any', '-'],\n", + " 'anybody': ['▁', 'any', 'body'],\n", + " \"anybody's\": ['▁', 'any', 'body', \"'\", 's'],\n", + " 'anyone': ['▁', 'any', 'one'],\n", + " 'anything': ['▁', 'any', 'thing'],\n", + " 'anyway': ['▁', 'any', 'way'],\n", + " 'apart': ['▁', 'a', 'part'],\n", + " 'apartheid': ['▁', 'a', 'part', 'he', 'id'],\n", + " 'apathetic': ['▁', 'a', 'pa', 'the', 'tic'],\n", + " 'apathy': ['▁', 'a', 'pa', 'th', 'y'],\n", + " 'apex': ['▁', 'ap', 'ex'],\n", + " 'apocalypse': ['▁', 'a', 'po', 'c', 'a', 'ly', 'p', 'se'],\n", + " 'apologising': ['▁', 'a', 'po', 'lo', 'g', 'is', 'ing'],\n", + " 'appalled': ['▁', 'app', 'all', 'ed'],\n", + " 'appalling': ['▁', 'app', 'all', 'ing'],\n", + " 'apparatus': ['▁', 'app', 'ar', 'at', 'us'],\n", + " 'apparent': ['▁', 'app', 'ar', 'ent'],\n", + " 'apparently': ['▁', 'app', 'ar', 'ent', 'ly'],\n", + " 'appeal': ['▁', 'appeal'],\n", + " 'appealing': ['▁', 'appeal', 'ing'],\n", + " 'appeals': ['▁', 'appeal', 's'],\n", + " 'appear': ['▁', 'appear'],\n", + " 'appearance': ['▁', 'appear', 'ance'],\n", + " 'appeared': ['▁', 'appear', 'ed'],\n", + " 'appears': ['▁', 'appear', 's'],\n", + " 'appeasement': ['▁', 'app', 'e', 'a', 'se', 'ment'],\n", + " 'applauding': ['▁', 'app', 'la', 'ud', 'ing'],\n", + " 'appliances': ['▁', 'app', 'li', 'ance', 's'],\n", + " 'application': ['▁', 'app', 'li', 'c', 'ation'],\n", + " 'applications': ['▁', 'app', 'li', 'c', 'ation', 's'],\n", + " 'applied': ['▁', 'app', 'li', 'ed'],\n", + " 'apply': ['▁', 'app', 'ly'],\n", + " 'appointed': ['▁', 'ap', 'point', 'ed'],\n", + " 'appointment': ['▁', 'ap', 'point', 'ment'],\n", + " 'appreciable': ['▁', 'app', 're', 'c', 'i', 'able'],\n", + " 'appreciably': ['▁', 'app', 're', 'c', 'i', 'ably'],\n", + " 'appreciated': ['▁', 'app', 're', 'c', 'i', 'at', 'ed'],\n", + " 'appreciation': ['▁', 'app', 're', 'c', 'i', 'ation'],\n", + " 'apprenticeships': ['▁', 'app', 'r', 'ent', 'i', 'ce', 'ship', 's'],\n", + " 'approach': ['▁', 'ap', 'pro', 'a', 'ch'],\n", + " 'approached': ['▁', 'ap', 'pro', 'a', 'ch', 'ed'],\n", + " 'approaches': ['▁', 'ap', 'pro', 'a', 'che', 's'],\n", + " 'appropriate': ['▁', 'ap', 'pro', 'pri', 'ate'],\n", + " 'appropriated': ['▁', 'ap', 'pro', 'pri', 'at', 'ed'],\n", + " 'approval': ['▁', 'ap', 'pro', 'val'],\n", + " 'approximately': ['▁', 'ap', 'pro', 'x', 'im', 'ate', 'ly'],\n", + " 'april': ['▁', 'a', 'pri', 'l'],\n", + " 'archbishop': ['▁', 'ar', 'ch', 'b', 'is', 'hop'],\n", + " 'arches': ['▁', 'ar', 'che', 's'],\n", + " 'archipelago': ['▁', 'ar', 'ch', 'i', 'pe', 'la', 'go'],\n", + " 'architect': ['▁', 'ar', 'ch', 'it', 'e', 'c', 't'],\n", + " 'architecture': ['▁', 'ar', 'ch', 'it', 'e', 'c', 'ture'],\n", + " 'are': ['▁', 'are'],\n", + " 'area': ['▁', 'are', 'a'],\n", + " 'areas': ['▁', 'are', 'as'],\n", + " \"aren't\": ['▁', 'are', 'n', \"'\", 't'],\n", + " 'arguably': ['▁', 'ar', 'gu', 'ably'],\n", + " 'argued': ['▁', 'ar', 'gu', 'ed'],\n", + " 'argues': ['▁', 'ar', 'gu', 'es'],\n", + " 'arguing': ['▁', 'ar', 'gu', 'ing'],\n", + " 'argument': ['▁', 'ar', 'gu', 'ment'],\n", + " 'arguments': ['▁', 'ar', 'gu', 'ment', 's'],\n", + " 'arise': ['▁', 'a', 'rise'],\n", + " 'arises': ['▁', 'a', 'rise', 's'],\n", + " 'arm': ['▁', 'arm'],\n", + " 'armament': ['▁', 'arm', 'a', 'ment'],\n", + " 'armaments': ['▁', 'arm', 'a', 'ment', 's'],\n", + " 'armed': ['▁', 'arm', 'ed'],\n", + " 'armoured': ['▁', 'arm', 'our', 'ed'],\n", + " 'arms': ['▁', 'arm', 's'],\n", + " \"arms'\": ['▁', 'arm', 's', \"'\"],\n", + " 'army': ['▁', 'arm', 'y'],\n", + " 'arnold': ['▁', 'ar', 'n', 'old'],\n", + " 'arose': ['▁', 'a', 'ro', 'se'],\n", + " 'around': ['▁', 'a', 'round'],\n", + " 'aroused': ['▁', 'ar', 'ous', 'ed'],\n", + " 'arrange': ['▁', 'ar', 'range'],\n", + " 'arranged': ['▁', 'ar', 'range', 'd'],\n", + " 'arrangement': ['▁', 'ar', 'range', 'ment'],\n", + " 'arrangements': ['▁', 'ar', 'range', 'ment', 's'],\n", + " 'arranging': ['▁', 'ar', 'r', 'ang', 'ing'],\n", + " 'arrears': ['▁', 'ar', 're', 'ar', 's'],\n", + " 'arrested': ['▁', 'ar', 'rest', 'ed'],\n", + " 'arrival': ['▁', 'ar', 'r', 'i', 'val'],\n", + " 'arrive': ['▁', 'ar', 'r', 'ive'],\n", + " 'arrived': ['▁', 'arrived'],\n", + " 'arrives': ['▁', 'ar', 'r', 'ive', 's'],\n", + " 'arrogant': ['▁', 'ar', 'ro', 'g', 'ant'],\n", + " 'art': ['▁', 'ar', 't'],\n", + " 'arthur': ['▁', 'ar', 'th', 'ur'],\n", + " 'article': ['▁', 'ar', 'tic', 'le'],\n", + " 'articles': ['▁', 'ar', 'tic', 'le', 's'],\n", + " 'articulation': ['▁', 'ar', 'tic', 'ul', 'ation'],\n", + " 'artistic': ['▁', 'ar', 'tist', 'ic'],\n", + " 'artistically': ['▁', 'ar', 'tist', 'ical', 'ly'],\n", + " 'artistry': ['▁', 'ar', 'tist', 'ry'],\n", + " 'artists': ['▁', 'ar', 'tist', 's'],\n", + " 'as': ['▁', 'as'],\n", + " 'ascents': ['▁', 'as', 'cent', 's'],\n", + " 'ash': ['▁', 'as', 'h'],\n", + " 'ashen': ['▁', 'as', 'he', 'n'],\n", + " 'ask': ['▁', 'as', 'k'],\n", + " 'asked': ['▁', 'asked'],\n", + " 'asking': ['▁', 'asking'],\n", + " 'aspect': ['▁', 'a', 'spect'],\n", + " 'aspects': ['▁', 'a', 'spect', 's'],\n", + " 'aspiring': ['▁', 'as', 'p', 'i', 'r', 'ing'],\n", + " 'assault': ['▁', 'as', 's', 'a', 'ul', 't'],\n", + " 'assembler': ['▁', 'as', 'se', 'm', 'bl', 'er'],\n", + " 'assembly': ['▁', 'as', 'se', 'm', 'b', 'ly'],\n", + " 'assess': ['▁', 'as', 'se', 's', 's'],\n", + " 'assessment': ['▁', 'as', 'se', 's', 's', 'ment'],\n", + " 'assistance': ['▁', 'as', 's', 'istance'],\n", + " 'assistant': ['▁', 'as', 's', 'is', 't', 'ant'],\n", + " 'assistants': ['▁', 'as', 's', 'is', 't', 'ant', 's'],\n", + " 'associate': ['▁', 'associat', 'e'],\n", + " 'associated': ['▁', 'associat', 'ed'],\n", + " 'associates': ['▁', 'associat', 'es'],\n", + " 'association': ['▁', 'associat', 'ion'],\n", + " 'assortment': ['▁', 'as', 's', 'or', 't', 'ment'],\n", + " 'assumption': ['▁', 'assumption'],\n", + " 'assurance': ['▁', 'as', 's', 'ur', 'ance'],\n", + " 'astronaut': ['▁', 'as', 'tr', 'on', 'a', 'u', 't'],\n", + " 'astute': ['▁', 'a', 'st', 'u', 'te'],\n", + " 'at': ['▁', 'at'],\n", + " 'ately': ['▁', 'ate', 'ly'],\n", + " 'atkinson': ['▁', 'at', 'k', 'in', 's', 'on'],\n", + " 'atlantic': ['▁', 'at', 'l', 'an', 'tic'],\n", + " 'atmosphere': ['▁', 'atmospher', 'e'],\n", + " 'atmospheric': ['▁', 'atmospher', 'ic'],\n", + " 'atomic': ['▁', 'a', 'to', 'm', 'ic'],\n", + " 'atoms': ['▁', 'a', 'to', 'm', 's'],\n", + " 'attach': ['▁', 'at', 't', 'a', 'ch'],\n", + " 'attached': ['▁', 'at', 't', 'a', 'ch', 'ed'],\n", + " 'attack': ['▁', 'at', 't', 'a', 'ck'],\n", + " 'attacked': ['▁', 'at', 't', 'a', 'ck', 'ed'],\n", + " 'attacks': ['▁', 'at', 't', 'a', 'ck', 's'],\n", + " 'attainable': ['▁', 'at', 'tain', 'able'],\n", + " 'attempt': ['▁', 'attempt'],\n", + " 'attempted': ['▁', 'attempt', 'ed'],\n", + " 'attempting': ['▁', 'attempt', 'ing'],\n", + " 'attempts': ['▁', 'attempt', 's'],\n", + " 'atten-': ['▁', 'at', 'ten', '-'],\n", + " 'attend': ['▁', 'at', 't', 'end'],\n", + " 'attendance': ['▁', 'at', 't', 'end', 'ance'],\n", + " 'attended': ['▁', 'at', 't', 'end', 'ed'],\n", + " 'attending': ['▁', 'at', 't', 'end', 'ing'],\n", + " 'attention': ['▁', 'at', 'ten', 'tion'],\n", + " 'attitude': ['▁', 'at', 't', 'it', 'u', 'de'],\n", + " 'attitudes': ['▁', 'at', 't', 'it', 'ud', 'es'],\n", + " 'attracted': ['▁', 'at', 'tr', 'act', 'ed'],\n", + " 'attractive': ['▁', 'at', 'tr', 'act', 'ive'],\n", + " 'aubrey': ['▁', 'a', 'u', 'b', 're', 'y'],\n", + " 'audacity': ['▁', 'a', 'ud', 'ac', 'ity'],\n", + " 'auden': ['▁', 'a', 'ud', 'en'],\n", + " 'audience': ['▁', 'a', 'ud', 'i', 'ence'],\n", + " 'audio-tv': ['▁', 'a', 'ud', 'i', 'o', '-', 't', 'v'],\n", + " 'audited': ['▁', 'a', 'ud', 'it', 'ed'],\n", + " 'august': ['▁', 'a', 'ug', 'u', 'st'],\n", + " 'auntie': ['▁', 'a', 'un', 't', 'i', 'e'],\n", + " 'austerity': ['▁', 'a', 'u', 'ster', 'ity'],\n", + " 'australia': ['▁', 'a', 'us', 'tr', 'al', 'i', 'a'],\n", + " 'austria': ['▁', 'a', 'us', 'tri', 'a'],\n", + " 'austrian': ['▁', 'a', 'us', 'tri', 'an'],\n", + " 'authentic': ['▁', 'a', 'u', 'then', 'tic'],\n", + " 'author': ['▁', 'author'],\n", + " 'authorised': ['▁', 'author', 'is', 'ed'],\n", + " 'authorities': ['▁', 'author', 'it', 'ies'],\n", + " 'authority': ['▁', 'author', 'ity'],\n", + " 'automatically': ['▁', 'a', 'u', 'to', 'm', 'at', 'ical', 'ly'],\n", + " 'automation': ['▁', 'a', 'u', 'to', 'm', 'ation'],\n", + " 'autumn': ['▁', 'a', 'u', 't', 'um', 'n'],\n", + " 'available': ['▁', 'a', 'v', 'a', 'il', 'able'],\n", + " 'avenue': ['▁', 'a', 've', 'n', 'ue'],\n", + " 'average': ['▁', 'a', 'ver', 'age'],\n", + " 'averages': ['▁', 'a', 'ver', 'age', 's'],\n", + " 'avert': ['▁', 'a', 'ver', 't'],\n", + " 'aviation': ['▁', 'a', 'vi', 'ation'],\n", + " 'avoid': ['▁', 'a', 'v', 'o', 'id'],\n", + " 'avoided': ['▁', 'a', 'v', 'o', 'id', 'ed'],\n", + " 'avon': ['▁', 'a', 'v', 'on'],\n", + " 'awake': ['▁', 'a', 'w', 'a', 'ke'],\n", + " 'awarded': ['▁', 'a', 'ward', 'ed'],\n", + " 'awards': ['▁', 'a', 'ward', 's'],\n", + " 'aware': ['▁', 'a', 'w', 'are'],\n", + " 'awareness': ['▁', 'a', 'w', 'are', 'ness'],\n", + " 'away': ['▁', 'a', 'way'],\n", + " 'awful': ['▁', 'a', 'w', 'ful'],\n", + " 'awfully': ['▁', 'a', 'w', 'ful', 'ly'],\n", + " 'b': ['▁', 'b'],\n", + " 'b.': ['▁', 'b', '.'],\n", + " 'b.b.c.': ['▁', 'b', '.', 'b', '.', 'c', '.'],\n", + " 'babe': ['▁', 'b', 'a', 'be'],\n", + " 'babel': ['▁', 'b', 'a', 'be', 'l'],\n", + " 'bably': ['▁', 'b', 'ably'],\n", + " 'baby': ['▁', 'b', 'a', 'by'],\n", + " \"baby's\": ['▁', 'b', 'a', 'by', \"'\", 's'],\n", + " 'back': ['▁', 'back'],\n", + " 'backbone': ['▁', 'back', 'b', 'one'],\n", + " 'backed': ['▁', 'back', 'ed'],\n", + " 'backers': ['▁', 'back', 'ers'],\n", + " 'background': ['▁', 'back', 'ground'],\n", + " 'backing': ['▁', 'back', 'ing'],\n", + " 'backstage': ['▁', 'back', 'st', 'age'],\n", + " 'backward': ['▁', 'back', 'ward'],\n", + " 'bad': ['▁', 'b', 'ad'],\n", + " 'badly': ['▁', 'b', 'ad', 'ly'],\n", + " 'baffled': ['▁', 'b', 'a', 'f', 'f', 'led'],\n", + " 'bag': ['▁', 'b', 'a', 'g'],\n", + " 'bagaya': ['▁', 'b', 'a', 'gay', 'a'],\n", + " 'baker': ['▁', 'b', 'a', 'k', 'er'],\n", + " 'balance': ['▁', 'b', 'al', 'ance'],\n", + " 'balance-sheet': ['▁', 'b', 'al', 'ance', '-', 'she', 'e', 't'],\n", + " 'balances': ['▁', 'b', 'al', 'ance', 's'],\n", + " 'bald': ['▁', 'b', 'al', 'd'],\n", + " 'ball': ['▁', 'b', 'all'],\n", + " 'balloon': ['▁', 'b', 'all', 'o', 'on'],\n", + " 'ballyhoo': ['▁', 'b', 'al', 'ly', 'ho', 'o'],\n", + " 'baltic': ['▁', 'b', 'al', 'tic'],\n", + " 'ban': ['▁', 'b', 'an'],\n", + " 'ban-': ['▁', 'b', 'an', '-'],\n", + " 'ban-the-': ['▁', 'b', 'an', '-', 'the', '-'],\n", + " 'ban-the-bomb': ['▁', 'b', 'an', '-', 'the', '-', 'bomb'],\n", + " 'bank': ['▁', 'bank'],\n", + " \"bank's\": ['▁', 'bank', \"'\", 's'],\n", + " 'banking': ['▁', 'bank', 'ing'],\n", + " 'bankrupt': ['▁', 'bank', 'r', 'up', 't'],\n", + " 'banks': ['▁', 'bank', 's'],\n", + " \"banks'\": ['▁', 'bank', 's', \"'\"],\n", + " 'banned': ['▁', 'b', 'an', 'n', 'ed'],\n", + " 'banzie': ['▁', 'b', 'an', 'z', 'i', 'e'],\n", + " 'bar': ['▁', 'b', 'ar'],\n", + " 'barb': ['▁', 'b', 'ar', 'b'],\n", + " 'barbara': ['▁', 'b', 'ar', 'b', 'ar', 'a'],\n", + " 'barbarously': ['▁', 'b', 'ar', 'b', 'ar', 'ous', 'ly'],\n", + " 'barclay': ['▁', 'b', 'ar', 'clay'],\n", + " 'bare': ['▁', 'b', 'are'],\n", + " 'bargain': ['▁', 'b', 'ar', 'g', 'a', 'in'],\n", + " 'bargaining': ['▁', 'b', 'ar', 'g', 'a', 'in', 'ing'],\n", + " 'bark': ['▁', 'b', 'ar', 'k'],\n", + " 'barrier': ['▁', 'b', 'ar', 'r', 'i', 'er'],\n", + " 'barriers': ['▁', 'b', 'ar', 'r', 'i', 'ers'],\n", + " 'barry': ['▁', 'b', 'a', 'rry'],\n", + " 'base': ['▁', 'base'],\n", + " 'based': ['▁', 'bas', 'ed'],\n", + " 'bases': ['▁', 'base', 's'],\n", + " 'basic': ['▁', 'bas', 'ic'],\n", + " 'basin': ['▁', 'bas', 'in'],\n", + " 'basing': ['▁', 'bas', 'ing'],\n", + " 'basis': ['▁', 'bas', 'is'],\n", + " 'baskerville': ['▁', 'bas', 'k', 'er', 'v', 'il', 'le'],\n", + " 'basses': ['▁', 'bas', 'se', 's'],\n", + " 'basting': ['▁', 'bas', 't', 'ing'],\n", + " 'bathing': ['▁', 'b', 'a', 'thing'],\n", + " 'bats': ['▁', 'b', 'at', 's'],\n", + " 'batsman': ['▁', 'b', 'at', 's', 'man'],\n", + " 'battalions': ['▁', 'b', 'at', 't', 'al', 'ion', 's'],\n", + " 'batting': ['▁', 'b', 'at', 't', 'ing'],\n", + " 'battle': ['▁', 'b', 'a', 'ttle'],\n", + " 'bavaria': ['▁', 'b', 'a', 'v', 'ar', 'i', 'a'],\n", + " 'bavarian': ['▁', 'b', 'a', 'v', 'ar', 'i', 'an'],\n", + " 'bavarians': ['▁', 'b', 'a', 'v', 'ar', 'i', 'an', 's'],\n", + " 'bay': ['▁', 'b', 'a', 'y'],\n", + " 'be': ['▁', 'be'],\n", + " 'beach': ['▁', 'b', 'each'],\n", + " 'beaches': ['▁', 'b', 'each', 'es'],\n", + " 'beacon': ['▁', 'be', 'a', 'con'],\n", + " 'beaks': ['▁', 'be', 'a', 'k', 's'],\n", + " 'bean': ['▁', 'be', 'an'],\n", + " 'bear': ['▁', 'be', 'ar'],\n", + " 'bearer': ['▁', 'be', 'are', 'r'],\n", + " 'bears': ['▁', 'be', 'ar', 's'],\n", + " 'beastly': ['▁', 'b', 'east', 'ly'],\n", + " 'beasts': ['▁', 'b', 'east', 's'],\n", + " 'beaten': ['▁', 'be', 'a', 'ten'],\n", + " 'beautiful': ['▁', 'be', 'a', 'u', 't', 'i', 'ful'],\n", + " 'beautifully': ['▁', 'be', 'a', 'u', 't', 'i', 'ful', 'ly'],\n", + " 'beauty': ['▁', 'be', 'a', 'u', 'ty'],\n", + " 'became': ['▁', 'be', 'came'],\n", + " 'because': ['▁', 'because'],\n", + " 'beckoning': ['▁', 'be', 'ck', 'on', 'ing'],\n", + " 'become': ['▁', 'be', 'come'],\n", + " 'becomes': ['▁', 'be', 'come', 's'],\n", + " 'becoming': ['▁', 'be', 'com', 'ing'],\n", + " 'bed': ['▁', 'b', 'ed'],\n", + " 'bedlam': ['▁', 'b', 'ed', 'la', 'm'],\n", + " 'beds': ['▁', 'b', 'ed', 's'],\n", + " 'bedspreads': ['▁', 'b', 'ed', 's', 'p', 'read', 's'],\n", + " 'beech': ['▁', 'be', 'e', 'ch'],\n", + " 'been': ['▁', 'been'],\n", + " 'before': ['▁', 'before'],\n", + " 'befriended': ['▁', 'be', 'friend', 'ed'],\n", + " 'began': ['▁', 'be', 'g', 'an'],\n", + " 'begin': ['▁', 'be', 'g', 'in'],\n", + " 'beginner': ['▁', 'be', 'g', 'in', 'n', 'er'],\n", + " 'beginning': ['▁', 'be', 'g', 'in', 'n', 'ing'],\n", + " 'begins': ['▁', 'be', 'g', 'in', 's'],\n", + " 'begun': ['▁', 'be', 'g', 'un'],\n", + " 'behan': ['▁', 'be', 'h', 'an'],\n", + " 'behave': ['▁', 'be', 'have'],\n", + " 'behaviour': ['▁', 'be', 'h', 'a', 'vi', 'our'],\n", + " 'behind': ['▁', 'behind'],\n", + " 'beier': ['▁', 'be', 'i', 'er'],\n", + " 'being': ['▁', 'being'],\n", + " 'belgian': ['▁', 'be', 'l', 'g', 'i', 'an'],\n", + " 'belgium': ['▁', 'be', 'l', 'giu', 'm'],\n", + " 'belgrade': ['▁', 'be', 'l', 'gr', 'a', 'de'],\n", + " 'belief': ['▁', 'be', 'li', 'e', 'f'],\n", + " 'believe': ['▁', 'believe'],\n", + " 'believed': ['▁', 'believed'],\n", + " 'believes': ['▁', 'believe', 's'],\n", + " 'bell': ['▁', 'be', 'll'],\n", + " \"bell's\": ['▁', 'be', 'll', \"'\", 's'],\n", + " 'belmondo': ['▁', 'be', 'l', 'mon', 'do'],\n", + " 'belonged': ['▁', 'be', 'long', 'ed'],\n", + " 'belongs': ['▁', 'be', 'long', 's'],\n", + " 'below': ['▁', 'be', 'low'],\n", + " 'belt': ['▁', 'be', 'l', 't'],\n", + " 'ben': ['▁', 'be', 'n'],\n", + " 'bench': ['▁', 'be', 'n', 'ch'],\n", + " 'benches': ['▁', 'be', 'n', 'che', 's'],\n", + " 'bend': ['▁', 'b', 'end'],\n", + " 'bending': ['▁', 'b', 'end', 'ing'],\n", + " 'benefits': ['▁', 'be', 'ne', 'f', 'its'],\n", + " 'bent': ['▁', 'b', 'ent'],\n", + " 'ber': ['▁', 'be', 'r'],\n", + " 'berlin': ['▁', 'berlin'],\n", + " \"berlin's\": ['▁', 'berlin', \"'\", 's'],\n", + " 'bernhard': ['▁', 'be', 'r', 'n', 'hard'],\n", + " 'berry': ['▁', 'be', 'rry'],\n", + " 'bertrand': ['▁', 'bert', 'r', 'and'],\n", + " 'beset': ['▁', 'be', 'set'],\n", + " 'beside': ['▁', 'be', 'side'],\n", + " 'best': ['▁', 'best'],\n", + " 'best-seller': ['▁', 'best', '-', 's', 'ell', 'er'],\n", + " 'bet': ['▁', 'be', 't'],\n", + " 'betjeman': ['▁', 'be', 't', 'je', 'man'],\n", + " 'betrayal': ['▁', 'be', 'tr', 'a', 'y', 'al'],\n", + " 'betrayed': ['▁', 'be', 'tr', 'a', 'y', 'ed'],\n", + " 'better': ['▁', 'better'],\n", + " 'better-': ['▁', 'better', '-'],\n", + " \"betti's\": ['▁', 'be', 't', 't', 'i', \"'\", 's'],\n", + " 'between': ['▁', 'between'],\n", + " 'bevel': ['▁', 'be', 've', 'l'],\n", + " 'bevelled': ['▁', 'be', 'v', 'ell', 'ed'],\n", + " 'beware': ['▁', 'be', 'w', 'are'],\n", + " 'bewildered': ['▁', 'be', 'w', 'il', 'd', 'er', 'ed'],\n", + " 'beyond': ['▁', 'beyond'],\n", + " 'bidet': ['▁', 'b', 'i', 'de', 't'],\n", + " 'big': ['▁', 'big'],\n", + " 'bigger': ['▁', 'big', 'g', 'er'],\n", + " 'biggest': ['▁', 'big', 'g', 'est'],\n", + " 'bill': ['▁', 'b', 'ill'],\n", + " 'bills': ['▁', 'b', 'ill', 's'],\n", + " 'binding': ['▁', 'b', 'in', 'd', 'ing'],\n", + " 'biological': ['▁', 'b', 'i', 'o', 'lo', 'g', 'ical'],\n", + " 'bird': ['▁', 'b', 'i', 'r', 'd'],\n", + " 'birds': ['▁', 'b', 'i', 'r', 'd', 's'],\n", + " 'bishop': ['▁', 'b', 'is', 'hop'],\n", + " 'bit': ['▁', 'b', 'it'],\n", + " 'bite': ['▁', 'b', 'it', 'e'],\n", + " 'bits': ['▁', 'b', 'its'],\n", + " 'bitter-sweet': ['▁', 'b', 'it', 'ter', '-', 's', 'we', 'e', 't'],\n", + " 'bitterest': ['▁', 'b', 'it', 'ter', 'est'],\n", + " 'bitterly': ['▁', 'b', 'it', 'ter', 'ly'],\n", + " 'bituminized': ['▁', 'b', 'it', 'um', 'in', 'i', 'z', 'ed'],\n", + " 'black': ['▁', 'bl', 'a', 'ck'],\n", + " 'black-': ['▁', 'bl', 'a', 'ck', '-'],\n", + " 'black-listed': ['▁', 'bl', 'a', 'ck', '-', 'li', 'st', 'ed'],\n", + " 'blackbird': ['▁', 'bl', 'a', 'ck', 'b', 'i', 'r', 'd'],\n", + " 'blacks': ['▁', 'bl', 'a', 'ck', 's'],\n", + " 'blame': ['▁', 'bl', 'a', 'me'],\n", + " 'blamed': ['▁', 'bl', 'am', 'ed'],\n", + " 'blander': ['▁', 'bl', 'and', 'er'],\n", + " 'blank': ['▁', 'bl', 'an', 'k'],\n", + " 'blend': ['▁', 'bl', 'end'],\n", + " 'blight': ['▁', 'b', 'light'],\n", + " 'blind': ['▁', 'bl', 'in', 'd'],\n", + " 'blinked': ['▁', 'bl', 'in', 'k', 'ed'],\n", + " 'block': ['▁', 'block'],\n", + " 'blocks': ['▁', 'block', 's'],\n", + " 'bloem-': ['▁', 'b', 'lo', 'e', 'm', '-'],\n", + " 'blond': ['▁', 'bl', 'on', 'd'],\n", + " 'blood': ['▁', 'b', 'lo', 'od'],\n", + " 'bloodstained': ['▁', 'b', 'lo', 'od', 's', 'tain', 'ed'],\n", + " 'bloody': ['▁', 'b', 'lo', 'od', 'y'],\n", + " 'blouse': ['▁', 'b', 'lo', 'use'],\n", + " 'blouses': ['▁', 'bl', 'ous', 'es'],\n", + " 'blow': ['▁', 'b', 'low'],\n", + " 'blowflies': ['▁', 'b', 'low', 'f', 'l', 'ies'],\n", + " 'blown': ['▁', 'bl', 'own'],\n", + " 'blue': ['▁', 'bl', 'ue'],\n", + " 'blunt': ['▁', 'bl', 'un', 't'],\n", + " 'bluntly': ['▁', 'bl', 'un', 't', 'ly'],\n", + " 'bluster': ['▁', 'bl', 'u', 'ster'],\n", + " 'board': ['▁', 'board'],\n", + " 'boat': ['▁', 'bo', 'at'],\n", + " 'boat-train': ['▁', 'bo', 'at', '-', 'train'],\n", + " 'bobby': ['▁', 'bo', 'b', 'by'],\n", + " 'bodies': ['▁', 'bo', 'd', 'ies'],\n", + " 'body': ['▁', 'body'],\n", + " 'boeing': ['▁', 'bo', 'e', 'ing'],\n", + " 'bogy': ['▁', 'bo', 'g', 'y'],\n", + " 'boiled': ['▁', 'bo', 'il', 'ed'],\n", + " 'boils': ['▁', 'bo', 'il', 's'],\n", + " 'bold': ['▁', 'b', 'old'],\n", + " 'boldly': ['▁', 'b', 'old', 'ly'],\n", + " 'bolt': ['▁', 'bo', 'l', 't'],\n", + " 'bolted': ['▁', 'bo', 'l', 'ted'],\n", + " 'bomb': ['▁', 'bomb'],\n", + " 'bombay': ['▁', 'bomb', 'a', 'y'],\n", + " 'bombed': ['▁', 'bomb', 'ed'],\n", + " 'bombers': ['▁', 'bomb', 'ers'],\n", + " 'bonded': ['▁', 'b', 'on', 'd', 'ed'],\n", + " 'bone': ['▁', 'b', 'one'],\n", + " 'bones': ['▁', 'b', 'one', 's'],\n", + " 'bonn': ['▁', 'b', 'on', 'n'],\n", + " \"bonn's\": ['▁', 'b', 'on', 'n', \"'\", 's'],\n", + " 'book': ['▁', 'book'],\n", + " 'booklet': ['▁', 'book', 'le', 't'],\n", + " 'books': ['▁', 'book', 's'],\n", + " 'booming': ['▁', 'bo', 'o', 'm', 'ing'],\n", + " 'border': ['▁', 'b', 'order'],\n", + " 'bore': ['▁', 'bo', 're'],\n", + " 'bored': ['▁', 'b', 'or', 'ed'],\n", + " 'boredom': ['▁', 'bo', 're', 'do', 'm'],\n", + " 'bores': ['▁', 'bo', 're', 's'],\n", + " 'born': ['▁', 'b', 'or', 'n'],\n", + " 'borough': ['▁', 'bo', 'rough'],\n", + " 'borrow': ['▁', 'b', 'or', 'ro', 'w'],\n", + " 'borstal': ['▁', 'b', 'or', 'st', 'al'],\n", + " 'bosoms': ['▁', 'bo', 'so', 'm', 's'],\n", + " 'bossed': ['▁', 'bo', 's', 's', 'ed'],\n", + " 'bosses': ['▁', 'bo', 's', 'se', 's'],\n", + " 'both': ['▁', 'both'],\n", + " 'bottle': ['▁', 'bo', 'ttle'],\n", + " 'bottom': ['▁', 'bo', 't', 'to', 'm'],\n", + " 'bought': ['▁', 'bo', 'ug', 'h', 't'],\n", + " 'boun': ['▁', 'bo', 'un'],\n", + " 'bound': ['▁', 'b', 'ound'],\n", + " 'boutiques': ['▁', 'b', 'out', 'i', 'q', 'ue', 's'],\n", + " 'bow': ['▁', 'bo', 'w'],\n", + " 'bow-street': ['▁', 'bo', 'w', '-', 'st', 're', 'e', 't'],\n", + " 'bowed': ['▁', 'bo', 'w', 'ed'],\n", + " 'bowing': ['▁', 'bo', 'w', 'ing'],\n", + " 'bows': ['▁', 'bo', 'w', 's'],\n", + " 'box': ['▁', 'bo', 'x'],\n", + " 'boxes': ['▁', 'bo', 'x', 'es'],\n", + " 'boxing': ['▁', 'bo', 'x', 'ing'],\n", + " 'boy': ['▁', 'bo', 'y'],\n", + " 'boycotted': ['▁', 'bo', 'y', 'cott', 'ed'],\n", + " 'boycotting': ['▁', 'bo', 'y', 'cott', 'ing'],\n", + " 'boyd-orr': ['▁', 'bo', 'y', 'd', '-', 'or', 'r'],\n", + " 'boyle': ['▁', 'bo', 'y', 'le'],\n", + " 'boys': ['▁', 'bo', 'y', 's'],\n", + " 'braces': ['▁', 'br', 'a', 'ce', 's'],\n", + " 'brain': ['▁', 'b', 'rain'],\n", + " 'brain-activity': ['▁', 'b', 'rain', '-', 'act', 'i', 'v', 'ity'],\n", + " 'brain-children': ['▁', 'b', 'rain', '-', 'children'],\n", + " 'brains': ['▁', 'b', 'rain', 's'],\n", + " 'brandy': ['▁', 'br', 'and', 'y'],\n", + " 'brash': ['▁', 'br', 'as', 'h'],\n", + " 'brass': ['▁', 'br', 'as', 's'],\n", + " 'brauchitsch': ['▁', 'br', 'a', 'u', 'ch', 'its', 'ch'],\n", + " 'breach': ['▁', 'br', 'each'],\n", + " 'bread-and-butter': ['▁', 'b', 'read', '-', 'and', '-', 'but', 'ter'],\n", + " 'break': ['▁', 'b', 're', 'a', 'k'],\n", + " 'breaking': ['▁', 'b', 're', 'a', 'k', 'ing'],\n", + " 'breaks': ['▁', 'b', 're', 'a', 'k', 's'],\n", + " 'breath': ['▁', 'b', 're', 'a', 'th'],\n", + " 'breathing': ['▁', 'b', 're', 'a', 'thing'],\n", + " 'breathless': ['▁', 'b', 're', 'a', 'th', 'less'],\n", + " 'breeding': ['▁', 'b', 're', 'ed', 'ing'],\n", + " 'breezily': ['▁', 'b', 're', 'e', 'z', 'i', 'ly'],\n", + " 'brehm': ['▁', 'b', 're', 'h', 'm'],\n", + " 'brella': ['▁', 'br', 'ell', 'a'],\n", + " 'brenda': ['▁', 'br', 'end', 'a'],\n", + " 'brendan': ['▁', 'br', 'end', 'an'],\n", + " \"brendan's\": ['▁', 'br', 'end', 'an', \"'\", 's'],\n", + " 'brentano': ['▁', 'br', 'ent', 'a', 'no'],\n", + " 'brezhnev': ['▁', 'b', 're', 'z', 'h', 'ne', 'v'],\n", + " 'brian': ['▁', 'br', 'i', 'an'],\n", + " 'bridal': ['▁', 'br', 'id', 'al'],\n", + " 'bride': ['▁', 'br', 'i', 'de'],\n", + " 'brief': ['▁', 'brief'],\n", + " 'brief-': ['▁', 'brief', '-'],\n", + " 'briefcase': ['▁', 'brief', 'case'],\n", + " 'briefing': ['▁', 'brief', 'ing'],\n", + " 'brigadiers': ['▁', 'br', 'i', 'g', 'ad', 'i', 'ers'],\n", + " 'bright': ['▁', 'b', 'right'],\n", + " 'brighter': ['▁', 'b', 'right', 'er'],\n", + " 'brightly': ['▁', 'b', 'right', 'ly'],\n", + " \"brighton's\": ['▁', 'b', 'right', 'on', \"'\", 's'],\n", + " 'brilliant': ['▁', 'br', 'ill', 'i', 'ant'],\n", + " 'brilliantly': ['▁', 'br', 'ill', 'i', 'ant', 'ly'],\n", + " 'bring': ['▁', 'br', 'ing'],\n", + " 'brings': ['▁', 'br', 'ing', 's'],\n", + " 'bristled': ['▁', 'br', 'is', 't', 'led'],\n", + " 'bristol': ['▁', 'br', 'is', 'to', 'l'],\n", + " 'britain': ['▁', 'britain'],\n", + " \"britain's\": ['▁', 'britain', \"'\", 's'],\n", + " 'british': ['▁', 'british'],\n", + " 'british-owned': ['▁', 'british', '-', 'own', 'ed'],\n", + " 'britishers': ['▁', 'british', 'ers'],\n", + " 'brittle': ['▁', 'br', 'i', 'ttle'],\n", + " 'broad': ['▁', 'b', 'ro', 'ad'],\n", + " 'broadcast': ['▁', 'b', 'ro', 'ad', 'c', 'a', 'st'],\n", + " 'broadcasting': ['▁', 'b', 'ro', 'ad', 'c', 'a', 'st', 'ing'],\n", + " 'broke': ['▁', 'b', 'ro', 'ke'],\n", + " 'broken': ['▁', 'b', 'ro', 'k', 'en'],\n", + " 'bronx': ['▁', 'br', 'on', 'x'],\n", + " \"brook's\": ['▁', 'b', 'ro', 'o', 'k', \"'\", 's'],\n", + " 'brother': ['▁', 'brother'],\n", + " 'brother-': ['▁', 'brother', '-'],\n", + " 'brother-in-law': ['▁', 'brother', '-', 'in', '-', 'law'],\n", + " 'brought': ['▁', 'brought'],\n", + " 'brown': ['▁', 'brown'],\n", + " \"brown's\": ['▁', 'brown', \"'\", 's'],\n", + " 'bru\"cke': ['▁', 'br', 'u', '\"', 'ck', 'e'],\n", + " 'bruce': ['▁', 'br', 'u', 'ce'],\n", + " 'bruno': ['▁', 'br', 'un', 'o'],\n", + " 'brunswick': ['▁', 'br', 'un', 's', 'w', 'i', 'ck'],\n", + " 'brussels': ['▁', 'br', 'us', 's', 'el', 's'],\n", + " 'brutal': ['▁', 'br', 'u', 't', 'al'],\n", + " 'bryan': ['▁', 'br', 'y', 'an'],\n", + " 'bu\"ckerei': ['▁', 'b', 'u', '\"', 'ck', 'e', 're', 'i'],\n", + " 'buck': ['▁', 'b', 'u', 'ck'],\n", + " 'buckingham': ['▁', 'b', 'u', 'ck', 'ing', 'h', 'am'],\n", + " 'buckley': ['▁', 'b', 'u', 'ck', 'le', 'y'],\n", + " 'budge': ['▁', 'b', 'ud', 'g', 'e'],\n", + " 'budgerigar': ['▁', 'b', 'ud', 'g', 'er', 'i', 'g', 'ar'],\n", + " 'budget': ['▁', 'budget'],\n", + " 'budgetary': ['▁', 'budget', 'ary'],\n", + " 'budgette': ['▁', 'budget', 'te'],\n", + " 'buganda': ['▁', 'b', 'ug', 'and', 'a'],\n", + " 'build': ['▁', 'b', 'u', 'il', 'd'],\n", + " 'building': ['▁', 'building'],\n", + " ...}" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lex" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- cgit v1.2.3-70-g09d2