From 7e8e54e84c63171e748bbf09516fd517e6821ace Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Sat, 20 Mar 2021 18:09:06 +0100 Subject: Inital commit for refactoring to lightning --- src/notebooks/07-look-at-lexicon.ipynb | 1119 -------------------------------- 1 file changed, 1119 deletions(-) delete mode 100644 src/notebooks/07-look-at-lexicon.ipynb (limited to 'src/notebooks/07-look-at-lexicon.ipynb') diff --git a/src/notebooks/07-look-at-lexicon.ipynb b/src/notebooks/07-look-at-lexicon.ipynb deleted file mode 100644 index b7a5a0e..0000000 --- a/src/notebooks/07-look-at-lexicon.ipynb +++ /dev/null @@ -1,1119 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "from pathlib import Path\n", - "import numpy as np\n", - "from PIL import Image\n", - "import torch.nn.functional as F\n", - "import torch\n", - "from torch import nn\n", - "from torchsummary import summary\n", - "from importlib.util import find_spec\n", - "if find_spec(\"text_recognizer\") is None:\n", - " import sys\n", - " sys.path.append('..')" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "path = Path(\"../\").resolve().parent / \"data\" / \"processed\" / \"iam_lines\" / \"iamdb_1kwp_lex_1000.txt\"" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/data/processed/iam_lines/iamdb_1kwp_lex_1000.txt')" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "path" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "with open(path, \"r\") as f:\n", - " lex = (line.strip().split() for line in f)\n", - " lex = {line[0]: line[1:] for line in lex}\n", - " #print(len(lex))" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'!': ['▁', '!'],\n", - " '\"': ['▁', '\"'],\n", - " '&': ['▁', '&'],\n", - " \"'\": ['▁', \"'\"],\n", - " \"'30s\": ['▁', \"'\", '3', '0', 's'],\n", - " \"'61\": ['▁', \"'\", '6', '1'],\n", - " \"'d\": ['▁', \"'\", 'd'],\n", - " \"'ll\": ['▁', \"'\", 'll'],\n", - " \"'m\": ['▁', \"'\", 'm'],\n", - " \"'re\": ['▁', \"'\", 're'],\n", - " \"'s\": ['▁', \"'\", 's'],\n", - " \"'ve\": ['▁', \"'\", 've'],\n", - " '(': ['▁', '('],\n", - " ')': ['▁', ')'],\n", - " '*': ['▁', '*'],\n", - " '+2.8': ['▁', '+', '2', '.', '8'],\n", - " '+3.6': ['▁', '+', '3', '.', '6'],\n", - " ',': ['▁', ','],\n", - " '-': ['▁', '-'],\n", - " '-2.6': ['▁', '-', '2', '.', '6'],\n", - " '-5.4': ['▁', '-', '5', '.', '4'],\n", - " '.': ['▁', '.'],\n", - " '...': ['▁', '.', '.', '.'],\n", - " '0m': ['▁', '0', 'm'],\n", - " '1': ['▁', '1'],\n", - " '1,157': ['▁', '1', ',', '1', '5', '7'],\n", - " '1,400': ['▁', '1', ',', '4', '0', '0'],\n", - " '1,500': ['▁', '1', ',', '5', '0', '0'],\n", - " '1-2': ['▁', '1', '-', '2'],\n", - " '1.8': ['▁', '1', '.', '8'],\n", - " '1/2': ['▁', '1', '/', '2'],\n", - " '1/2-in.-long': ['▁', '1', '/', '2', '-', 'in', '.', '-', 'long'],\n", - " '1/4': ['▁', '1', '/', '4'],\n", - " '10': ['▁', '10'],\n", - " '10,000': ['▁', '10', ',', '0', '0', '0'],\n", - " '100': ['▁', '10', '0'],\n", - " '100,000,000': ['▁', '10', '0', ',', '0', '00,000'],\n", - " '104': ['▁', '10', '4'],\n", - " '11': ['▁', '1', '1'],\n", - " '12': ['▁', '1', '2'],\n", - " '12,000-word': ['▁', '1', '2', ',', '0', '0', '0', '-', 'word'],\n", - " '125': ['▁', '1', '2', '5'],\n", - " '13': ['▁', '1', '3'],\n", - " '13,000': ['▁', '1', '3', ',', '0', '0', '0'],\n", - " '14': ['▁', '1', '4'],\n", - " '15': ['▁', '1', '5'],\n", - " '15,000,000': ['▁', '1', '5', ',', '0', '00,000'],\n", - " '15-17': ['▁', '1', '5', '-', '1', '7'],\n", - " '15-nation': ['▁', '1', '5', '-', 'n', 'ation'],\n", - " '15-year-olds': ['▁', '1', '5', '-', 'year', '-', 'old', 's'],\n", - " '150,000,000': ['▁', '1', '5', '0', ',', '0', '00,000'],\n", - " '16': ['▁', '1', '6'],\n", - " '16,000': ['▁', '1', '6', ',', '0', '0', '0'],\n", - " '160': ['▁', '1', '6', '0'],\n", - " '163,000,000': ['▁', '1', '6', '3', ',', '0', '00,000'],\n", - " '167': ['▁', '1', '6', '7'],\n", - " '17': ['▁', '1', '7'],\n", - " '18': ['▁', '1', '8'],\n", - " '18.1': ['▁', '1', '8', '.', '1'],\n", - " '1830': ['▁', '1', '8', '3', '0'],\n", - " \"1830's\": ['▁', '1', '8', '3', '0', \"'\", 's'],\n", - " '1834': ['▁', '1', '8', '3', '4'],\n", - " '1897': ['▁', '1', '8', '9', '7'],\n", - " '19': ['▁', '1', '9'],\n", - " '19.5': ['▁', '1', '9', '.', '5'],\n", - " '1910': ['▁', '1', '9', '10'],\n", - " '1913': ['▁', '1', '9', '1', '3'],\n", - " '1914': ['▁', '1', '9', '1', '4'],\n", - " '1914-18': ['▁', '1', '9', '1', '4', '-', '1', '8'],\n", - " '1918': ['▁', '1', '9', '1', '8'],\n", - " '1920': ['▁', '1', '9', '2', '0'],\n", - " '1930': ['▁', '1', '9', '3', '0'],\n", - " '1931': ['▁', '1', '9', '3', '1'],\n", - " '1932': ['▁', '1', '9', '3', '2'],\n", - " '1934': ['▁', '1', '9', '3', '4'],\n", - " '1936': ['▁', '1', '9', '3', '6'],\n", - " '1939': ['▁', '1', '9', '3', '9'],\n", - " '1943': ['▁', '1', '9', '4', '3'],\n", - " '1944': ['▁', '1', '9', '4', '4'],\n", - " '1950': ['▁', '1', '9', '5', '0'],\n", - " '1951': ['▁', '1', '9', '5', '1'],\n", - " '1952': ['▁', '1', '9', '5', '2'],\n", - " '1953': ['▁', '1', '9', '5', '3'],\n", - " '1954': ['▁', '1', '9', '5', '4'],\n", - " '1956': ['▁', '1', '9', '5', '6'],\n", - " '1957': ['▁', '1', '9', '5', '7'],\n", - " '1958': ['▁', '1', '9', '5', '8'],\n", - " '1959': ['▁', '1', '9', '5', '9'],\n", - " '1960': ['▁', '1960'],\n", - " '1960s': ['▁', '1960', 's'],\n", - " '1961': ['▁', '1', '9', '6', '1'],\n", - " '1963': ['▁', '1', '9', '6', '3'],\n", - " '19th': ['▁', '1', '9', 'th'],\n", - " '1superceded': ['▁', '1', 'superceded'],\n", - " \"1tho'\": ['▁', '1', 'tho', \"'\"],\n", - " '2': ['▁', '2'],\n", - " '2,000': ['▁', '2', ',', '0', '0', '0'],\n", - " '2,415,000,000': ['▁', '2', ',', '4', '1', '5', ',', '0', '00,000'],\n", - " '20': ['▁', '2', '0'],\n", - " '20-month-old': ['▁', '2', '0', '-', 'month', '-', 'old'],\n", - " '200': ['▁', '2', '0', '0'],\n", - " '20th-century': ['▁', '2', '0', 'th', '-', 'cent', 'ur', 'y'],\n", - " '21': ['▁', '2', '1'],\n", - " '210million': ['▁', '2', '10', 'million'],\n", - " '22': ['▁', '2', '2'],\n", - " '23.1': ['▁', '2', '3', '.', '1'],\n", - " '24': ['▁', '2', '4'],\n", - " '24-strong': ['▁', '2', '4', '-', 'strong'],\n", - " '25': ['▁', '2', '5'],\n", - " '27': ['▁', '2', '7'],\n", - " '28.5': ['▁', '2', '8', '.', '5'],\n", - " '280,000': ['▁', '2', '8', '0', ',', '0', '0', '0'],\n", - " '287': ['▁', '2', '8', '7'],\n", - " '288': ['▁', '2', '8', '8'],\n", - " '2bhoys': ['▁', '2', 'b', 'ho', 'y', 's'],\n", - " '2ole': ['▁', '2', 'o', 'le'],\n", - " '2pianna': ['▁', '2', 'p', 'i', 'an', 'n', 'a'],\n", - " '2skint': ['▁', '2', 's', 'k', 'in', 't'],\n", - " '3': ['▁', '3'],\n", - " '3,000': ['▁', '3', ',', '0', '0', '0'],\n", - " '3.6': ['▁', '3', '.', '6'],\n", - " '3/0': ['▁', '3', '/', '0'],\n", - " '3/4': ['▁', '3', '/', '4'],\n", - " '30': ['▁', '3', '0'],\n", - " '30-day': ['▁', '3', '0', '-', 'day'],\n", - " '30-minute': ['▁', '3', '0', '-', 'minute'],\n", - " '300,000': ['▁', '3', '00,000'],\n", - " '32': ['▁', '3', '2'],\n", - " '33': ['▁', '3', '3'],\n", - " '34': ['▁', '3', '4'],\n", - " '35': ['▁', '3', '5'],\n", - " '357million': ['▁', '3', '5', '7', 'million'],\n", - " '36': ['▁', '3', '6'],\n", - " '37,000,000': ['▁', '3', '7', ',', '0', '00,000'],\n", - " '37.2': ['▁', '3', '7', '.', '2'],\n", - " '38': ['▁', '3', '8'],\n", - " '4': ['▁', '4'],\n", - " '4.8': ['▁', '4', '.', '8'],\n", - " '40': ['▁', '4', '0'],\n", - " '400': ['▁', '4', '0', '0'],\n", - " '400,000': ['▁', '4', '00,000'],\n", - " '420000': ['▁', '4', '2', '0', '0', '0', '0'],\n", - " '43': ['▁', '4', '3'],\n", - " '450': ['▁', '4', '5', '0'],\n", - " '5': ['▁', '5'],\n", - " '5,000': ['▁', '5', ',', '0', '0', '0'],\n", - " '5.30': ['▁', '5', '.', '3', '0'],\n", - " '5/8': ['▁', '5', '/', '8'],\n", - " '50': ['▁', '5', '0'],\n", - " '50,000': ['▁', '5', '0', ',', '0', '0', '0'],\n", - " '500': ['▁', '5', '0', '0'],\n", - " '53-year-old': ['▁', '5', '3', '-', 'year', '-', 'old'],\n", - " '55': ['▁', '5', '5'],\n", - " '550,000': ['▁', '5', '5', '0', ',', '0', '0', '0'],\n", - " '58': ['▁', '5', '8'],\n", - " '6': ['▁', '6'],\n", - " '6,000': ['▁', '6', ',', '0', '0', '0'],\n", - " '60': ['▁', '6', '0'],\n", - " '600': ['▁', '6', '0', '0'],\n", - " '600,000': ['▁', '6', '00,000'],\n", - " '61-year-old': ['▁', '6', '1', '-', 'year', '-', 'old'],\n", - " '68': ['▁', '6', '8'],\n", - " '6al': ['▁', '6', 'al'],\n", - " '6tic': ['▁', '6', 'tic'],\n", - " '7.30': ['▁', '7', '.', '3', '0'],\n", - " '7.42': ['▁', '7', '.', '4', '2'],\n", - " '70': ['▁', '7', '0'],\n", - " '70,000,000': ['▁', '7', '0', ',', '0', '00,000'],\n", - " '707': ['▁', '7', '0', '7'],\n", - " '73': ['▁', '7', '3'],\n", - " '750': ['▁', '7', '5', '0'],\n", - " '8': ['▁', '8'],\n", - " '8,000,000': ['▁', '8', ',', '0', '00,000'],\n", - " '8.25': ['▁', '8', '.', '2', '5'],\n", - " '8.4': ['▁', '8', '.', '4'],\n", - " '80': ['▁', '8', '0'],\n", - " '800': ['▁', '8', '0', '0'],\n", - " '800,000': ['▁', '8', '00,000'],\n", - " '86': ['▁', '8', '6'],\n", - " '88': ['▁', '8', '8'],\n", - " '88-year-old': ['▁', '8', '8', '-', 'year', '-', 'old'],\n", - " '89': ['▁', '8', '9'],\n", - " '89-year-old': ['▁', '8', '9', '-', 'year', '-', 'old'],\n", - " '9.30': ['▁', '9', '.', '3', '0'],\n", - " '9.40': ['▁', '9', '.', '4', '0'],\n", - " '90-day': ['▁', '9', '0', '-', 'day'],\n", - " '90-minute': ['▁', '9', '0', '-', 'minute'],\n", - " '91': ['▁', '9', '1'],\n", - " '950': ['▁', '9', '5', '0'],\n", - " '97.5': ['▁', '9', '7', '.', '5'],\n", - " ':': ['▁', ':'],\n", - " ';': ['▁', ';'],\n", - " '?': ['▁', '?'],\n", - " 'a': ['▁', 'a'],\n", - " 'abandon': ['▁', 'a', 'b', 'and', 'on'],\n", - " 'abandoned': ['▁', 'a', 'b', 'and', 'on', 'ed'],\n", - " 'abandoning': ['▁', 'a', 'b', 'and', 'on', 'ing'],\n", - " 'abashed': ['▁', 'a', 'bas', 'he', 'd'],\n", - " 'ability': ['▁', 'a', 'b', 'il', 'ity'],\n", - " 'able': ['▁', 'able'],\n", - " 'able-bodied': ['▁', 'able', '-', 'bo', 'die', 'd'],\n", - " 'abolish': ['▁', 'a', 'bo', 'l', 'ish'],\n", - " 'abolished': ['▁', 'a', 'bo', 'l', 'ish', 'ed'],\n", - " 'abolition': ['▁', 'a', 'bo', 'li', 'tion'],\n", - " 'abortion': ['▁', 'a', 'b', 'or', 'tion'],\n", - " 'abou': ['▁', 'a', 'bo', 'u'],\n", - " 'about': ['▁', 'about'],\n", - " 'about-': ['▁', 'about', '-'],\n", - " 'above': ['▁', 'a', 'bo', 've'],\n", - " 'abreast': ['▁', 'a', 'br', 'east'],\n", - " 'abroad': ['▁', 'a', 'b', 'ro', 'ad'],\n", - " 'absence': ['▁', 'a', 'b', 's', 'ence'],\n", - " 'absent': ['▁', 'a', 'b', 's', 'ent'],\n", - " 'absolutely': ['▁', 'a', 'b', 'solut', 'e', 'ly'],\n", - " 'abstraction': ['▁', 'a', 'b', 's', 'tr', 'action'],\n", - " 'abundance': ['▁', 'a', 'b', 'un', 'd', 'ance'],\n", - " 'ac-': ['▁', 'ac', '-'],\n", - " 'academic': ['▁', 'ac', 'a', 'de', 'm', 'ic'],\n", - " 'accent': ['▁', 'ac', 'cent'],\n", - " 'accents': ['▁', 'ac', 'cent', 's'],\n", - " 'accept': ['▁', 'accept'],\n", - " 'acceptable': ['▁', 'accept', 'able'],\n", - " 'accepted': ['▁', 'accept', 'ed'],\n", - " 'accepting': ['▁', 'accept', 'ing'],\n", - " 'accessories': ['▁', 'ac', 'ce', 's', 'so', 'ries'],\n", - " 'accident': ['▁', 'ac', 'c', 'id', 'ent'],\n", - " 'accidental': ['▁', 'ac', 'c', 'id', 'ent', 'al'],\n", - " 'accommodate': ['▁', 'ac', 'com', 'mo', 'date'],\n", - " 'accommodation': ['▁', 'ac', 'com', 'mo', 'd', 'ation'],\n", - " 'accompanied': ['▁', 'ac', 'com', 'pan', 'i', 'ed'],\n", - " 'accompanist': ['▁', 'ac', 'com', 'pan', 'is', 't'],\n", - " 'accompany': ['▁', 'ac', 'com', 'p', 'any'],\n", - " 'accomplished': ['▁', 'ac', 'com', 'p', 'l', 'ish', 'ed'],\n", - " 'accomplishments': ['▁', 'ac', 'com', 'p', 'l', 'ish', 'ment', 's'],\n", - " 'according': ['▁', 'ac', 'c', 'or', 'd', 'ing'],\n", - " 'account': ['▁', 'ac', 'count'],\n", - " 'accountancy': ['▁', 'ac', 'count', 'an', 'c', 'y'],\n", - " 'accra': ['▁', 'ac', 'c', 'ra'],\n", - " \"accra's\": ['▁', 'ac', 'c', 'ra', \"'\", 's'],\n", - " 'accuracy': ['▁', 'ac', 'cur', 'ac', 'y'],\n", - " 'accurate': ['▁', 'ac', 'cur', 'ate'],\n", - " 'accurately': ['▁', 'ac', 'cur', 'ate', 'ly'],\n", - " 'accused': ['▁', 'ac', 'c', 'used'],\n", - " 'achieved': ['▁', 'a', 'ch', 'i', 'e', 'v', 'ed'],\n", - " 'achievement': ['▁', 'a', 'ch', 'i', 'e', 've', 'ment'],\n", - " 'acquaintance': ['▁', 'ac', 'q', 'u', 'a', 'in', 't', 'ance'],\n", - " 'acquaintances': ['▁', 'ac', 'q', 'u', 'a', 'in', 't', 'ance', 's'],\n", - " 'acres': ['▁', 'ac', 're', 's'],\n", - " 'across': ['▁', 'a', 'cross'],\n", - " 'act': ['▁', 'act'],\n", - " 'acting': ['▁', 'act', 'ing'],\n", - " 'action': ['▁', 'action'],\n", - " 'actions': ['▁', 'action', 's'],\n", - " 'active': ['▁', 'act', 'ive'],\n", - " 'activists': ['▁', 'act', 'i', 'vi', 'st', 's'],\n", - " 'activities': ['▁', 'act', 'i', 'v', 'it', 'ies'],\n", - " 'activity': ['▁', 'act', 'i', 'v', 'ity'],\n", - " 'acton': ['▁', 'act', 'on'],\n", - " 'actor': ['▁', 'act', 'or'],\n", - " 'actress': ['▁', 'act', 're', 's', 's'],\n", - " 'acts': ['▁', 'act', 's'],\n", - " 'actual': ['▁', 'act', 'ual'],\n", - " 'actually': ['▁', 'act', 'ual', 'ly'],\n", - " 'adamafio': ['▁', 'ad', 'a', 'ma', 'f', 'i', 'o'],\n", - " 'adaptation': ['▁', 'ad', 'ap', 't', 'ation'],\n", - " 'adapted': ['▁', 'ad', 'ap', 'ted'],\n", - " 'adapting': ['▁', 'ad', 'ap', 't', 'ing'],\n", - " 'add': ['▁', 'ad', 'd'],\n", - " 'added': ['▁', 'ad', 'd', 'ed'],\n", - " 'adding': ['▁', 'adding'],\n", - " 'addition': ['▁', 'ad', 'd', 'it', 'ion'],\n", - " 'additions': ['▁', 'ad', 'd', 'it', 'ion', 's'],\n", - " 'address': ['▁', 'ad', 'dr', 'es', 's'],\n", - " 'addressed': ['▁', 'ad', 'dr', 'es', 's', 'ed'],\n", - " 'addresses': ['▁', 'ad', 'dr', 'es', 'se', 's'],\n", - " 'addressing': ['▁', 'ad', 'dr', 'es', 's', 'ing'],\n", - " 'adenauer': ['▁', 'adenauer'],\n", - " \"adenauer's\": ['▁', 'adenauer', \"'\", 's'],\n", - " 'adequate': ['▁', 'ad', 'equa', 'te'],\n", - " 'adhem': ['▁', 'ad', 'he', 'm'],\n", - " 'adjust': ['▁', 'ad', 'just'],\n", - " 'adjustment': ['▁', 'ad', 'just', 'ment'],\n", - " 'administration': ['▁', 'ad', 'ministr', 'ation'],\n", - " \"administration's\": ['▁', 'ad', 'ministr', 'ation', \"'\", 's'],\n", - " 'administrative': ['▁', 'ad', 'ministr', 'at', 'ive'],\n", - " 'admiralty': ['▁', 'ad', 'm', 'i', 'r', 'al', 'ty'],\n", - " 'admire': ['▁', 'ad', 'm', 'i', 're'],\n", - " 'admit': ['▁', 'ad', 'm', 'it'],\n", - " 'admitted': ['▁', 'ad', 'm', 'it', 'ted'],\n", - " 'admitting': ['▁', 'ad', 'm', 'it', 't', 'ing'],\n", - " 'adopted': ['▁', 'a', 'do', 'p', 'ted'],\n", - " 'adopting': ['▁', 'a', 'do', 'p', 't', 'ing'],\n", - " 'adoption': ['▁', 'a', 'do', 'p', 'tion'],\n", - " 'adult': ['▁', 'ad', 'ul', 't'],\n", - " 'advance': ['▁', 'ad', 'v', 'ance'],\n", - " 'advanced': ['▁', 'ad', 'v', 'ance', 'd'],\n", - " 'advancing': ['▁', 'ad', 'v', 'an', 'c', 'ing'],\n", - " 'advantage': ['▁', 'advantage'],\n", - " 'advantages': ['▁', 'advantage', 's'],\n", - " 'advertisement': ['▁', 'ad', 'ver', 't', 'is', 'e', 'ment'],\n", - " 'advertisements': ['▁', 'ad', 'ver', 't', 'is', 'ements'],\n", - " 'advice': ['▁', 'advi', 'ce'],\n", - " 'advisability': ['▁', 'advi', 's', 'a', 'b', 'il', 'ity'],\n", - " 'advise': ['▁', 'advise'],\n", - " 'advised': ['▁', 'advise', 'd'],\n", - " 'advisers': ['▁', 'advise', 'r', 's'],\n", - " 'advocate': ['▁', 'ad', 'v', 'o', 'c', 'ate'],\n", - " 'af-': ['▁', 'a', 'f', '-'],\n", - " 'affairs': ['▁', 'a', 'f', 'f', 'air', 's'],\n", - " 'affected': ['▁', 'a', 'f', 'fe', 'c', 'ted'],\n", - " 'affection': ['▁', 'a', 'f', 'fe', 'c', 'tion'],\n", - " 'affilia-': ['▁', 'a', 'f', 'f', 'il', 'i', 'a', '-'],\n", - " 'affiliations': ['▁', 'a', 'f', 'f', 'il', 'i', 'ation', 's'],\n", - " 'affluence': ['▁', 'a', 'f', 'f', 'l', 'u', 'ence'],\n", - " 'affluent': ['▁', 'a', 'f', 'f', 'l', 'u', 'ent'],\n", - " 'afford': ['▁', 'a', 'f', 'for', 'd'],\n", - " 'afraid': ['▁', 'a', 'fr', 'a', 'id'],\n", - " 'africa': ['▁', 'africa'],\n", - " \"africa's\": ['▁', 'africa', \"'\", 's'],\n", - " 'african': ['▁', 'african'],\n", - " 'africans': ['▁', 'african', 's'],\n", - " 'after': ['▁', 'after'],\n", - " 'afternoon': ['▁', 'after', 'no', 'on'],\n", - " 'afterwards': ['▁', 'after', 'ward', 's'],\n", - " 'again': ['▁', 'again'],\n", - " 'against': ['▁', 'against'],\n", - " 'age': ['▁', 'age'],\n", - " 'age-structure': ['▁', 'age', '-', 's', 'tru', 'c', 'ture'],\n", - " 'aged': ['▁', 'aged'],\n", - " 'ageing': ['▁', 'age', 'ing'],\n", - " 'agent': ['▁', 'a', 'g', 'ent'],\n", - " 'agents': ['▁', 'a', 'g', 'ent', 's'],\n", - " 'ages': ['▁', 'age', 's'],\n", - " 'agitation': ['▁', 'a', 'g', 'it', 'ation'],\n", - " 'ago': ['▁', 'a', 'go'],\n", - " 'agree': ['▁', 'agree'],\n", - " 'agreed': ['▁', 'agree', 'd'],\n", - " 'agreement': ['▁', 'agree', 'ment'],\n", - " 'agreements': ['▁', 'agree', 'ment', 's'],\n", - " 'agriculture': ['▁', 'a', 'gr', 'ic', 'ul', 'ture'],\n", - " 'ahead': ['▁', 'a', 'head'],\n", - " 'aid': ['▁', 'a', 'id'],\n", - " 'aide': ['▁', 'a', 'i', 'de'],\n", - " 'aided': ['▁', 'a', 'id', 'ed'],\n", - " 'aides': ['▁', 'a', 'id', 'es'],\n", - " 'aim': ['▁', 'a', 'im'],\n", - " 'aimed': ['▁', 'a', 'im', 'ed'],\n", - " 'aiming': ['▁', 'a', 'im', 'ing'],\n", - " 'air': ['▁', 'air'],\n", - " 'aircraft': ['▁', 'air', 'craft'],\n", - " 'aired': ['▁', 'air', 'ed'],\n", - " \"airliner's\": ['▁', 'air', 'line', 'r', \"'\", 's'],\n", - " 'airmen': ['▁', 'air', 'men'],\n", - " 'airport': ['▁', 'air', 'port'],\n", - " 'akin': ['▁', 'a', 'k', 'in'],\n", - " \"aladdin's\": ['▁', 'al', 'ad', 'd', 'in', \"'\", 's'],\n", - " 'alan': ['▁', 'al', 'an'],\n", - " 'alarm': ['▁', 'al', 'arm'],\n", - " 'alarmed': ['▁', 'al', 'arm', 'ed'],\n", - " 'alas': ['▁', 'al', 'as'],\n", - " 'alcoholic': ['▁', 'al', 'co', 'ho', 'li', 'c'],\n", - " 'algeria': ['▁', 'al', 'g', 'er', 'i', 'a'],\n", - " 'alike': ['▁', 'a', 'like'],\n", - " 'alive': ['▁', 'a', 'live'],\n", - " 'all': ['▁', 'all'],\n", - " 'all-regular': ['▁', 'all', '-', 'regular'],\n", - " 'alleged': ['▁', 'al', 'leg', 'ed'],\n", - " 'allen': ['▁', 'all', 'en'],\n", - " 'alleviation': ['▁', 'alleviation'],\n", - " 'alley': ['▁', 'al', 'le', 'y'],\n", - " 'alliance': ['▁', 'all', 'i', 'ance'],\n", - " 'alliances': ['▁', 'all', 'i', 'ance', 's'],\n", - " 'allied': ['▁', 'all', 'i', 'ed'],\n", - " 'allies': ['▁', 'all', 'ies'],\n", - " 'allow': ['▁', 'allow'],\n", - " 'allowance': ['▁', 'allow', 'ance'],\n", - " 'allowances': ['▁', 'allow', 'ance', 's'],\n", - " 'allowed': ['▁', 'allow', 'ed'],\n", - " 'allowing': ['▁', 'allow', 'ing'],\n", - " 'ally': ['▁', 'al', 'ly'],\n", - " 'almost': ['▁', 'al', 'most'],\n", - " 'alone': ['▁', 'al', 'one'],\n", - " 'along': ['▁', 'a', 'long'],\n", - " 'alongside': ['▁', 'a', 'long', 'side'],\n", - " 'aloud': ['▁', 'a', 'lo', 'ud'],\n", - " 'already': ['▁', 'al', 'read', 'y'],\n", - " 'also': ['▁', 'also'],\n", - " 'alter': ['▁', 'al', 'ter'],\n", - " 'alternative': ['▁', 'al', 'ter', 'n', 'at', 'ive'],\n", - " 'alternatively': ['▁', 'al', 'ter', 'n', 'at', 'ive', 'ly'],\n", - " 'alternatives': ['▁', 'al', 'ter', 'n', 'at', 'ive', 's'],\n", - " 'although': ['▁', 'al', 'though'],\n", - " 'altogether': ['▁', 'al', 'together'],\n", - " 'altos': ['▁', 'al', 'to', 's'],\n", - " 'always': ['▁', 'always'],\n", - " 'am': ['▁', 'am'],\n", - " 'amateur': ['▁', 'am', 'ate', 'ur'],\n", - " 'amazed': ['▁', 'a', 'ma', 'z', 'ed'],\n", - " 'amazing': ['▁', 'a', 'ma', 'z', 'ing'],\n", - " 'ambassador': ['▁', 'am', 'bas', 's', 'ad', 'or'],\n", - " 'amber': ['▁', 'a', 'mber'],\n", - " 'ambition': ['▁', 'am', 'b', 'it', 'ion'],\n", - " 'ambitious': ['▁', 'am', 'b', 'it', 'i', 'ous'],\n", - " 'ambulance': ['▁', 'am', 'b', 'ul', 'ance'],\n", - " 'ambulances': ['▁', 'am', 'b', 'ul', 'ance', 's'],\n", - " 'america': ['▁', 'america'],\n", - " \"america's\": ['▁', 'america', \"'\", 's'],\n", - " 'american': ['▁', 'american'],\n", - " 'american-born': ['▁', 'american', '-', 'b', 'or', 'n'],\n", - " 'americans': ['▁', 'american', 's'],\n", - " 'amid': ['▁', 'am', 'id'],\n", - " 'ammunition': ['▁', 'am', 'm', 'un', 'it', 'ion'],\n", - " 'among': ['▁', 'among'],\n", - " 'amount': ['▁', 'a', 'mo', 'un', 't'],\n", - " 'ample': ['▁', 'amp', 'le'],\n", - " 'amusement': ['▁', 'am', 'use', 'ment'],\n", - " 'amusing': ['▁', 'am', 'us', 'ing'],\n", - " 'an': ['▁', 'an'],\n", - " 'analogy': ['▁', 'an', 'a', 'lo', 'g', 'y'],\n", - " 'analysed': ['▁', 'an', 'a', 'ly', 's', 'ed'],\n", - " 'anchor': ['▁', 'an', 'ch', 'or'],\n", - " 'ancient': ['▁', 'an', 'c', 'i', 'ent'],\n", - " 'and': ['▁', 'and'],\n", - " 'andrei': ['▁', 'and', 're', 'i'],\n", - " 'andrew': ['▁', 'and', 're', 'w'],\n", - " 'anecdotal': ['▁', 'an', 'e', 'c', 'do', 't', 'al'],\n", - " 'angel': ['▁', 'ang', 'el'],\n", - " 'angeles': ['▁', 'ang', 'el', 'es'],\n", - " 'angelo': ['▁', 'ang', 'e', 'lo'],\n", - " 'anger': ['▁', 'ang', 'er'],\n", - " 'anglais': ['▁', 'ang', 'la', 'is'],\n", - " 'angle': ['▁', 'ang', 'le'],\n", - " 'anglesey': ['▁', 'anglesey'],\n", - " \"anglesey's\": ['▁', 'anglesey', \"'\", 's'],\n", - " 'anglesey-road': ['▁', 'anglesey', '-', 'ro', 'ad'],\n", - " 'angola': ['▁', 'an', 'go', 'la'],\n", - " 'angrily': ['▁', 'an', 'gr', 'i', 'ly'],\n", - " 'angry': ['▁', 'ang', 'ry'],\n", - " 'ann': ['▁', 'an', 'n'],\n", - " 'anna': ['▁', 'an', 'n', 'a'],\n", - " 'announced': ['▁', 'an', 'no', 'un', 'c', 'ed'],\n", - " 'announcement': ['▁', 'an', 'no', 'un', 'ce', 'ment'],\n", - " 'announcing': ['▁', 'an', 'no', 'un', 'c', 'ing'],\n", - " 'annoyed': ['▁', 'an', 'no', 'y', 'ed'],\n", - " 'annual': ['▁', 'an', 'n', 'ual'],\n", - " 'another': ['▁', 'another'],\n", - " 'answer': ['▁', 'answer'],\n", - " 'answered': ['▁', 'answer', 'ed'],\n", - " 'answering': ['▁', 'answer', 'ing'],\n", - " 'antagonism': ['▁', 'ant', 'a', 'g', 'on', 'is', 'm'],\n", - " 'anthony': ['▁', 'an', 'th', 'on', 'y'],\n", - " 'anti-apartheid': ['▁', 'ant', 'i', '-', 'a', 'part', 'he', 'id'],\n", - " 'anti-bomb': ['▁', 'ant', 'i', '-', 'bomb'],\n", - " 'anti-german': ['▁', 'ant', 'i', '-', 'german'],\n", - " 'anti-nato': ['▁', 'ant', 'i', '-', 'nato'],\n", - " 'anti-negro': ['▁', 'ant', 'i', '-', 'negro'],\n", - " 'anti-nuclear': ['▁', 'ant', 'i', '-', 'nuclear'],\n", - " 'anti-soviet': ['▁', 'ant', 'i', '-', 'soviet'],\n", - " 'anti-tory': ['▁', 'ant', 'i', '-', 'tory'],\n", - " 'anticipation': ['▁', 'an', 'tic', 'ip', 'ation'],\n", - " 'antonioni': ['▁', 'ant', 'on', 'ion', 'i'],\n", - " \"antonioni's\": ['▁', 'ant', 'on', 'ion', 'i', \"'\", 's'],\n", - " 'any': ['▁', 'any'],\n", - " 'any-': ['▁', 'any', '-'],\n", - " 'anybody': ['▁', 'any', 'body'],\n", - " \"anybody's\": ['▁', 'any', 'body', \"'\", 's'],\n", - " 'anyone': ['▁', 'any', 'one'],\n", - " 'anything': ['▁', 'any', 'thing'],\n", - " 'anyway': ['▁', 'any', 'way'],\n", - " 'apart': ['▁', 'a', 'part'],\n", - " 'apartheid': ['▁', 'a', 'part', 'he', 'id'],\n", - " 'apathetic': ['▁', 'a', 'pa', 'the', 'tic'],\n", - " 'apathy': ['▁', 'a', 'pa', 'th', 'y'],\n", - " 'apex': ['▁', 'ap', 'ex'],\n", - " 'apocalypse': ['▁', 'a', 'po', 'c', 'a', 'ly', 'p', 'se'],\n", - " 'apologising': ['▁', 'a', 'po', 'lo', 'g', 'is', 'ing'],\n", - " 'appalled': ['▁', 'app', 'all', 'ed'],\n", - " 'appalling': ['▁', 'app', 'all', 'ing'],\n", - " 'apparatus': ['▁', 'app', 'ar', 'at', 'us'],\n", - " 'apparent': ['▁', 'app', 'ar', 'ent'],\n", - " 'apparently': ['▁', 'app', 'ar', 'ent', 'ly'],\n", - " 'appeal': ['▁', 'appeal'],\n", - " 'appealing': ['▁', 'appeal', 'ing'],\n", - " 'appeals': ['▁', 'appeal', 's'],\n", - " 'appear': ['▁', 'appear'],\n", - " 'appearance': ['▁', 'appear', 'ance'],\n", - " 'appeared': ['▁', 'appear', 'ed'],\n", - " 'appears': ['▁', 'appear', 's'],\n", - " 'appeasement': ['▁', 'app', 'e', 'a', 'se', 'ment'],\n", - " 'applauding': ['▁', 'app', 'la', 'ud', 'ing'],\n", - " 'appliances': ['▁', 'app', 'li', 'ance', 's'],\n", - " 'application': ['▁', 'app', 'li', 'c', 'ation'],\n", - " 'applications': ['▁', 'app', 'li', 'c', 'ation', 's'],\n", - " 'applied': ['▁', 'app', 'li', 'ed'],\n", - " 'apply': ['▁', 'app', 'ly'],\n", - " 'appointed': ['▁', 'ap', 'point', 'ed'],\n", - " 'appointment': ['▁', 'ap', 'point', 'ment'],\n", - " 'appreciable': ['▁', 'app', 're', 'c', 'i', 'able'],\n", - " 'appreciably': ['▁', 'app', 're', 'c', 'i', 'ably'],\n", - " 'appreciated': ['▁', 'app', 're', 'c', 'i', 'at', 'ed'],\n", - " 'appreciation': ['▁', 'app', 're', 'c', 'i', 'ation'],\n", - " 'apprenticeships': ['▁', 'app', 'r', 'ent', 'i', 'ce', 'ship', 's'],\n", - " 'approach': ['▁', 'ap', 'pro', 'a', 'ch'],\n", - " 'approached': ['▁', 'ap', 'pro', 'a', 'ch', 'ed'],\n", - " 'approaches': ['▁', 'ap', 'pro', 'a', 'che', 's'],\n", - " 'appropriate': ['▁', 'ap', 'pro', 'pri', 'ate'],\n", - " 'appropriated': ['▁', 'ap', 'pro', 'pri', 'at', 'ed'],\n", - " 'approval': ['▁', 'ap', 'pro', 'val'],\n", - " 'approximately': ['▁', 'ap', 'pro', 'x', 'im', 'ate', 'ly'],\n", - " 'april': ['▁', 'a', 'pri', 'l'],\n", - " 'archbishop': ['▁', 'ar', 'ch', 'b', 'is', 'hop'],\n", - " 'arches': ['▁', 'ar', 'che', 's'],\n", - " 'archipelago': ['▁', 'ar', 'ch', 'i', 'pe', 'la', 'go'],\n", - " 'architect': ['▁', 'ar', 'ch', 'it', 'e', 'c', 't'],\n", - " 'architecture': ['▁', 'ar', 'ch', 'it', 'e', 'c', 'ture'],\n", - " 'are': ['▁', 'are'],\n", - " 'area': ['▁', 'are', 'a'],\n", - " 'areas': ['▁', 'are', 'as'],\n", - " \"aren't\": ['▁', 'are', 'n', \"'\", 't'],\n", - " 'arguably': ['▁', 'ar', 'gu', 'ably'],\n", - " 'argued': ['▁', 'ar', 'gu', 'ed'],\n", - " 'argues': ['▁', 'ar', 'gu', 'es'],\n", - " 'arguing': ['▁', 'ar', 'gu', 'ing'],\n", - " 'argument': ['▁', 'ar', 'gu', 'ment'],\n", - " 'arguments': ['▁', 'ar', 'gu', 'ment', 's'],\n", - " 'arise': ['▁', 'a', 'rise'],\n", - " 'arises': ['▁', 'a', 'rise', 's'],\n", - " 'arm': ['▁', 'arm'],\n", - " 'armament': ['▁', 'arm', 'a', 'ment'],\n", - " 'armaments': ['▁', 'arm', 'a', 'ment', 's'],\n", - " 'armed': ['▁', 'arm', 'ed'],\n", - " 'armoured': ['▁', 'arm', 'our', 'ed'],\n", - " 'arms': ['▁', 'arm', 's'],\n", - " \"arms'\": ['▁', 'arm', 's', \"'\"],\n", - " 'army': ['▁', 'arm', 'y'],\n", - " 'arnold': ['▁', 'ar', 'n', 'old'],\n", - " 'arose': ['▁', 'a', 'ro', 'se'],\n", - " 'around': ['▁', 'a', 'round'],\n", - " 'aroused': ['▁', 'ar', 'ous', 'ed'],\n", - " 'arrange': ['▁', 'ar', 'range'],\n", - " 'arranged': ['▁', 'ar', 'range', 'd'],\n", - " 'arrangement': ['▁', 'ar', 'range', 'ment'],\n", - " 'arrangements': ['▁', 'ar', 'range', 'ment', 's'],\n", - " 'arranging': ['▁', 'ar', 'r', 'ang', 'ing'],\n", - " 'arrears': ['▁', 'ar', 're', 'ar', 's'],\n", - " 'arrested': ['▁', 'ar', 'rest', 'ed'],\n", - " 'arrival': ['▁', 'ar', 'r', 'i', 'val'],\n", - " 'arrive': ['▁', 'ar', 'r', 'ive'],\n", - " 'arrived': ['▁', 'arrived'],\n", - " 'arrives': ['▁', 'ar', 'r', 'ive', 's'],\n", - " 'arrogant': ['▁', 'ar', 'ro', 'g', 'ant'],\n", - " 'art': ['▁', 'ar', 't'],\n", - " 'arthur': ['▁', 'ar', 'th', 'ur'],\n", - " 'article': ['▁', 'ar', 'tic', 'le'],\n", - " 'articles': ['▁', 'ar', 'tic', 'le', 's'],\n", - " 'articulation': ['▁', 'ar', 'tic', 'ul', 'ation'],\n", - " 'artistic': ['▁', 'ar', 'tist', 'ic'],\n", - " 'artistically': ['▁', 'ar', 'tist', 'ical', 'ly'],\n", - " 'artistry': ['▁', 'ar', 'tist', 'ry'],\n", - " 'artists': ['▁', 'ar', 'tist', 's'],\n", - " 'as': ['▁', 'as'],\n", - " 'ascents': ['▁', 'as', 'cent', 's'],\n", - " 'ash': ['▁', 'as', 'h'],\n", - " 'ashen': ['▁', 'as', 'he', 'n'],\n", - " 'ask': ['▁', 'as', 'k'],\n", - " 'asked': ['▁', 'asked'],\n", - " 'asking': ['▁', 'asking'],\n", - " 'aspect': ['▁', 'a', 'spect'],\n", - " 'aspects': ['▁', 'a', 'spect', 's'],\n", - " 'aspiring': ['▁', 'as', 'p', 'i', 'r', 'ing'],\n", - " 'assault': ['▁', 'as', 's', 'a', 'ul', 't'],\n", - " 'assembler': ['▁', 'as', 'se', 'm', 'bl', 'er'],\n", - " 'assembly': ['▁', 'as', 'se', 'm', 'b', 'ly'],\n", - " 'assess': ['▁', 'as', 'se', 's', 's'],\n", - " 'assessment': ['▁', 'as', 'se', 's', 's', 'ment'],\n", - " 'assistance': ['▁', 'as', 's', 'istance'],\n", - " 'assistant': ['▁', 'as', 's', 'is', 't', 'ant'],\n", - " 'assistants': ['▁', 'as', 's', 'is', 't', 'ant', 's'],\n", - " 'associate': ['▁', 'associat', 'e'],\n", - " 'associated': ['▁', 'associat', 'ed'],\n", - " 'associates': ['▁', 'associat', 'es'],\n", - " 'association': ['▁', 'associat', 'ion'],\n", - " 'assortment': ['▁', 'as', 's', 'or', 't', 'ment'],\n", - " 'assumption': ['▁', 'assumption'],\n", - " 'assurance': ['▁', 'as', 's', 'ur', 'ance'],\n", - " 'astronaut': ['▁', 'as', 'tr', 'on', 'a', 'u', 't'],\n", - " 'astute': ['▁', 'a', 'st', 'u', 'te'],\n", - " 'at': ['▁', 'at'],\n", - " 'ately': ['▁', 'ate', 'ly'],\n", - " 'atkinson': ['▁', 'at', 'k', 'in', 's', 'on'],\n", - " 'atlantic': ['▁', 'at', 'l', 'an', 'tic'],\n", - " 'atmosphere': ['▁', 'atmospher', 'e'],\n", - " 'atmospheric': ['▁', 'atmospher', 'ic'],\n", - " 'atomic': ['▁', 'a', 'to', 'm', 'ic'],\n", - " 'atoms': ['▁', 'a', 'to', 'm', 's'],\n", - " 'attach': ['▁', 'at', 't', 'a', 'ch'],\n", - " 'attached': ['▁', 'at', 't', 'a', 'ch', 'ed'],\n", - " 'attack': ['▁', 'at', 't', 'a', 'ck'],\n", - " 'attacked': ['▁', 'at', 't', 'a', 'ck', 'ed'],\n", - " 'attacks': ['▁', 'at', 't', 'a', 'ck', 's'],\n", - " 'attainable': ['▁', 'at', 'tain', 'able'],\n", - " 'attempt': ['▁', 'attempt'],\n", - " 'attempted': ['▁', 'attempt', 'ed'],\n", - " 'attempting': ['▁', 'attempt', 'ing'],\n", - " 'attempts': ['▁', 'attempt', 's'],\n", - " 'atten-': ['▁', 'at', 'ten', '-'],\n", - " 'attend': ['▁', 'at', 't', 'end'],\n", - " 'attendance': ['▁', 'at', 't', 'end', 'ance'],\n", - " 'attended': ['▁', 'at', 't', 'end', 'ed'],\n", - " 'attending': ['▁', 'at', 't', 'end', 'ing'],\n", - " 'attention': ['▁', 'at', 'ten', 'tion'],\n", - " 'attitude': ['▁', 'at', 't', 'it', 'u', 'de'],\n", - " 'attitudes': ['▁', 'at', 't', 'it', 'ud', 'es'],\n", - " 'attracted': ['▁', 'at', 'tr', 'act', 'ed'],\n", - " 'attractive': ['▁', 'at', 'tr', 'act', 'ive'],\n", - " 'aubrey': ['▁', 'a', 'u', 'b', 're', 'y'],\n", - " 'audacity': ['▁', 'a', 'ud', 'ac', 'ity'],\n", - " 'auden': ['▁', 'a', 'ud', 'en'],\n", - " 'audience': ['▁', 'a', 'ud', 'i', 'ence'],\n", - " 'audio-tv': ['▁', 'a', 'ud', 'i', 'o', '-', 't', 'v'],\n", - " 'audited': ['▁', 'a', 'ud', 'it', 'ed'],\n", - " 'august': ['▁', 'a', 'ug', 'u', 'st'],\n", - " 'auntie': ['▁', 'a', 'un', 't', 'i', 'e'],\n", - " 'austerity': ['▁', 'a', 'u', 'ster', 'ity'],\n", - " 'australia': ['▁', 'a', 'us', 'tr', 'al', 'i', 'a'],\n", - " 'austria': ['▁', 'a', 'us', 'tri', 'a'],\n", - " 'austrian': ['▁', 'a', 'us', 'tri', 'an'],\n", - " 'authentic': ['▁', 'a', 'u', 'then', 'tic'],\n", - " 'author': ['▁', 'author'],\n", - " 'authorised': ['▁', 'author', 'is', 'ed'],\n", - " 'authorities': ['▁', 'author', 'it', 'ies'],\n", - " 'authority': ['▁', 'author', 'ity'],\n", - " 'automatically': ['▁', 'a', 'u', 'to', 'm', 'at', 'ical', 'ly'],\n", - " 'automation': ['▁', 'a', 'u', 'to', 'm', 'ation'],\n", - " 'autumn': ['▁', 'a', 'u', 't', 'um', 'n'],\n", - " 'available': ['▁', 'a', 'v', 'a', 'il', 'able'],\n", - " 'avenue': ['▁', 'a', 've', 'n', 'ue'],\n", - " 'average': ['▁', 'a', 'ver', 'age'],\n", - " 'averages': ['▁', 'a', 'ver', 'age', 's'],\n", - " 'avert': ['▁', 'a', 'ver', 't'],\n", - " 'aviation': ['▁', 'a', 'vi', 'ation'],\n", - " 'avoid': ['▁', 'a', 'v', 'o', 'id'],\n", - " 'avoided': ['▁', 'a', 'v', 'o', 'id', 'ed'],\n", - " 'avon': ['▁', 'a', 'v', 'on'],\n", - " 'awake': ['▁', 'a', 'w', 'a', 'ke'],\n", - " 'awarded': ['▁', 'a', 'ward', 'ed'],\n", - " 'awards': ['▁', 'a', 'ward', 's'],\n", - " 'aware': ['▁', 'a', 'w', 'are'],\n", - " 'awareness': ['▁', 'a', 'w', 'are', 'ness'],\n", - " 'away': ['▁', 'a', 'way'],\n", - " 'awful': ['▁', 'a', 'w', 'ful'],\n", - " 'awfully': ['▁', 'a', 'w', 'ful', 'ly'],\n", - " 'b': ['▁', 'b'],\n", - " 'b.': ['▁', 'b', '.'],\n", - " 'b.b.c.': ['▁', 'b', '.', 'b', '.', 'c', '.'],\n", - " 'babe': ['▁', 'b', 'a', 'be'],\n", - " 'babel': ['▁', 'b', 'a', 'be', 'l'],\n", - " 'bably': ['▁', 'b', 'ably'],\n", - " 'baby': ['▁', 'b', 'a', 'by'],\n", - " \"baby's\": ['▁', 'b', 'a', 'by', \"'\", 's'],\n", - " 'back': ['▁', 'back'],\n", - " 'backbone': ['▁', 'back', 'b', 'one'],\n", - " 'backed': ['▁', 'back', 'ed'],\n", - " 'backers': ['▁', 'back', 'ers'],\n", - " 'background': ['▁', 'back', 'ground'],\n", - " 'backing': ['▁', 'back', 'ing'],\n", - " 'backstage': ['▁', 'back', 'st', 'age'],\n", - " 'backward': ['▁', 'back', 'ward'],\n", - " 'bad': ['▁', 'b', 'ad'],\n", - " 'badly': ['▁', 'b', 'ad', 'ly'],\n", - " 'baffled': ['▁', 'b', 'a', 'f', 'f', 'led'],\n", - " 'bag': ['▁', 'b', 'a', 'g'],\n", - " 'bagaya': ['▁', 'b', 'a', 'gay', 'a'],\n", - " 'baker': ['▁', 'b', 'a', 'k', 'er'],\n", - " 'balance': ['▁', 'b', 'al', 'ance'],\n", - " 'balance-sheet': ['▁', 'b', 'al', 'ance', '-', 'she', 'e', 't'],\n", - " 'balances': ['▁', 'b', 'al', 'ance', 's'],\n", - " 'bald': ['▁', 'b', 'al', 'd'],\n", - " 'ball': ['▁', 'b', 'all'],\n", - " 'balloon': ['▁', 'b', 'all', 'o', 'on'],\n", - " 'ballyhoo': ['▁', 'b', 'al', 'ly', 'ho', 'o'],\n", - " 'baltic': ['▁', 'b', 'al', 'tic'],\n", - " 'ban': ['▁', 'b', 'an'],\n", - " 'ban-': ['▁', 'b', 'an', '-'],\n", - " 'ban-the-': ['▁', 'b', 'an', '-', 'the', '-'],\n", - " 'ban-the-bomb': ['▁', 'b', 'an', '-', 'the', '-', 'bomb'],\n", - " 'bank': ['▁', 'bank'],\n", - " \"bank's\": ['▁', 'bank', \"'\", 's'],\n", - " 'banking': ['▁', 'bank', 'ing'],\n", - " 'bankrupt': ['▁', 'bank', 'r', 'up', 't'],\n", - " 'banks': ['▁', 'bank', 's'],\n", - " \"banks'\": ['▁', 'bank', 's', \"'\"],\n", - " 'banned': ['▁', 'b', 'an', 'n', 'ed'],\n", - " 'banzie': ['▁', 'b', 'an', 'z', 'i', 'e'],\n", - " 'bar': ['▁', 'b', 'ar'],\n", - " 'barb': ['▁', 'b', 'ar', 'b'],\n", - " 'barbara': ['▁', 'b', 'ar', 'b', 'ar', 'a'],\n", - " 'barbarously': ['▁', 'b', 'ar', 'b', 'ar', 'ous', 'ly'],\n", - " 'barclay': ['▁', 'b', 'ar', 'clay'],\n", - " 'bare': ['▁', 'b', 'are'],\n", - " 'bargain': ['▁', 'b', 'ar', 'g', 'a', 'in'],\n", - " 'bargaining': ['▁', 'b', 'ar', 'g', 'a', 'in', 'ing'],\n", - " 'bark': ['▁', 'b', 'ar', 'k'],\n", - " 'barrier': ['▁', 'b', 'ar', 'r', 'i', 'er'],\n", - " 'barriers': ['▁', 'b', 'ar', 'r', 'i', 'ers'],\n", - " 'barry': ['▁', 'b', 'a', 'rry'],\n", - " 'base': ['▁', 'base'],\n", - " 'based': ['▁', 'bas', 'ed'],\n", - " 'bases': ['▁', 'base', 's'],\n", - " 'basic': ['▁', 'bas', 'ic'],\n", - " 'basin': ['▁', 'bas', 'in'],\n", - " 'basing': ['▁', 'bas', 'ing'],\n", - " 'basis': ['▁', 'bas', 'is'],\n", - " 'baskerville': ['▁', 'bas', 'k', 'er', 'v', 'il', 'le'],\n", - " 'basses': ['▁', 'bas', 'se', 's'],\n", - " 'basting': ['▁', 'bas', 't', 'ing'],\n", - " 'bathing': ['▁', 'b', 'a', 'thing'],\n", - " 'bats': ['▁', 'b', 'at', 's'],\n", - " 'batsman': ['▁', 'b', 'at', 's', 'man'],\n", - " 'battalions': ['▁', 'b', 'at', 't', 'al', 'ion', 's'],\n", - " 'batting': ['▁', 'b', 'at', 't', 'ing'],\n", - " 'battle': ['▁', 'b', 'a', 'ttle'],\n", - " 'bavaria': ['▁', 'b', 'a', 'v', 'ar', 'i', 'a'],\n", - " 'bavarian': ['▁', 'b', 'a', 'v', 'ar', 'i', 'an'],\n", - " 'bavarians': ['▁', 'b', 'a', 'v', 'ar', 'i', 'an', 's'],\n", - " 'bay': ['▁', 'b', 'a', 'y'],\n", - " 'be': ['▁', 'be'],\n", - " 'beach': ['▁', 'b', 'each'],\n", - " 'beaches': ['▁', 'b', 'each', 'es'],\n", - " 'beacon': ['▁', 'be', 'a', 'con'],\n", - " 'beaks': ['▁', 'be', 'a', 'k', 's'],\n", - " 'bean': ['▁', 'be', 'an'],\n", - " 'bear': ['▁', 'be', 'ar'],\n", - " 'bearer': ['▁', 'be', 'are', 'r'],\n", - " 'bears': ['▁', 'be', 'ar', 's'],\n", - " 'beastly': ['▁', 'b', 'east', 'ly'],\n", - " 'beasts': ['▁', 'b', 'east', 's'],\n", - " 'beaten': ['▁', 'be', 'a', 'ten'],\n", - " 'beautiful': ['▁', 'be', 'a', 'u', 't', 'i', 'ful'],\n", - " 'beautifully': ['▁', 'be', 'a', 'u', 't', 'i', 'ful', 'ly'],\n", - " 'beauty': ['▁', 'be', 'a', 'u', 'ty'],\n", - " 'became': ['▁', 'be', 'came'],\n", - " 'because': ['▁', 'because'],\n", - " 'beckoning': ['▁', 'be', 'ck', 'on', 'ing'],\n", - " 'become': ['▁', 'be', 'come'],\n", - " 'becomes': ['▁', 'be', 'come', 's'],\n", - " 'becoming': ['▁', 'be', 'com', 'ing'],\n", - " 'bed': ['▁', 'b', 'ed'],\n", - " 'bedlam': ['▁', 'b', 'ed', 'la', 'm'],\n", - " 'beds': ['▁', 'b', 'ed', 's'],\n", - " 'bedspreads': ['▁', 'b', 'ed', 's', 'p', 'read', 's'],\n", - " 'beech': ['▁', 'be', 'e', 'ch'],\n", - " 'been': ['▁', 'been'],\n", - " 'before': ['▁', 'before'],\n", - " 'befriended': ['▁', 'be', 'friend', 'ed'],\n", - " 'began': ['▁', 'be', 'g', 'an'],\n", - " 'begin': ['▁', 'be', 'g', 'in'],\n", - " 'beginner': ['▁', 'be', 'g', 'in', 'n', 'er'],\n", - " 'beginning': ['▁', 'be', 'g', 'in', 'n', 'ing'],\n", - " 'begins': ['▁', 'be', 'g', 'in', 's'],\n", - " 'begun': ['▁', 'be', 'g', 'un'],\n", - " 'behan': ['▁', 'be', 'h', 'an'],\n", - " 'behave': ['▁', 'be', 'have'],\n", - " 'behaviour': ['▁', 'be', 'h', 'a', 'vi', 'our'],\n", - " 'behind': ['▁', 'behind'],\n", - " 'beier': ['▁', 'be', 'i', 'er'],\n", - " 'being': ['▁', 'being'],\n", - " 'belgian': ['▁', 'be', 'l', 'g', 'i', 'an'],\n", - " 'belgium': ['▁', 'be', 'l', 'giu', 'm'],\n", - " 'belgrade': ['▁', 'be', 'l', 'gr', 'a', 'de'],\n", - " 'belief': ['▁', 'be', 'li', 'e', 'f'],\n", - " 'believe': ['▁', 'believe'],\n", - " 'believed': ['▁', 'believed'],\n", - " 'believes': ['▁', 'believe', 's'],\n", - " 'bell': ['▁', 'be', 'll'],\n", - " \"bell's\": ['▁', 'be', 'll', \"'\", 's'],\n", - " 'belmondo': ['▁', 'be', 'l', 'mon', 'do'],\n", - " 'belonged': ['▁', 'be', 'long', 'ed'],\n", - " 'belongs': ['▁', 'be', 'long', 's'],\n", - " 'below': ['▁', 'be', 'low'],\n", - " 'belt': ['▁', 'be', 'l', 't'],\n", - " 'ben': ['▁', 'be', 'n'],\n", - " 'bench': ['▁', 'be', 'n', 'ch'],\n", - " 'benches': ['▁', 'be', 'n', 'che', 's'],\n", - " 'bend': ['▁', 'b', 'end'],\n", - " 'bending': ['▁', 'b', 'end', 'ing'],\n", - " 'benefits': ['▁', 'be', 'ne', 'f', 'its'],\n", - " 'bent': ['▁', 'b', 'ent'],\n", - " 'ber': ['▁', 'be', 'r'],\n", - " 'berlin': ['▁', 'berlin'],\n", - " \"berlin's\": ['▁', 'berlin', \"'\", 's'],\n", - " 'bernhard': ['▁', 'be', 'r', 'n', 'hard'],\n", - " 'berry': ['▁', 'be', 'rry'],\n", - " 'bertrand': ['▁', 'bert', 'r', 'and'],\n", - " 'beset': ['▁', 'be', 'set'],\n", - " 'beside': ['▁', 'be', 'side'],\n", - " 'best': ['▁', 'best'],\n", - " 'best-seller': ['▁', 'best', '-', 's', 'ell', 'er'],\n", - " 'bet': ['▁', 'be', 't'],\n", - " 'betjeman': ['▁', 'be', 't', 'je', 'man'],\n", - " 'betrayal': ['▁', 'be', 'tr', 'a', 'y', 'al'],\n", - " 'betrayed': ['▁', 'be', 'tr', 'a', 'y', 'ed'],\n", - " 'better': ['▁', 'better'],\n", - " 'better-': ['▁', 'better', '-'],\n", - " \"betti's\": ['▁', 'be', 't', 't', 'i', \"'\", 's'],\n", - " 'between': ['▁', 'between'],\n", - " 'bevel': ['▁', 'be', 've', 'l'],\n", - " 'bevelled': ['▁', 'be', 'v', 'ell', 'ed'],\n", - " 'beware': ['▁', 'be', 'w', 'are'],\n", - " 'bewildered': ['▁', 'be', 'w', 'il', 'd', 'er', 'ed'],\n", - " 'beyond': ['▁', 'beyond'],\n", - " 'bidet': ['▁', 'b', 'i', 'de', 't'],\n", - " 'big': ['▁', 'big'],\n", - " 'bigger': ['▁', 'big', 'g', 'er'],\n", - " 'biggest': ['▁', 'big', 'g', 'est'],\n", - " 'bill': ['▁', 'b', 'ill'],\n", - " 'bills': ['▁', 'b', 'ill', 's'],\n", - " 'binding': ['▁', 'b', 'in', 'd', 'ing'],\n", - " 'biological': ['▁', 'b', 'i', 'o', 'lo', 'g', 'ical'],\n", - " 'bird': ['▁', 'b', 'i', 'r', 'd'],\n", - " 'birds': ['▁', 'b', 'i', 'r', 'd', 's'],\n", - " 'bishop': ['▁', 'b', 'is', 'hop'],\n", - " 'bit': ['▁', 'b', 'it'],\n", - " 'bite': ['▁', 'b', 'it', 'e'],\n", - " 'bits': ['▁', 'b', 'its'],\n", - " 'bitter-sweet': ['▁', 'b', 'it', 'ter', '-', 's', 'we', 'e', 't'],\n", - " 'bitterest': ['▁', 'b', 'it', 'ter', 'est'],\n", - " 'bitterly': ['▁', 'b', 'it', 'ter', 'ly'],\n", - " 'bituminized': ['▁', 'b', 'it', 'um', 'in', 'i', 'z', 'ed'],\n", - " 'black': ['▁', 'bl', 'a', 'ck'],\n", - " 'black-': ['▁', 'bl', 'a', 'ck', '-'],\n", - " 'black-listed': ['▁', 'bl', 'a', 'ck', '-', 'li', 'st', 'ed'],\n", - " 'blackbird': ['▁', 'bl', 'a', 'ck', 'b', 'i', 'r', 'd'],\n", - " 'blacks': ['▁', 'bl', 'a', 'ck', 's'],\n", - " 'blame': ['▁', 'bl', 'a', 'me'],\n", - " 'blamed': ['▁', 'bl', 'am', 'ed'],\n", - " 'blander': ['▁', 'bl', 'and', 'er'],\n", - " 'blank': ['▁', 'bl', 'an', 'k'],\n", - " 'blend': ['▁', 'bl', 'end'],\n", - " 'blight': ['▁', 'b', 'light'],\n", - " 'blind': ['▁', 'bl', 'in', 'd'],\n", - " 'blinked': ['▁', 'bl', 'in', 'k', 'ed'],\n", - " 'block': ['▁', 'block'],\n", - " 'blocks': ['▁', 'block', 's'],\n", - " 'bloem-': ['▁', 'b', 'lo', 'e', 'm', '-'],\n", - " 'blond': ['▁', 'bl', 'on', 'd'],\n", - " 'blood': ['▁', 'b', 'lo', 'od'],\n", - " 'bloodstained': ['▁', 'b', 'lo', 'od', 's', 'tain', 'ed'],\n", - " 'bloody': ['▁', 'b', 'lo', 'od', 'y'],\n", - " 'blouse': ['▁', 'b', 'lo', 'use'],\n", - " 'blouses': ['▁', 'bl', 'ous', 'es'],\n", - " 'blow': ['▁', 'b', 'low'],\n", - " 'blowflies': ['▁', 'b', 'low', 'f', 'l', 'ies'],\n", - " 'blown': ['▁', 'bl', 'own'],\n", - " 'blue': ['▁', 'bl', 'ue'],\n", - " 'blunt': ['▁', 'bl', 'un', 't'],\n", - " 'bluntly': ['▁', 'bl', 'un', 't', 'ly'],\n", - " 'bluster': ['▁', 'bl', 'u', 'ster'],\n", - " 'board': ['▁', 'board'],\n", - " 'boat': ['▁', 'bo', 'at'],\n", - " 'boat-train': ['▁', 'bo', 'at', '-', 'train'],\n", - " 'bobby': ['▁', 'bo', 'b', 'by'],\n", - " 'bodies': ['▁', 'bo', 'd', 'ies'],\n", - " 'body': ['▁', 'body'],\n", - " 'boeing': ['▁', 'bo', 'e', 'ing'],\n", - " 'bogy': ['▁', 'bo', 'g', 'y'],\n", - " 'boiled': ['▁', 'bo', 'il', 'ed'],\n", - " 'boils': ['▁', 'bo', 'il', 's'],\n", - " 'bold': ['▁', 'b', 'old'],\n", - " 'boldly': ['▁', 'b', 'old', 'ly'],\n", - " 'bolt': ['▁', 'bo', 'l', 't'],\n", - " 'bolted': ['▁', 'bo', 'l', 'ted'],\n", - " 'bomb': ['▁', 'bomb'],\n", - " 'bombay': ['▁', 'bomb', 'a', 'y'],\n", - " 'bombed': ['▁', 'bomb', 'ed'],\n", - " 'bombers': ['▁', 'bomb', 'ers'],\n", - " 'bonded': ['▁', 'b', 'on', 'd', 'ed'],\n", - " 'bone': ['▁', 'b', 'one'],\n", - " 'bones': ['▁', 'b', 'one', 's'],\n", - " 'bonn': ['▁', 'b', 'on', 'n'],\n", - " \"bonn's\": ['▁', 'b', 'on', 'n', \"'\", 's'],\n", - " 'book': ['▁', 'book'],\n", - " 'booklet': ['▁', 'book', 'le', 't'],\n", - " 'books': ['▁', 'book', 's'],\n", - " 'booming': ['▁', 'bo', 'o', 'm', 'ing'],\n", - " 'border': ['▁', 'b', 'order'],\n", - " 'bore': ['▁', 'bo', 're'],\n", - " 'bored': ['▁', 'b', 'or', 'ed'],\n", - " 'boredom': ['▁', 'bo', 're', 'do', 'm'],\n", - " 'bores': ['▁', 'bo', 're', 's'],\n", - " 'born': ['▁', 'b', 'or', 'n'],\n", - " 'borough': ['▁', 'bo', 'rough'],\n", - " 'borrow': ['▁', 'b', 'or', 'ro', 'w'],\n", - " 'borstal': ['▁', 'b', 'or', 'st', 'al'],\n", - " 'bosoms': ['▁', 'bo', 'so', 'm', 's'],\n", - " 'bossed': ['▁', 'bo', 's', 's', 'ed'],\n", - " 'bosses': ['▁', 'bo', 's', 'se', 's'],\n", - " 'both': ['▁', 'both'],\n", - " 'bottle': ['▁', 'bo', 'ttle'],\n", - " 'bottom': ['▁', 'bo', 't', 'to', 'm'],\n", - " 'bought': ['▁', 'bo', 'ug', 'h', 't'],\n", - " 'boun': ['▁', 'bo', 'un'],\n", - " 'bound': ['▁', 'b', 'ound'],\n", - " 'boutiques': ['▁', 'b', 'out', 'i', 'q', 'ue', 's'],\n", - " 'bow': ['▁', 'bo', 'w'],\n", - " 'bow-street': ['▁', 'bo', 'w', '-', 'st', 're', 'e', 't'],\n", - " 'bowed': ['▁', 'bo', 'w', 'ed'],\n", - " 'bowing': ['▁', 'bo', 'w', 'ing'],\n", - " 'bows': ['▁', 'bo', 'w', 's'],\n", - " 'box': ['▁', 'bo', 'x'],\n", - " 'boxes': ['▁', 'bo', 'x', 'es'],\n", - " 'boxing': ['▁', 'bo', 'x', 'ing'],\n", - " 'boy': ['▁', 'bo', 'y'],\n", - " 'boycotted': ['▁', 'bo', 'y', 'cott', 'ed'],\n", - " 'boycotting': ['▁', 'bo', 'y', 'cott', 'ing'],\n", - " 'boyd-orr': ['▁', 'bo', 'y', 'd', '-', 'or', 'r'],\n", - " 'boyle': ['▁', 'bo', 'y', 'le'],\n", - " 'boys': ['▁', 'bo', 'y', 's'],\n", - " 'braces': ['▁', 'br', 'a', 'ce', 's'],\n", - " 'brain': ['▁', 'b', 'rain'],\n", - " 'brain-activity': ['▁', 'b', 'rain', '-', 'act', 'i', 'v', 'ity'],\n", - " 'brain-children': ['▁', 'b', 'rain', '-', 'children'],\n", - " 'brains': ['▁', 'b', 'rain', 's'],\n", - " 'brandy': ['▁', 'br', 'and', 'y'],\n", - " 'brash': ['▁', 'br', 'as', 'h'],\n", - " 'brass': ['▁', 'br', 'as', 's'],\n", - " 'brauchitsch': ['▁', 'br', 'a', 'u', 'ch', 'its', 'ch'],\n", - " 'breach': ['▁', 'br', 'each'],\n", - " 'bread-and-butter': ['▁', 'b', 'read', '-', 'and', '-', 'but', 'ter'],\n", - " 'break': ['▁', 'b', 're', 'a', 'k'],\n", - " 'breaking': ['▁', 'b', 're', 'a', 'k', 'ing'],\n", - " 'breaks': ['▁', 'b', 're', 'a', 'k', 's'],\n", - " 'breath': ['▁', 'b', 're', 'a', 'th'],\n", - " 'breathing': ['▁', 'b', 're', 'a', 'thing'],\n", - " 'breathless': ['▁', 'b', 're', 'a', 'th', 'less'],\n", - " 'breeding': ['▁', 'b', 're', 'ed', 'ing'],\n", - " 'breezily': ['▁', 'b', 're', 'e', 'z', 'i', 'ly'],\n", - " 'brehm': ['▁', 'b', 're', 'h', 'm'],\n", - " 'brella': ['▁', 'br', 'ell', 'a'],\n", - " 'brenda': ['▁', 'br', 'end', 'a'],\n", - " 'brendan': ['▁', 'br', 'end', 'an'],\n", - " \"brendan's\": ['▁', 'br', 'end', 'an', \"'\", 's'],\n", - " 'brentano': ['▁', 'br', 'ent', 'a', 'no'],\n", - " 'brezhnev': ['▁', 'b', 're', 'z', 'h', 'ne', 'v'],\n", - " 'brian': ['▁', 'br', 'i', 'an'],\n", - " 'bridal': ['▁', 'br', 'id', 'al'],\n", - " 'bride': ['▁', 'br', 'i', 'de'],\n", - " 'brief': ['▁', 'brief'],\n", - " 'brief-': ['▁', 'brief', '-'],\n", - " 'briefcase': ['▁', 'brief', 'case'],\n", - " 'briefing': ['▁', 'brief', 'ing'],\n", - " 'brigadiers': ['▁', 'br', 'i', 'g', 'ad', 'i', 'ers'],\n", - " 'bright': ['▁', 'b', 'right'],\n", - " 'brighter': ['▁', 'b', 'right', 'er'],\n", - " 'brightly': ['▁', 'b', 'right', 'ly'],\n", - " \"brighton's\": ['▁', 'b', 'right', 'on', \"'\", 's'],\n", - " 'brilliant': ['▁', 'br', 'ill', 'i', 'ant'],\n", - " 'brilliantly': ['▁', 'br', 'ill', 'i', 'ant', 'ly'],\n", - " 'bring': ['▁', 'br', 'ing'],\n", - " 'brings': ['▁', 'br', 'ing', 's'],\n", - " 'bristled': ['▁', 'br', 'is', 't', 'led'],\n", - " 'bristol': ['▁', 'br', 'is', 'to', 'l'],\n", - " 'britain': ['▁', 'britain'],\n", - " \"britain's\": ['▁', 'britain', \"'\", 's'],\n", - " 'british': ['▁', 'british'],\n", - " 'british-owned': ['▁', 'british', '-', 'own', 'ed'],\n", - " 'britishers': ['▁', 'british', 'ers'],\n", - " 'brittle': ['▁', 'br', 'i', 'ttle'],\n", - " 'broad': ['▁', 'b', 'ro', 'ad'],\n", - " 'broadcast': ['▁', 'b', 'ro', 'ad', 'c', 'a', 'st'],\n", - " 'broadcasting': ['▁', 'b', 'ro', 'ad', 'c', 'a', 'st', 'ing'],\n", - " 'broke': ['▁', 'b', 'ro', 'ke'],\n", - " 'broken': ['▁', 'b', 'ro', 'k', 'en'],\n", - " 'bronx': ['▁', 'br', 'on', 'x'],\n", - " \"brook's\": ['▁', 'b', 'ro', 'o', 'k', \"'\", 's'],\n", - " 'brother': ['▁', 'brother'],\n", - " 'brother-': ['▁', 'brother', '-'],\n", - " 'brother-in-law': ['▁', 'brother', '-', 'in', '-', 'law'],\n", - " 'brought': ['▁', 'brought'],\n", - " 'brown': ['▁', 'brown'],\n", - " \"brown's\": ['▁', 'brown', \"'\", 's'],\n", - " 'bru\"cke': ['▁', 'br', 'u', '\"', 'ck', 'e'],\n", - " 'bruce': ['▁', 'br', 'u', 'ce'],\n", - " 'bruno': ['▁', 'br', 'un', 'o'],\n", - " 'brunswick': ['▁', 'br', 'un', 's', 'w', 'i', 'ck'],\n", - " 'brussels': ['▁', 'br', 'us', 's', 'el', 's'],\n", - " 'brutal': ['▁', 'br', 'u', 't', 'al'],\n", - " 'bryan': ['▁', 'br', 'y', 'an'],\n", - " 'bu\"ckerei': ['▁', 'b', 'u', '\"', 'ck', 'e', 're', 'i'],\n", - " 'buck': ['▁', 'b', 'u', 'ck'],\n", - " 'buckingham': ['▁', 'b', 'u', 'ck', 'ing', 'h', 'am'],\n", - " 'buckley': ['▁', 'b', 'u', 'ck', 'le', 'y'],\n", - " 'budge': ['▁', 'b', 'ud', 'g', 'e'],\n", - " 'budgerigar': ['▁', 'b', 'ud', 'g', 'er', 'i', 'g', 'ar'],\n", - " 'budget': ['▁', 'budget'],\n", - " 'budgetary': ['▁', 'budget', 'ary'],\n", - " 'budgette': ['▁', 'budget', 'te'],\n", - " 'buganda': ['▁', 'b', 'ug', 'and', 'a'],\n", - " 'build': ['▁', 'b', 'u', 'il', 'd'],\n", - " 'building': ['▁', 'building'],\n", - " ...}" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "lex" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.2" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} -- cgit v1.2.3-70-g09d2