summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--notebooks/01-look-at-emnist.ipynb117
-rw-r--r--notebooks/02b-emnist-lines-dataset.ipynb2
-rw-r--r--pyproject.toml1
-rw-r--r--text_recognizer/datasets/base_data_module.py2
-rw-r--r--text_recognizer/datasets/base_dataset.py8
-rw-r--r--text_recognizer/datasets/emnist.py12
-rw-r--r--text_recognizer/datasets/emnist_essentials.json2
-rw-r--r--text_recognizer/datasets/emnist_lines.py172
-rw-r--r--text_recognizer/datasets/sentence_generator.py30
9 files changed, 215 insertions, 131 deletions
diff --git a/notebooks/01-look-at-emnist.ipynb b/notebooks/01-look-at-emnist.ipynb
index b70ce12..1f393db 100644
--- a/notebooks/01-look-at-emnist.ipynb
+++ b/notebooks/01-look-at-emnist.ipynb
@@ -2,9 +2,18 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n",
+ " %reload_ext autoreload\n"
+ ]
+ }
+ ],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
@@ -12,118 +21,88 @@
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
- "from PIL import Image\n",
- "import torch\n",
+ "\n",
"from importlib.util import find_spec\n",
"if find_spec(\"text_recognizer\") is None:\n",
" import sys\n",
- " sys.path.append('..')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "from text_recognizer.datasets import EmnistDataset"
+ " sys.path.append('..')\n",
+ "\n",
+ "from text_recognizer.datasets.emnist import EMNIST"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
- "outputs": [],
- "source": [
- "dataset = EmnistDataset(train=False, sample_to_balance=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "dataset.load_or_generate_data()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EMNIST Dataset\n",
- "Num classes: 80\n",
- "Input shape: [28, 28]\n",
- "Mapping: {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F', 16: 'G', 17: 'H', 18: 'I', 19: 'J', 20: 'K', 21: 'L', 22: 'M', 23: 'N', 24: 'O', 25: 'P', 26: 'Q', 27: 'R', 28: 'S', 29: 'T', 30: 'U', 31: 'V', 32: 'W', 33: 'X', 34: 'Y', 35: 'Z', 36: 'a', 37: 'b', 38: 'c', 39: 'd', 40: 'e', 41: 'f', 42: 'g', 43: 'h', 44: 'i', 45: 'j', 46: 'k', 47: 'l', 48: 'm', 49: 'n', 50: 'o', 51: 'p', 52: 'q', 53: 'r', 54: 's', 55: 't', 56: 'u', 57: 'v', 58: 'w', 59: 'x', 60: 'y', 61: 'z', 62: ' ', 63: '!', 64: '\"', 65: '#', 66: '&', 67: \"'\", 68: '(', 69: ')', 70: '*', 71: '+', 72: ',', 73: '-', 74: '.', 75: '/', 76: ':', 77: ';', 78: '?', 79: None}\n",
+ "Num classes: 83\n",
+ "Mapping: ['<b>', '<s>', '</s>', '<p>', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '!', '\"', '#', '&', \"'\", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '?']\n",
+ "Dims: (1, 28, 28)\n",
+ "Train/val/test sizes: 260276, 65070, 54028\n",
+ "Batch x stats: (torch.Size([128, 1, 28, 28]), torch.float32, tensor(0.), tensor(0.1715), tensor(0.3314), tensor(1.))\n",
+ "Batch y stats: (torch.Size([128]), torch.int64, tensor(4), tensor(65))\n",
"\n"
]
}
],
"source": [
- "print(dataset)"
+ "data = EMNIST()\n",
+ "data.prepare_data()\n",
+ "data.setup()\n",
+ "print(data)"
]
},
{
"cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [],
- "source": [
- "def display_images(dataset, shift=0):\n",
- " fig = plt.figure(figsize=(9, 9))\n",
- " for i in range(9):\n",
- " x, y = dataset[i + shift]\n",
- " ax = fig.add_subplot(3, 3, i + 1)\n",
- " x = x.squeeze(0).numpy()\n",
- " ax.imshow(x, cmap='gray')\n",
- " ax.set_xticks([])\n",
- " ax.set_yticks([])\n",
- " ax.set_title(dataset.mapper(int(y)))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgQAAAILCAYAAACXVIRDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA5XklEQVR4nO3de3DV9Z3/8fdXbgkJ4ZJwCQECyE3uWlq5IwUWAbGAru5atHan086qO7IztusWZ0TX7bgus2VX3Zntzm6t21ZtR4FKim6g3G+OLkRAEEEIt3AJCZAgd87+0fY3/fF+fbffk3NyLsnz8edrTnK+JJ9z8vH4+r4/QSwWMwAA0Lzdku4LAAAA6ceGAAAAsCEAAABsCAAAgLEhAAAAxoYAAAAYGwIAAGBsCJIiCIJOQRAsDYLgQhAElUEQPJTuawLiwRpGtmMNJ65lui+giXjVzK6YWVczG2lmZUEQVMRisd1pvSogOtYwsh1rOEEBkwoTEwRBnpnVmtnQWCy273fZf5nZsVgs9nRaLw6IgDWMbMcaTg7+l0HiBpjZtd8vwt+pMLMhaboeIF6sYWQ71nASsCFIXL6Znb8pO2dm7dJwLUBDsIaR7VjDScCGIHH1ZlZwU1ZgZnVpuBagIVjDyHas4SRgQ5C4fWbWMgiC/n+QjTAziizIFqxhZDvWcBJQKkyCIAjeNLOYmX3Lfttu/bWZjaXdimzBGka2Yw0njk8IkuMxM8s1s1Nm9oaZ/SWLEFmGNYxsxxpOEJ8QAAAAPiEAAABsCAAAgLEhAAAAxoYAAAAYGwIAAGBxnnYYBAG3JCAR1bFYrHM6L4A1jASxhpHtQtcwnxAglSrTfQFAgljDyHaha5gNAQAAYEMAAADYEAAAAGNDAAAAjA0BAAAwNgQAAMDYEAAAAGNDAAAALM5JhcgMt9zi93FBELjs+vXrqbgcNHFqvaksUTdu3IiUAb+n1mGbNm1c1rVrV5e1bJnYn79r1665rKqqymWXL19O6HlSiU8IAAAAGwIAAMCGAAAAGBsCAABglArTQhUAW7duLR87cOBAl916660uKy0tddlvfvMbl504ccJlp0+fdlksxgmrTZ1ah2odDRs2zGUjR450WTxFQ1UW3LFjh8vKy8tdpkparNfspNagmVlhYaHLCgoKXDZixAiXjR071mWTJ092WV5ensvC1rBar3V1dS574YUXXFZWVuayTC188wkBAABgQwAAANgQAAAAY0MAAACMDQEAADDuMmh0LVq0cFmfPn1cduedd8qvf+qpp1xWXFzsMtWYfeSRR1z24Ycfuuz73/++y2pqauT1MEo2O+Xm5rqse/fuLlu0aJHL1NpUdyOENcYVdVdAZWWly5577jmXbd261WWff/55pOdAZgm7u2rMmDEuGzBgQKTHqTtg1HumGl0ctobVWlJ3u9x+++0uW7VqlcsuXrwY6TlSjU8IAAAAGwIAAMCGAAAAGBsCAABglAqTSp3D3b59e5eNGzfOZVOnTpXfs1+/fi7LyclxmSovqq+9evWqy/Lz81129uxZeT2UCjOfWgslJSUuU2XB0aNHu0yVD9VzxLM2ol7j9OnTIz3PkSNHXHblypXI14PGpwp7gwYNko9VZVL1fta2bVuXqbHAx48fd5l6LwzTqVMnl6n3TVXk3r9/v8s2bdrkskOHDkW+nsbCJwQAAIANAQAAYEMAAACMDQEAADBKhZGoMowq9s2bN89lqiw4Y8YMl6nyYRhV+FPX2K5dO5d16NDBZaoMGc/UOTQ+9ftQ0wLN9Bnxjz76qMuGDx/ust69e7tMlfjWrVvnsl27dsnruXbtmstUQexP/uRPXKbOsVdlsJUrV0Z6nFlmTIRr6m65xf+3pirmLViwQH79bbfd5rKo71OqYPrtb3870uPCfP3rX3fZ7NmzXTZ06FCXPfnkky7r0qWLy15++WWXqddOY+ITAgAAwIYAAACwIQAAAMaGAAAAGKXCSFSB
UE1vu/vuu12mJr8VFhZGfm51JOyePXtcVlBQ4DI1ia5Vq1YuUxO31NGgZvFN90LyqGNi1dGvZrqcpx6riqyqQKiOat28ebPLVq9eLa+nvr7eZeqIWrVe1bTB2tpal6mjaCkPpo8qFar3KFXCMws/FvlmqnSn1qsqEKr31rBpmx999JHLevbs6bKBAwe6TJW7VZYJ+IQAAACwIQAAAGwIAACAsSEAAADWjEuFqlxnpotWs2bNcpmaQHjfffe5TJVjqqurXbZz5055Pa+++qrLVKlQTdJShcauXbu6bOLEiS47c+aMvJ5MOKKzqVNHuvbv399l3/ve9+TXJ3JM7Nq1a122detWl/3Hf/yHy6qqquT1qOLX4cOHXTZhwgSXffWrX3WZ+vepda2OvDXjWORU6NWrl8vU+0zYtE1VElXra9myZS47ePCgy06dOhXpOcJKheXl5S47evSoy9S/URUsM3USLJ8QAAAANgQAAIANAQAAMDYEAADAmnGpMOy4YXV07Jw5c1ymjo5VBUI12e+zzz5z2caNG+X1qEKXmvymCmKquKImEMYzqRCNr1u3bi5Tk/2Ki4vl16vJmqpIp47RVutw1apVLlMFr7CynpoYeP78eZepaXBq6qIqWKpjm9VEQzNKhcmm1tvDDz/sMvU+qo5jNzMrKytz2fLly132y1/+0mWqLJjoMcLqe6rXT7ZPcuUTAgAAwIYAAACwIQAAAMaGAAAAWBMsFaoinSq9qOmDZrr4oh6rpk+pCVmqFLh48WKXHTt2TF6PmhioCn/qepD51ATBqIWsoqIi+T1VAeqdd95xmSoLqjLXuXPnXBZPeapFixYuU0fhfulLX3KZKryqUq0qeKmfA5JPHQU/e/Zslw0YMMBl6vdmZvazn/3MZevXr3fZhQsXIlxh4tRkW1VMV4/LpqO5+SsCAADYEAAAADYEAADA2BAAAABjQwAAACzL7zJQdxT07dvXZaNHj3bZM888I79nSUmJy9TYS3VXwKJFi1ym7jL4/PPPXRbWOlUN7Y4dO7ps6NCh8utvplq9u3btcplqliMx6k6QMWPGuOzP//zPXabOl6+urpbPs2XLFpc999xzLlNr+OLFi/J7JqKwsNBlEyZMcNn06dNdpsYMHzhwwGUnT550WbaPkc0WavywylQDX42wNtPvkXV1dXFfW7K0a9fOZX369HFZXl6ey2pqalyWzn/L/4VPCAAAABsCAADAhgAAABgbAgAAYFleKmzTpo3LVIHw7rvvdpkqD5qZtW7d2mVRRxKr7Pjx4y6LZ2ylKuJ069bNZf369Yv0/dSoz8OHD7tMjYdFYtTvskePHi6LWsg6ceKEfJ6KigqXqXV46dIl+fXJptarKsGq4pZahypTxd9MHQ+bzdTY9EmTJrmsuLjYZaoEvn37dvk8R44ccVkqRlGHjYAfN26cyx544AGXqZHca9ascdm6detcptZwqvEJAQAAYEMAAADYEAAAAGNDAAAALItKhWpin5pspqYFhhUIFVUgbIwJhFGps8anTZvmsqilQjW9LdHz7uHl5OS4TP2Ovv3tb7tMlQrVxL6lS5fK51b5F198IR/bUKrQq67bzOxP//RPXXbvvfe6TP3M1DTF/fv3u0xNfrtx44a8HjScKreq91f1fl1bW+uyt956Sz6Pmu6Xit+num4zs+HDh0fK1Hvp5s2bXaZKk5mATwgAAAAbAgAAwIYAAAAYGwIAAGAZWipUxY7c3FyXjRw50mVqKpqaPlhZWSmfO1UTCG+mpniZmXXq1Mll6t+oyj5In/z8fJepUmHv3r1dpta/Oib2k08+kc+tjgJONjWJbsSIEfKxanqo+vqox4yr114mTHlratTUPrWu1RpW1IRJVeI2M7t+/Xqk75lsahKjmZ6iqd5z1WtPHdfN8ccAACBjsSEAAABsCAAAABsCAABgGVAqVMWViRMnukwVk77xjW+4TJUPL1686LLXX39dXs+yZctcluwJhOoa1URCM7Mn
n3zSZWPHjnWZKsOoo47VJC0KWcmnpu6pSX5qLSjq6Ff1uwx7bFRRC73qtfe1r31Nfs9hw4a5TL3u1etMTQnds2ePy1jDyafWQseOHV2mjrJWv9+oU1JTRRW5BwwYIB87depUlxUVFbnsnXfecdlnn33mslQdPR4vPiEAAABsCAAAABsCAABgbAgAAIBlaKlQlVTUkb9q2pkqrqjpUTt27JDXU1VV5bJkTyBUBUJVmgzL1b9bXeOJEydctmvXLpepCWIcHRudmlimpkmqQpZaH6ogp44vTvSIavXchYWFLlPr9fbbb4/0ODP9GlfHOasjYY8ePeqys2fPyudBcqkS7JAhQ1ympqmqtamm86WqDKoKkmqtL1iwQH59r169XKb+rqxfv95liZR8U41PCAAAABsCAADAhgAAABgbAgAAYBlQKlSFrNLSUpepIzbVscYff/yxy9T0wfLycnk9aqphVG3btnWZKlo9++yzLgsrFfbt29dlqgx26tQpl73wwgsu27Rpk8vOnDnjskSKlE2VKiaZmZWUlLjspZdectmgQYNcVlBQ4DJV8lTHH4eV+FR5S2XqSNfHH3/cZeqYcfVvCTuC+9ChQy7bvHmzyxYvXuyyw4cPuyzRMiWiycvLc5l6H1aPU1NS1TpQj2sMUafDhh3hrd4jt23b5rKdO3e6LJsK2nxCAAAA2BAAAAA2BAAAwNgQAAAAY0MAAAAshXcZtGypn6pr164uu+uuuyI9rqamxmWvvvqqyzZs2OCysKayuk41claNpp03b57LVGt1+vTpLmvTpo28HjXaU50h/qtf/cplK1eudBl3FDScursjTPv27SNl6nuq9r8aFfxP//RP8rlVc1utd3VXgBqLre7mUWsm7DWlzoh/8803XabuEGJtpo96L1R3xah1pMZL7969O9LjElVUVOSy73znOy4bNWqUy9QdbmZm//Iv/+IydfdaZWVlhCvMXHxCAAAA2BAAAAA2BAAAwNgQAAAAy4DRxaq4okZhqsddunTJZaqkosZWhpVH1HjaYcOGuUydCz537lyXqfKhKhCGFdZUgVCVr8rKyiJ97fXr1+Xz4I8LK7hduXLFZdXV1S5T56Krtanccovfu6vz3M10CTYq9TpT/25V6A0riB05csRllFubtkTe1810QVW9R3bu3Nll9957r8vUSG5V3lVFQTOzn/zkJy6rqqpyWbavYT4hAAAAbAgAAAAbAgAAYGwIAACAZUCpUFEFKlUo6dKli8v+8R//0WWqoBJW/lDP06FDB5fl5+e7LCcnR37PKNejCoBmZsuXL3fZihUrXKZKhWrKIRourPipJvmpaWlh0yijPs/NVAHWTL9+orp48aLLjh8/7rK///u/d5k6C97M7ODBgy6rq6trwNUh3aKW5qJOoC0pKZFfrwp7aormwoULXTZjxgyXqULj/v37XbZ48WJ5PWoCYbYXCBU+IQAAAGwIAAAAGwIAAGBsCAAAgGVoqTAqdexmjx49XJZo+UOVtBIpbtXW1rosrJClCoQVFRUuo0CYPmramioaJrJmlETXtZpaeezYMZdt27bNZRs3bnSZKoKZ6aLijRs3olwi0ki9p6gyaNSjtdUEzT59+sjnVkXuoUOHumz8+PEu69Spk8tUKXDHjh0uC1vDTbFAqPAJAQAAYEMAAADYEAAAAGNDAAAALANKhaq4oo5RVUcdq2mBanqbmvwWT0lEXaM6ylYVUtS/5e2333bZBx98IJ973bp1ka4HjS+sCKeOAv7lL3/psgkTJrhMTW8Lm0AYlVrbhw4dcpk6Rvu1115zmSqxNpfJbc3ZiRMnXLZ69WqXPfLIIy4bPny4y1Th+0c/+pF8brWW1Pu9ynbv3u0yNYFw69atLjt9+rS8nuaCTwgAAAAbAgAAwIYAAAAYGwIAAGApLBWGFbLq6+tdpkpMavLVwIEDIz1OCStAqcJe1LLgmjVrXKaKOe+//36kx4VdD9IjbA2r6W2qfKXW5p133umy
qMdoh63hK1euuGzz5s0uW7Vqlcu2bNniMnU0NwXCpk8VuT/99FOXqcLeokWLXNatWzeXhR1/rKYfqtfZW2+95bIlS5a4bO/evS5Tr5Pmvq75hAAAALAhAAAAbAgAAICxIQAAAGYWxFOiCIIg6Y0LNUWwc+fOLlOFlClTprisoKAg0vPGU3JU0wJVqVCVD1U5phkXBT+KxWKj0nkBjbGGlTZt2risuLjYZXPnznVZY6zhpUuXukwVWVWRDP+fZrOGo1Il2GnTprmsX79+Lgtb6+fPn3eZmra5adMml6lpg829LHiT0DXMJwQAAIANAQAAYEMAAACMDQEAADA2BAAAwDLgLoNEtGyZmsnLzfiugGSjoX0T1nDWYQ3fRN0pVlhY6LL8/HyXha1/tV4vXLjgspqaGpddv35dfk/8P9xlAAAAwrEhAAAAbAgAAAAbAgAAYGapaTQ1EopSyHasYWQ7VUyvrq6OlCGz8AkBAABgQwAAANgQAAAAY0MAAACMDQEAADA2BAAAwNgQAAAAY0MAAACMDQEAALD4JxVWm1llY1wImoXSdF+AsYaRGNYwsl3oGg7U2EkAANC88L8MAAAAGwIAAMCGAAAAGBsCAABgbAgAAICxIQAAAMaGAAAAGBsCAABgbAgAAICxIQAAAMaGAAAAGBsCAABgbAiSIgiCTkEQLA2C4EIQBJVBEDyU7msC4sEaRrZjDScu3uOPob1qZlfMrKuZjTSzsiAIKmKx2O60XhUQHWsY2Y41nCCOP05QEAR5ZlZrZkNjsdi+32X/ZWbHYrHY02m9OCAC1jCyHWs4OfhfBokbYGbXfr8If6fCzIak6XqAeLGGke1Yw0nAhiBx+WZ2/qbsnJm1S8O1AA3BGka2Yw0nARuCxNWbWcFNWYGZ1aXhWoCGYA0j27GGk4ANQeL2mVnLIAj6/0E2wswosiBbsIaR7VjDSUCpMAmCIHjTzGJm9i37bbv112Y2lnYrsgVrGNmONZw4PiFIjsfMLNfMTpnZG2b2lyxCZBnWMLIdazhBfEIAAAD4hAAAALAhAAAAxoYAAAAYGwIAAGBsCAAAgMV52mEQBNySgERUx2Kxzum8ANYwEsQaRrYLXcN8QoBUqkz3BQAJYg0j24WuYTYEAACADQEAAGBDAAAAjA0BAAAwNgQAAMDYEAAAAGNDAAAAjA0BAACwOCcVNldBELissLDQZfn5+S67cOGCy86ePeuy69evy+e+ceNGhCsE4nfLLf6/B1SWqGvXriX9ewJIPj4hAAAAbAgAAAAbAgAAYGwIAACANeNSYVh5qk2bNi4bMGCAyxYsWOCy2267zWVHjx512c6dO1328ccfy+vZtGmTy06fPu2yWIwTUfFbqgSr1vW0adNcNnLkSJdFLRqeP39e5kuXLnXZoUOHIn1PAKnDJwQAAIANAQAAYEMAAACMDQEAADA2BAAAwJrJXQaqdX3HHXfIx86bN89lM2fOdNnAgQNd1rp1a5eNGjXKZbNnz3ZZbW2tvJ6VK1e6bMmSJS7bu3evy65cueIy7kZo+nJzc13WvXt3l82dO9dlY8aMcVnUuwzUSG4zswMHDrjsyJEjLgsb3w3EI1UjudVY+WwfNc8nBAAAgA0BAABgQwAAAIwNAQAAsGZSKlSFEjVm2MzsW9/6lsuKiopcpoqKqrCnzoI/fvy4y9q2bSuv5/7773fZiBEjXLZo0SKXbd261WWnTp2Sz4PMpkYPq2KrmS7GqjUzffp0l+Xk5DTg6n4rrBT4F3/xFy47fPiwy9TroqamxmXqNYWmRa33du3auaxXr14uKy0tddnQoUNdFk/RUI3l3r9/v8tWr17tssuXL7ssUwu0fEIAAADYEAAAADYEAADA2BAAAABrJqVCVQDs3bu3fGz79u0jfc9Lly65rK6uzmW7d+922fPPP++ywsJC+Tzz58932ZQpU1z28MMPu6xVq1YuW7ZsmcsyteDSHKjfkVqDs2bNctmCBQvk91RlQ/U8LVq0
cJkq7KlSlCp9tWyp305UeVG9/lRJa8uWLS57++23XVZZWekypnJmvrA1U1JS4rIhQ4a4bOLEiS7r06dPpK+Np1R47tw5l6n3dlWWraqqctnp06ddlgnrlU8IAAAAGwIAAMCGAAAAGBsCAABgzaRUqIQVSlQBURUIVbFJTalau3aty44dO+aysGMzVdFKFbLGjx/vMlVSWb9+vctUwQWpoQqEaqrgnDlzXNavXz/5PROZNqgKhNXV1S5T0zvDCmKqgDhgwACXFRcXu0xNolMTDU+cOOEy9W8xy4zyVlOnCqsdO3Z02UMPPSS/XhWnVTFQrY/GOP5YrZlhw4a5bOTIkS7bsWOHy9QR9urvgnrtNSY+IQAAAGwIAAAAGwIAAGBsCAAAgDWTUqEqCoaVTFS5r7y83GXPPfecy1Qp5OLFi1EuMdSnn34a6Xoee+wxl91xxx0u69mzp8soFaaGKvupCYSqQKgeF1biU6Ie160KgKpAqB4XD/WzUJl6bvXaq6+vd5macmiW+qJWU6emYA4ePNhlX/7yl1329NNPy+/ZpUsXlyVaDEyEev2o18Dw4cNdNmjQIJepI5rVFNl//dd/ldfTWEeA8wkBAABgQwAAANgQAAAAY0MAAACsmZQKW7du7bL8/Hz5WFUq3Ldvn8vUkZZqomGirl696rJTp065TF23+jeqyV5qklbY5EREoya1qWOAFy5c6LIePXq4TBUIVdHJTJcFo07nU9etirGqiKrWqplZhw4dXKam1kWdMKcmdapjcNVr1MzszJkzLmN6YTRqfahpm/fcc4/LRo0a5TK1DsySXyBU72eJvE7CqNep+vujXuOq8B1W3qVUCAAAGg0bAgAAwIYAAACwIQAAANYES4Wq9DJ16lSX3XvvvfLrjx496jJ1ZLAqWjVGMUl9T1UqVEe9UpRKDVXu69Onj8seffTRSI9Ta1hJ9PerynWHDh1y2SuvvOKy3bt3u6yurk4+j5paN3bsWJf17dvXZRMmTHBZYWGhy+bOnesyNeXQzOy73/2uy5he6EVd1+PGjXPZE0884TL1e2uMaZuqQLhhwwaX7dy502XqNWGmS46qtK2mjKpjvVWBUK1h9bfHzKysrMxlySiC8wkBAABgQwAAANgQAAAAY0MAAACMDQEAALAmeJeBarLOnz/fZWGji//2b//WZeXl5S5L1WhfNfZy6NChLsvNzXWZaszu2bPHZYwpToz6Hd15550uU2elR72jIGrDOowadVpRUeGylStXukw1tE+ePOmyK1euyOeuqalxWWVlpcvUXQZq7PGkSZNc1rVrV5cNGTJEXo967Tf3uwzU+srJyXGZWtfqLi71e0v0joKo1PuZuqPg3XffddnBgwfl91Sv03bt2rmsS5cuLlN31JSWlrpMreF+/frJ61F3PXCXAQAASAo2BAAAgA0BAABgQwAAACzLS4Wq6KFGnY4fP95l586dk99z8+bNLrt06VIDri451NjLyZMnu0wVSs6fP+8yNeIY0YSVotTv6Mknn3RZ7969G/zc8ZzdrsYPf/zxxy5TI4lVgTCsLBjV6dOnXaZKfGoc8ogRI1ymXuPq3Pj27dvL64mn3NbUtGrVSuZqvPTIkSNd9swzz7ise/fuLlO/j3hELcyqx129etVlqiz42WefuUy9dsKo4uM///M/u0yNmv+rv/orl6mfWUFBQeTrSQY+IQAAAGwIAAAAGwIAAGBsCAAAgGVRqVBNZurUqZPLHnzwQZepyWTvvfeefB41VS0VwibW3XHHHS5TZ2kfOHDAZStWrHDZiRMnGnB1MAsvZKnpmEVFRS5LZPqaKoPu3btXPnbx4sUu27Ztm8tU0er69esNuLr4qemOJSUlLlPTHdV7gbpuJnB6YSW1mTNnuuwrX/mKy9TvKJECYTzTNtVj1QTOs2fPukyVBS9cuBD5uaNeT11dncvq6+sTep5U4hMCAADAhgAAALAhAAAAxoYAAABYFpUKVemuY8eOLhs2bJjL
VOFuyZIl8nnUkcGpoI7DNDP7+te/7jJVDHr99dddpo73TOfUxWynjjs1M+vTp4/L8vLyGvw8qiB37Ngxly1btkx+/aZNm1ymyqSpKhAq6nhc9dpVRxirUuHFixddpgpeZrqI1lyEvc9885vfdFmvXr1cpsqgiircqWLs1q1b5der37Ga+Ldr1y6X1dbWukxN4Ey0QK6uMZHicCbgEwIAAMCGAAAAsCEAAADGhgAAAFgWlQpVCUkVjlTh7o033nDZvn375PPEMzmroXJzc102f/58+dipU6e6TB3drI5tPnLkSAOuDmZ6+tqMGTPkY+fMmeMyNUVTUccAV1RUuEwdVVxeXi6/5xdffBHpuZMtbNqmKrKpEtvo0aMjfa0qQ65atcplYaXLqqoqmTc1qvQWVipUR0Uncky0Ki8fP37cZUuXLpVfrwqI6hhtdWS2Oq5bvWfGM8lSTSlVf2vUFFlVflfU355UT9vkEwIAAMCGAAAAsCEAAADGhgAAAFgWlQpVWWPs2LEuUxPi1JS3q1evJufC/ghVRlFHGt93333y61VRa+XKlS5T0+nSdZRzU1BcXOyyxx9/XD72tttuc5n6vaky3Lp161z21ltvuaysrMxlqZq4p/4tqnQ5bdo0+fWPPvqoy1RZNicnx2WqGKcmj7722msu27Jli7weVVhritTPThWxzXRpW329on6e77zzjsvUkfPqcWa6GBi18B11gqAqd3ft2lV+z969e7ts0qRJLlPlcPWzVdejJiyqSYxmjVc25BMCAADAhgAAALAhAAAAxoYAAABYhpYK1YSsyZMnR8rU8cXr1693WWMUslT5avDgwS578cUXXTZgwAD5Pffu3esydXSzmuKViqmLTZVag2HHH0c9ElYVWVVpSE3RTGeBsLCw0GWqdKkmNpqZjRkzxmVt27Z1mSpaqWONt23b5jI13VFNp2vuLly4IHO1vlQhWpXZ6uvrXabKsh999JHLwo7gVmtBlQVVYU8Vy1UJNj8/32UTJ06U16NKhaqgqcrv6r1E/RzVcd2VlZXyeigVAgCARsOGAAAAsCEAAABsCAAAgLEhAAAAlqF3Gajzo1VTWTU6z54967LGaBsXFRW5bMSIES574oknXPalL33JZUePHpXP89JLL7lM3XnAHQWNL2yUq2pEKydOnHDZihUrXPbZZ5/Fd2E3Ua1mdY3qdTZhwgSX/dmf/ZnL+vfv77KBAwfK61ENbzXuVr1O1ZjuF154wWWqjd3cXxPqzgF1x5WZ2cmTJ13Wo0cPl0UdC6zeC3v16uWysLsMlKijmEtLS11WUFDgMnUXRdjoYvXYqD8LNYZ5z549Lvvwww9ddvjwYXk9jYVPCAAAABsCAADAhgAAABgbAgAAYBlaKlSjJ9XoSDV6sqamJunXo8a5lpSUuGz06NGRMlWk2bp1q3zuDz74wGWqpILMp0peYaNkb6bOblfFPjOzKVOmuEyVqoYNG+ay8ePHu0yNLlaFqrBxqqdOnXKZKguuXbs20uMY091wasywmR6rrajfuxrpPWvWLJep9Z/o702NLlZ/F1QpUFHv9Wb6OtV6V2VZVdhcvny5y7Zv3+4yNc64MfEJAQAAYEMAAADYEAAAAGNDAAAALENLhVHPolcFl927d7ssnkmF6pz2adOmueyb3/ymy4YPH+4yVeZ65513XLZo0SJ5PZ9//rnLKFBlp06dOrnssccec1ltba3LVNF26tSp8nlU2VC9plSBSk1aU0Wp8+fPu2zDhg3yet58881Ij1WvU/XcaLiwn2fUn71aR6qwp0rg6aSK3KpIGTYZcMeOHS47dOiQy44dO+ay/fv3u2z16tUuUz/veCY5JgOfEAAAADYEAACADQEAADA2BAAAwDK0VKgmWqkSk5rYV1VV5TI15U0VAM3M5s2b57L58+e7TB3lqUoqUQuEqjxoRoGwKVHF2Hvuucdlav2rMle3bt3k8+Tk5ES6HjVp7dKlSy5TxzGrTJUHzXSB8MyZ
My5LdYGqOVLvj2ZmP/zhD12mytTqGPqwY8HTRa3rnTt3ukwV0N999135PY8cOeIyNfVR/Q1Q1xM21TPdMus3CQAA0oINAQAAYEMAAADYEAAAAMvQUuGJEydctmLFCpfNnTvXZQ888IDLBg4c6LKwQtatt97qstatW7vs4MGDLtu2bZvLohYIKQ9mvrAiUNTfXaomuqlioCrlbty40WVbtmxxWXl5ucvUa1QVBc0oC2aSsKPT1e9dlebU+142lAp37drlsgMHDrhMlWXN9BTBTC0GJiKzfpMAACAt2BAAAAA2BAAAgA0BAACwDC0VqlLU0qVLXabKXE899ZTLJk+eHOlrzXR5RBUaX3vtNZdVVFS4rLKyMvJzI3OcPXvWZWHH+168eNFlqsiqSoWKWh/qetT0NDM9bU2VqtS/p6amxmVqciKyU9h7jzrKVx0FvHLlymRfUkpk07TAdOITAgAAwIYAAACwIQAAAMaGAAAAmFkQT8EtCIK0teHatGnjsuLiYpc98sgjLrvrrrtc9j//8z/yeY4dO+ay5cuXu0yVBZnI9kd9FIvFRqXzAqKu4SAIXNa5c2f5WDX1csqUKS4rKCiI8tSRJ62pEquZ2fHjx12mjmVlvTZI1qxhIEToGuYTAgAAwIYAAACwIQAAAMaGAAAAGBsCAABgWXSXQVQ5OTkuKywsdJka0Wqm29iMbk2arG5ot2jRQuZqJLG686Bly4ZPCj937pzLamtr5WNZr40qq9cwYNxlAAAA/i9sCAAAABsCAADAhgAAAJhZw1tOGerSpUsuU+OIgXiFjfpVuTpfHgAyGZ8QAAAANgQAAIANAQAAMDYEAADA2BAAAABjQwAAAIwNAQAAMDYEAADA2BAAAACLf1JhtZlVNsaFoFkoTfcFGGsYiWENI9uFruEgFuNobQAAmjv+lwEAAGBDAAAA2BAAAABjQwAAAIwNAQAAMDYEAADA2BAAAABjQwAAAIwNAQAAMDYEAADA2BAAAABjQwAAAIwNQVIEQdApCIKlQRBcCIKgMgiCh9J9TUA8WMPIdqzhxMV7/DG0V83sipl1NbORZlYWBEFFLBbbndarAqJjDSPbsYYTxPHHCQqCIM/Mas1saCwW2/e77L/M7FgsFns6rRcHRMAaRrZjDScH/8sgcQPM7NrvF+HvVJjZkDRdDxAv1jCyHWs4CdgQJC7fzM7flJ0zs3ZpuBagIVjDyHas4SRgQ5C4ejMruCkrMLO6NFwL0BCsYWQ71nASsCFI3D4zaxkEQf8/yEaYGUUWZAvWMLIdazgJKBUmQRAEb5pZzMy+Zb9tt/7azMbSbkW2YA0j27GGE8cnBMnxmJnlmtkpM3vDzP6SRYgswxpGtmMNJ4hPCAAAAJ8QAAAANgQAAMDYEAAAAGNDAAAALM7DjYIgoIGIRFTHYrHO6bwA1jASxBpGtgtdw3xCgFSqTPcFAAliDSPbha5hNgQAAIANAQAAYEMAAACMDQEAADA2BAAAwNgQAAAAY0MAAACMDQEAALA4JxWmU4sWLSI9LgiCSI+7ceNGXDmQzVq29C/1Vq1auSwvLy/S97tw4YLL2rRpIx9bX1/vsmvXrkV6HiATqL8rhYWFLsvPz3eZeq2cOXPGZZnwt4dPCAAAABsCAADAhgAAABgbAgAAYBlaKmzXrp3Lpk6dGulxpaWlkZ5j165dMq+oqHBZVVWVyy5fvuyyTCiFoGm65Ra/dw8r8fXv399lU6ZMcVlJSYnLevXqFel6Dh8+7DJVsjLTr7Xy8nKXffrppy67cuWKy2IxTv9F4wgrpavX2pgxY1zWt29fl6nXyoYNG1x27tw5l129elVeT2PhEwIAAMCGAAAAsCEAAADGhgAAABgbAgAAYBl6l4Fq8Ldu3dplzzzzjMui3mVw9uxZmR85csRla9ascdn69etdtnr1apd98cUXka4HzZNqNXfu3Nll48aNc9nYsWPl95w2bZrL1J0HanSxuptBUXfUhDW01Z0C8+fP
d9nixYtdtm3bNpcdPHjQZdevX5fPDYRR63/w4MHysSNHjnTZwoULXVZcXOyyuro6l7388ssue/fdd10WdjdcY+ETAgAAwIYAAACwIQAAAMaGAAAAWIaWClVBSBWJop7xHrW4FZYPHz7cZXPnznXZj3/8Y5f9+7//u8tqampcFjb2mHHITUfbtm1d1r17d5epstKMGTNc1qlTJ/k86nWRbFHLh2Zmubm5LlOvqeeff95lH3/8scteeeUVl6niL6+d5km936tS+qhRo1z24osvyu/Zr18/l3Xp0iXSc+fl5bnsvvvuc5kq3+7du1dez7Vr12SeKD4hAAAAbAgAAAAbAgAAYGwIAACAZUCpUJWT7rrrLpc98cQTLlNToRR1fnrYmepnzpxxmZpq2L59e5epa1TFk+rqapft2LFDXo86N15NcuSM+MxSVFTksu985zsuU8Wm6dOnuywnJyeh61EFO1XeVWWlkydPRnqOsNejOktela/69Onjsp49e7pMnRG/c+dOl506dUpeD5o2VWLt1q2by+6++26XqYmeZrrAq9awytR7c4cOHSJdoyrJm1EqBAAAjYgNAQAAYEMAAADYEAAAAMuAUqGaIPWVr3zFZaNHj470tVGpYp6Z2ZYtW1xWUVHhshEjRrhs0qRJLps9e3ak51aFKjOzPXv2uKyqqsplFy9edBmT2lJDFQjnzJnjMlU6LSwsdFnUSYNhRdLTp0+7bNOmTS5TUwDPnz/vMnXUtypPPf744/J6JkyY4LKSkhKXqfKh+llMnDjRZep46OXLl8vr4XWRndSkT1XEe/jhh12mji+eOnWqy9QaNIs+mTNquVsVcFWZvmvXrvLrjx496rJkFA35hAAAALAhAAAAbAgAAICxIQAAAJbCUqEqIZmZDRw40GUPPvigy1S5QhU41BTAqEUpM7Mf/OAHLjt8+LDLevXq5bLvf//7LlMFqN69e0f6fma6dLZv3z6XqX+PmnJ46dIl+Tz448LKfg899JDLnn76aZdFPS5VrWs1QfPQoUPyel599VWXrVy50mW1tbWRnluVldR1f/e735XXo9b7k08+6bL777/fZarkpabGjR071mXvvfeevB5VwEVmUb93NUVQlQVVoVeV+Fq0aOGysKmc6nrUtFr1ulDvGypTpclUHGX+h/iEAAAAsCEAAABsCAAAgLEhAAAAlsJSoSrHmZktWLDAZapoqI48/eSTT1y2ZMkSl+3evdtlYYUsVd5SRasDBw647Hvf+57LVKFKTXSbMWOGvJ6ZM2dGeuzXvvY1l6mS14oVK1ymjsGFF3YUaY8ePVwW9bhU5cqVKy5TEzTDirEbNmxwmVrXifze1WuipqZGPlaV+FavXu0ydeyz+pmrqXFq0md+fn7k60F6hJXm1NHEzz77rMu6d+/usoKCApepYvlbb73lstdff11ej/r7NXToUJep170qHavXnppKe+7cOXk9jTVtk08IAAAAGwIAAMCGAAAAGBsCAABgKSwVdujQQebqWGM1FergwYMu++u//muXqfKVKmlFPaYyTNQpiarM9Td/8zcuU8fTmunJiaq4okpVjz76qMtUwVL9bCkaemFHkapjS9XR3FGnEqp19PLLL7ssrFSo1nsqhBWdLly44DJ17WoiaF5enstyc3NdNmTIEJepSXJm+nhoND5VsFbHyJvpo8JViU+Vzd9++22XrVq1ymVqeqd67YUpKytzWWlpqctUWVZR781nz56Vj6VUCAAAGg0bAgAAwIYAAACwIQAAAMaGAAAAWArvMghr/Ko7ClSDcvv27S5TrczLly834Ooaj2qRnzp1ymWqBWumx14OGjTIZaoBH7Xd+p//+Z8ue//99+Vj09VgzwSqLW+mx2Crc9qjnm1eVFTksm984xsu279/f+TryTRVVVUuW7t2rcvUHTWqra7GGSN91FpXdxTMmjVLfr16rLqj4Pjx4y577733XLZt2zaXqTvA4mnvX7t2zWVR715Tz5PI90sW
XkUAAIANAQAAYEMAAACMDQEAALBGKhXm5OS47J577pGP7datm8tU4eKTTz5xWdhYx2x07NgxmavxzF/+8pdd9nd/93cuU0XDmTNnukyVazZs2CCvpzmXCtXPyczsRz/6kctUMXDixIkuU2U4VbRV5at169bJ6/nxj3/sskwbRa0KYnV1dZEeh8yi1roqBapxxMOHD4/8PL/4xS9c9t///d8uW7ZsmcsuXbrkskQLez179nSZGsWvCvUbN2502a5du1zWWCOKw/AJAQAAYEMAAADYEAAAAGNDAAAArJFKhYWFhS6bNGmSfKwqUKkCiCpmtG3b1mXnzp2LcokZJ6w8pYomqoDYo0cPlz3++OMuUz/H8+fPR76e5iys4LNz506XJbvwevjwYZd98MEH8rGZViBE05Gbm+syVXidM2eOyyZMmOCysPeZJUuWuOyVV15xmXqdNca0WjV5ce7cuS578MEHXdahQweXqSm7qjhPqRAAAKQcGwIAAMCGAAAAsCEAAADWSKVCVeqorq6Wj1WlCXU88IEDB1yW6qMh00EVxFRxcs2aNS57+OGHXdaxY0eX3XrrrS5TUw7NzD7//HOZN2eqqDl48GCXRT2iV5Vqly5d6rJ9+/ZF+n5AQwRB4LLu3bu7bOrUqS4bNmyYy9R7mToG28zsww8/dJmaFKqODG4MrVq1cpmasquO61Y/R/V3LxPKwHxCAAAA2BAAAAA2BAAAwNgQAAAAa6RSoSq97dmzRz723nvvddmhQ4dctmrVKpep41KbAzXdS0252r59u8tKS0td9tWvftVl8+fPl8/90ksvuUyV4JqisFLgkCFDXKbKm1GdOHHCZStWrHBZNv/cVUmrXbt2kR6H5FNrWxXkFi5c6LJ58+a5TP3eysrKXKaOKjYze//9912WigKhmkhoZlZcXOyyyZMnR3qcOjJeTYdNVUHy/8InBAAAgA0BAABgQwAAAIwNAQAAsEYqFaoJgmHHOKoyy6hRo1x2//33u6w5F9xupo4BVUdsqhJnXl6ey2bPni2f5/XXX3eZKoE2RWGlwqFDh7pMHXkalSqNZuux3mZ6UtuAAQNcpibeqWlwSqaWtLKFOoa+V69eLps4caLL1OtCHdf92muvuWzLli3yelLxPq7WZe/eveVjx48f77KePXtGeh5VnFSTR8OmNqYSnxAAAAA2BAAAgA0BAAAwNgQAAMAaqVSoqNKPmS5QqZKbmob1q1/9ymU7d+50WSYcK9nYWrdu7bL8/Pw0XEnzo0pVqrCUiBYtWiT1+6VSYWGhyxYsWOCygQMHukyV3VRZcO3atS47efJktAuEnBLZt2/fSI9ThVf1PqyydJZl1XvmnXfeKR87ZcoUl6n31/r6epetXr3aZWoaqZpomGp8QgAAANgQAAAANgQAAMDYEAAAAGNDAAAArJHuMlAt4HXr1snHqnGNqt2qGsjPPvusy1555RWXqXZrdXW1vJ6wEcuNLWwsrmpZq6bvjBkzXDZnzhyXqfO+4/l90dz21B006u6ZsLPWb9axY0eXjRkzxmVqPKyZ2cWLFyM9T7KF3QmhxsFOmDDBZWqtKzU1NS7bvHmzyy5fvhzp+zUnYe8z48aNc9kDDzzgMjWSe8WKFS574403XFZZWemyVN0Bpu76GTRokMueeuop+fX9+vVzmfpbof6N69evd1mmjtjnEwIAAMCGAAAAsCEAAADGhgAAAFgKRxeHFaBU4aJ79+4uy8nJcdmsWbNcpgp3qlT4m9/8Rl6PGjP5xRdfyMdGoa5b/ftGjBghv37s2LEu69Onj8vUed2dO3eOcomy4HLs2DH5WFWWay7CCqeHDh1ymRphmpubG+l51BpWo1M3btwov14Vm2KxmMtUmTQqVSBUI4rNzEaOHOmygoKCBj+3Gvt64MABl6WrIJzJwkqFQ4YMiZSpct7x48dddubMGZepUcFh43pV2TBqKVeVU4uLi12mCoSqvB7m7bffdtl7773nsrD30kzEJwQA
AIANAQAAYEMAAACMDQEAALAUlgpVycTM7Ac/+IHL1KS2e+65x2WqZHLXXXe5bOLEiS6bPXu2vJ6f/vSnLnv33Xddps7xbt++vcvUdavn7tmzp7we9bNQxSCVqQLg0aNHXfbzn//cZUuXLpXXk0gRLduFldQqKipcpop9RUVFLlMlLVWKuu+++1wWVszbsWOHy86ePesyVehVZUhl2LBhLnvwwQflY9VUwrAC4s1U4VWtzX379kX6ftAuXLgQKVPrdfDgwS5T0zvV3wBVEDXT67BLly4uU38D1DTFoUOHumz06NGRvp+Zno65atUql23bts1lYcXJTMQnBAAAgA0BAABgQwAAAIwNAQAAsBSWCtWkNDM95UqVoqZOneqyvLw8l0Ut3JWUlMjrUUcG5+fnu6yurs5lasKcuu7+/fu7LOzo17DJYlGoYo86JrasrMxl6lhqaGoS2Q9/+EOXPf/88y5TUyvVRMOokzrN9FHYqmCqjrKOOolSFbc6deokHxt2LPLNVIlt//79LlOlwkw9TjbThJWCly9f7jJVpHvxxRddpkrbqrCn1quaMGmmS7nqedTfAPVeqt7DVbZr1y55PR9++KHLVq5c6TJVnAz725eJ+IQAAACwIQAAAGwIAACAsSEAAACWwlJhGHW08E9+8hOX9evXz2WqAKhKJkpYiW/48OEuU5O4oop6ZGc8ok4gVEc5/8M//IPL1NHUzfmY43hdvnzZZcuWLXOZKojefffdLps3b57LVNEwrHCqcrUOe/fuLb8+FVS5TU1+Uz/HTz/9tDEuqVlTEwNVuVuVPFXBVL2/qjK1OpbYzOz22293WdeuXV2m1rWapqiKfUeOHHGZKleamW3fvt1lalqtOrY5m/AJAQAAYEMAAADYEAAAAGNDAAAAzCyIZ4pSEAQpGbmkSiGlpaUue+SRR1ymClndunVzmZoqaKbLdGHHzDaUKlTV1tbKx0Y9MvS5555z2e7duyN9vxRO0vooFouNStWTKalaw4oqBqpJhQsXLnSZmtIWVshq1aqVyxqj3HqzsCl4auKdOnr53/7t31ymjpaurq5uwNUlTbNZw2odqYL1ggULXDZkyBCXqfdwNS3QTK9XdYS3mm6p3uMOHjzosp/97GcuKy8vl9ejisNZXCAMXcN8QgAAANgQAAAANgQAAMDYEAAAAMvQUmFU6kjYAQMGuOzWW291Wd++feX3VEWtmTNnuizqka7qmOQ1a9a4TB1LbKaLgVeuXHGZOoI3A6cNNptCVlSqQFtYWOiyXr16uWzSpEnye6qjvdVjEynLqqO1165dKx+r1vamTZtcpsqCN27ciP/iGlezXsNR16taWyNGjHBZ2LTMqEcTq2OSVYlVlQ/V47K4KBgPSoUAACAcGwIAAMCGAAAAsCEAAADGhgAAAFiW32UQlTofPuws+fbt27usY8eODX5uNc715MmTLlOjMc0ysmWdiGbd0E62sHHEauRs1LPko4q6rs302s7idc0avknU91f1PpqXlye/p1qb586dc1l9fb3L1N1Var1l8RpMFHcZAACAcGwIAAAAGwIAAMCGAAAAWDMpFSJjUMhCtmMNI9tRKgQAAOHYEAAAADYEAACADQEAADA2BAAAwNgQAAAAY0MAAACMDQEAADA2BAAAwMziPf+02swqG+NC0CyUpvsCjDWMxLCGke1C13Bco4sBAEDTxP8yAAAAbAgAAAAbAgAAYGwIAACAsSEAAADGhgAAABgbAgAAYGwIAACAsSEAAABm9r9cNQN7oeF7cwAAAABJRU5ErkJggg==\n",
- "text/plain": [
- "<Figure size 648x648 with 9 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "torch.Size([128, 1, 28, 28]) torch.float32 tensor(0.) tensor(0.2204) tensor(0.3593) tensor(1.)\n",
+ "torch.Size([128]) torch.int64 tensor(4) tensor(4)\n"
+ ]
}
],
"source": [
- "display_images(dataset)"
+ "x, y = next(iter(data.test_dataloader()))\n",
+ "print(x.shape, x.dtype, x.min(), x.mean(), x.std(), x.max())\n",
+ "print(y.shape, y.dtype, y.min(), y.max())"
]
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
- "image/png": "\n",
+ "image/png": "\n",
"text/plain": [
"<Figure size 648x648 with 9 Axes>"
]
},
- "metadata": {},
+ "metadata": {
+ "needs_background": "light"
+ },
"output_type": "display_data"
}
],
"source": [
- "display_images(dataset, 9)"
+ "fig = plt.figure(figsize=(9, 9))\n",
+ "for i in range(9):\n",
+ " ax = fig.add_subplot(3, 3, i + 1)\n",
+ " rand_i = np.random.randint(len(data.data_test))\n",
+ " image, label = data.data_test[rand_i]\n",
+ " ax.imshow(image.reshape(28, 28), cmap='gray')\n",
+ " ax.set_title(data.mapping[label])"
]
}
],
@@ -143,7 +122,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.4"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/notebooks/02b-emnist-lines-dataset.ipynb b/notebooks/02b-emnist-lines-dataset.ipynb
index f82342b..7bc979d 100644
--- a/notebooks/02b-emnist-lines-dataset.ipynb
+++ b/notebooks/02b-emnist-lines-dataset.ipynb
@@ -322,7 +322,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.2"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/pyproject.toml b/pyproject.toml
index 33d539e..e9a41b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,7 @@ fail_under = 50
[tool.poetry.scripts]
download-emnist = "text_recognizer.datasets.emnist:download_emnist"
+generate-emnist-lines = "text_recognizer.datasets.emnist_lines:generate_emnist_lines"
download-iam = "text_recognizer.datasets.iam_dataset:main"
create-emnist-support-files = "text_recognizer.tests.support.create_emnist_support_files:create_emnist_support_files"
create-emnist-lines-datasets = "text_recognizer.datasets.emnist_lines_dataset:create_datasets"
diff --git a/text_recognizer/datasets/base_data_module.py b/text_recognizer/datasets/base_data_module.py
index 830b39b..f5e7300 100644
--- a/text_recognizer/datasets/base_data_module.py
+++ b/text_recognizer/datasets/base_data_module.py
@@ -46,7 +46,7 @@ class BaseDataModule(pl.LightningDataModule):
def setup(self, stage: str = None) -> None:
"""Split into train, val, test, and set dims.
-
+
Should assign `torch Dataset` objects to self.data_train, self.data_val, and
optionally self.data_test.
diff --git a/text_recognizer/datasets/base_dataset.py b/text_recognizer/datasets/base_dataset.py
index a004b8d..a9e9c24 100644
--- a/text_recognizer/datasets/base_dataset.py
+++ b/text_recognizer/datasets/base_dataset.py
@@ -61,13 +61,13 @@ def convert_strings_to_labels(
strings: Sequence[str], mapping: Dict[str, int], length: int
) -> Tensor:
"""
- Convert a sequence of N strings to (N, length) ndarray, with each string wrapped with <S> and </S> tokens,
- and padded wiht the <P> token.
+ Convert a sequence of N strings to an (N, length) tensor, with each string wrapped with <s> and </s> tokens,
+ and padded with the <p> token.
"""
- labels = torch.ones((len(strings), length), dtype=torch.long) * mapping["<P>"]
+ labels = torch.ones((len(strings), length), dtype=torch.long) * mapping["<p>"]
for i, string in enumerate(strings):
tokens = list(string)
- tokens = ["<S>", *tokens, "</S>"]
+ tokens = ["<s>", *tokens, "</s>"]
for j, token in enumerate(tokens):
labels[i, j] = mapping[token]
return labels
diff --git a/text_recognizer/datasets/emnist.py b/text_recognizer/datasets/emnist.py
index 7c208c4..66101b5 100644
--- a/text_recognizer/datasets/emnist.py
+++ b/text_recognizer/datasets/emnist.py
@@ -70,9 +70,11 @@ class EMNIST(BaseDataModule):
if stage == "fit" or stage is None:
with h5py.File(PROCESSED_DATA_FILENAME, "r") as f:
self.x_train = f["x_train"][:]
- self.y_train = f["y_train"][:]
+ self.y_train = f["y_train"][:].squeeze().astype(int)
- dataset_train = BaseDataset(self.x_train, self.y_train, transform=self.transform)
+ dataset_train = BaseDataset(
+ self.x_train, self.y_train, transform=self.transform
+ )
train_size = int(self.train_fraction * len(dataset_train))
val_size = len(dataset_train) - train_size
self.data_train, self.data_val = random_split(
@@ -82,8 +84,10 @@ class EMNIST(BaseDataModule):
if stage == "test" or stage is None:
with h5py.File(PROCESSED_DATA_FILENAME, "r") as f:
self.x_test = f["x_test"][:]
- self.y_test = f["y_test"][:]
- self.data_test = BaseDataset(self.x_test, self.y_test, transform=self.transform)
+ self.y_test = f["y_test"][:].squeeze().astype(int)
+ self.data_test = BaseDataset(
+ self.x_test, self.y_test, transform=self.transform
+ )
def __repr__(self) -> str:
basic = f"EMNIST Dataset\nNum classes: {len(self.mapping)}\nMapping: {self.mapping}\nDims: {self.dims}\n"
diff --git a/text_recognizer/datasets/emnist_essentials.json b/text_recognizer/datasets/emnist_essentials.json
index 100b36a..3f46a73 100644
--- a/text_recognizer/datasets/emnist_essentials.json
+++ b/text_recognizer/datasets/emnist_essentials.json
@@ -1 +1 @@
-{"characters": ["<b>", "<s>", "</s>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]} \ No newline at end of file
+{"characters": ["<b>", "<s>", "</s>", "<p>", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "!", "\"", "#", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "?"], "input_shape": [28, 28]}
diff --git a/text_recognizer/datasets/emnist_lines.py b/text_recognizer/datasets/emnist_lines.py
index ae23feb..9ebad22 100644
--- a/text_recognizer/datasets/emnist_lines.py
+++ b/text_recognizer/datasets/emnist_lines.py
@@ -1,16 +1,21 @@
"""Dataset of generated text from EMNIST characters."""
from collections import defaultdict
from pathlib import Path
-from typing import Dict, Sequence
+from typing import Callable, Dict, Tuple, Sequence
import h5py
from loguru import logger
import numpy as np
+from PIL import Image
import torch
from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
-from text_recognizer.datasets.base_dataset import BaseDataset
-from text_recognizer.datasets.base_data_module import BaseDataModule
+from text_recognizer.datasets.base_dataset import BaseDataset, convert_strings_to_labels
+from text_recognizer.datasets.base_data_module import (
+ BaseDataModule,
+ load_and_print_info,
+)
from text_recognizer.datasets.emnist import EMNIST
from text_recognizer.datasets.sentence_generator import SentenceGenerator
@@ -54,18 +59,23 @@ class EMNISTLines(BaseDataModule):
self.emnist = EMNIST()
self.mapping = self.emnist.mapping
- max_width = int(self.emnist.dims[2] * (self.max_length + 1) * (1 - self.min_overlap)) + IMAGE_X_PADDING
-
- if max_width <= IMAGE_WIDTH:
- raise ValueError("max_width greater than IMAGE_WIDTH")
+ max_width = (
+ int(self.emnist.dims[2] * (self.max_length + 1) * (1 - self.min_overlap))
+ + IMAGE_X_PADDING
+ )
+
+ if max_width >= IMAGE_WIDTH:
+ raise ValueError(
+ f"max_width {max_width} greater than IMAGE_WIDTH {IMAGE_WIDTH}"
+ )
self.dims = (
self.emnist.dims[0],
- self.emnist.dims[1],
- self.emnist.dims[2] * self.max_length,
+ IMAGE_HEIGHT,
+ IMAGE_WIDTH
)
- if self.max_length <= MAX_OUTPUT_LENGTH:
+ if self.max_length >= MAX_OUTPUT_LENGTH:
raise ValueError("max_length greater than MAX_OUTPUT_LENGTH")
self.output_dims = (MAX_OUTPUT_LENGTH, 1)
@@ -77,8 +87,11 @@ class EMNISTLines(BaseDataModule):
def data_filename(self) -> Path:
"""Return name of dataset."""
return (
- DATA_DIRNAME
- / f"ml_{self.max_length}_o{self.min_overlap:f}_{self.max_overlap:f}_ntr{self.num_train}_ntv{self.num_val}_nte{self.num_test}_{self.with_start_end_tokens}.h5"
+ DATA_DIRNAME / (f"ml_{self.max_length}_"
+ f"o{self.min_overlap:f}_{self.max_overlap:f}_"
+ f"ntr{self.num_train}_"
+ f"ntv{self.num_val}_"
+ f"nte{self.num_test}.h5")
)
def prepare_data(self) -> None:
@@ -92,21 +105,28 @@ class EMNISTLines(BaseDataModule):
def setup(self, stage: str = None) -> None:
logger.info("EMNISTLinesDataset loading data from HDF5...")
if stage == "fit" or stage is None:
+ print(self.data_filename)
with h5py.File(self.data_filename, "r") as f:
x_train = f["x_train"][:]
y_train = torch.LongTensor(f["y_train"][:])
x_val = f["x_val"][:]
y_val = torch.LongTensor(f["y_val"][:])
- self.data_train = BaseDataset(x_train, y_train, transform=_get_transform(augment=self.augment))
- self.data_val = BaseDataset(x_val, y_val, transform=_get_transform(augment=self.augment))
+ self.data_train = BaseDataset(
+ x_train, y_train, transform=_get_transform(augment=self.augment)
+ )
+ self.data_val = BaseDataset(
+ x_val, y_val, transform=_get_transform(augment=self.augment)
+ )
if stage == "test" or stage is None:
with h5py.File(self.data_filename, "r") as f:
x_test = f["x_test"][:]
y_test = torch.LongTensor(f["y_test"][:])
- self.data_train = BaseDataset(x_test, y_test, transform=_get_transform(augment=False))
+ self.data_test = BaseDataset(
+ x_test, y_test, transform=_get_transform(augment=False)
+ )
def __repr__(self) -> str:
"""Return str about dataset."""
@@ -132,53 +152,129 @@ class EMNISTLines(BaseDataModule):
def _generate_data(self, split: str) -> None:
logger.info(f"EMNISTLines generating data for {split}...")
- sentence_generator = SentenceGenerator(self.max_length - 2) # Subtract by 2 because start/end token
+ sentence_generator = SentenceGenerator(
+ self.max_length - 2
+ ) # Subtract 2 to leave room for the start and end tokens
emnist = self.emnist
emnist.prepare_data()
emnist.setup()
if split == "train":
- samples_by_char = _get_samples_by_char(emnist.x_train, emnist.y_train, emnist.mapping)
+ samples_by_char = _get_samples_by_char(
+ emnist.x_train, emnist.y_train, emnist.mapping
+ )
num = self.num_train
elif split == "val":
- samples_by_char = _get_samples_by_char(emnist.x_train, emnist.y_train, emnist.mapping)
+ samples_by_char = _get_samples_by_char(
+ emnist.x_train, emnist.y_train, emnist.mapping
+ )
num = self.num_val
- elif split == "test":
- samples_by_char = _get_samples_by_char(emnist.x_test, emnist.y_test, emnist.mapping)
+ else:
+ samples_by_char = _get_samples_by_char(
+ emnist.x_test, emnist.y_test, emnist.mapping
+ )
num = self.num_test
DATA_DIRNAME.mkdir(parents=True, exist_ok=True)
- with h5py.File(self.data_filename, "w") as f:
+ with h5py.File(self.data_filename, "a") as f:
x, y = _create_dataset_of_images(
- num, samples_by_char, sentence_generator, self.min_overlap, self.max_overlap, self.dims
- )
- y = _convert_strings_to_labels(
- y,
- emnist.inverse_mapping,
- length=MAX_OUTPUT_LENGTH
- )
+ num,
+ samples_by_char,
+ sentence_generator,
+ self.min_overlap,
+ self.max_overlap,
+ self.dims,
+ )
+ y = convert_strings_to_labels(
+ y, emnist.inverse_mapping, length=MAX_OUTPUT_LENGTH
+ )
f.create_dataset(f"x_{split}", data=x, dtype="u1", compression="lzf")
f.create_dataset(f"y_{split}", data=y, dtype="u1", compression="lzf")
-def _get_samples_by_char(samples: np.ndarray, labels: np.ndarray, mapping: Dict) -> defaultdict:
+
+def _get_samples_by_char(
+ samples: np.ndarray, labels: np.ndarray, mapping: Dict
+) -> defaultdict:
samples_by_char = defaultdict(list)
for sample, label in zip(samples, labels):
samples_by_char[mapping[label]].append(sample)
return samples_by_char
-def _construct_image_from_string():
- pass
-
-
def _select_letter_samples_for_string(string: str, samples_by_char: defaultdict):
- pass
-
-
-def _create_dataset_of_images(num_samples: int, samples_by_char: defaultdict, sentence_generator: SentenceGenerator, min_overlap: float, max_overlap: float, dims: Tuple) -> Tuple[torch.Tensor, torch.Tensor]:
+ null_image = torch.zeros((28, 28), dtype=torch.uint8)
+ sample_image_by_char = {}
+ for char in string:
+ if char in sample_image_by_char:
+ continue
+ samples = samples_by_char[char]
+ sample = samples[np.random.choice(len(samples))] if samples else null_image
+ sample_image_by_char[char] = sample.reshape(28, 28)
+ return [sample_image_by_char[char] for char in string]
+
+
+def _construct_image_from_string(
+ string: str,
+ samples_by_char: defaultdict,
+ min_overlap: float,
+ max_overlap: float,
+ width: int,
+) -> torch.Tensor:
+ overlap = np.random.uniform(min_overlap, max_overlap)
+ sampled_images = _select_letter_samples_for_string(string, samples_by_char)
+ N = len(sampled_images)
+ H, W = sampled_images[0].shape
+ next_overlap_width = W - int(overlap * W)
+ concatenated_image = torch.zeros((H, width), dtype=torch.uint8)
+ x = IMAGE_X_PADDING
+ for image in sampled_images:
+ concatenated_image[:, x : (x + W)] += image
+ x += next_overlap_width
+ return torch.minimum(torch.Tensor([255]), concatenated_image)
+
+
+def _create_dataset_of_images(
+ num_samples: int,
+ samples_by_char: defaultdict,
+ sentence_generator: SentenceGenerator,
+ min_overlap: float,
+ max_overlap: float,
+ dims: Tuple,
+) -> Tuple[torch.Tensor, torch.Tensor]:
images = torch.zeros((num_samples, IMAGE_HEIGHT, dims[2]))
labels = []
for n in range(num_samples):
label = sentence_generator.generate()
- crop = _construct_image_from_string()
+ crop = _construct_image_from_string(
+ label, samples_by_char, min_overlap, max_overlap, dims[-1]
+ )
+ height = crop.shape[0]
+ y = (IMAGE_HEIGHT - height) // 2
+ images[n, y : (y + height), :] = crop
+ labels.append(label)
+ return images, labels
+
+
+def _get_transform(augment: bool = False) -> Callable:
+ if not augment:
+ return transforms.Compose([transforms.ToTensor()])
+ return transforms.Compose(
+ [
+ transforms.ToTensor(),
+ transforms.ColorJitter(brightness=(0.5, 1.0)),
+ transforms.RandomAffine(
+ degrees=3,
+ translate=(0.0, 0.05),
+ scale=(0.4, 1.1),
+ shear=(-40, 50),
+ interpolation=InterpolationMode.BILINEAR,
+ fill=0,
+ ),
+ ]
+ )
+
+
+def generate_emnist_lines() -> None:
+ """Generates a synthetic handwritten dataset and displays info."""
+ load_and_print_info(EMNISTLines)
diff --git a/text_recognizer/datasets/sentence_generator.py b/text_recognizer/datasets/sentence_generator.py
index dd76652..53b781c 100644
--- a/text_recognizer/datasets/sentence_generator.py
+++ b/text_recognizer/datasets/sentence_generator.py
@@ -11,7 +11,7 @@ import numpy as np
from text_recognizer.datasets.util import DATA_DIRNAME
-NLTK_DATA_DIRNAME = DATA_DIRNAME / "raw" / "nltk"
+NLTK_DATA_DIRNAME = DATA_DIRNAME / "downloaded" / "nltk"
class SentenceGenerator:
@@ -47,18 +47,22 @@ class SentenceGenerator:
raise ValueError(
"Must provide max_length to this method or when making this object."
)
-
- index = np.random.randint(0, len(self.word_start_indices) - 1)
- start_index = self.word_start_indices[index]
- end_index_candidates = []
- for index in range(index + 1, len(self.word_start_indices)):
- if self.word_start_indices[index] - start_index > max_length:
- break
- end_index_candidates.append(self.word_start_indices[index])
- end_index = np.random.choice(end_index_candidates)
- sampled_text = self.corpus[start_index:end_index].strip()
- padding = "_" * (max_length - len(sampled_text))
- return sampled_text + padding
+
+ for _ in range(10):
+ try:
+ index = np.random.randint(0, len(self.word_start_indices) - 1)
+ start_index = self.word_start_indices[index]
+ end_index_candidates = []
+ for index in range(index + 1, len(self.word_start_indices)):
+ if self.word_start_indices[index] - start_index > max_length:
+ break
+ end_index_candidates.append(self.word_start_indices[index])
+ end_index = np.random.choice(end_index_candidates)
+ sampled_text = self.corpus[start_index:end_index].strip()
+ return sampled_text
+ except Exception:
+ pass
+ raise RuntimeError("Was not able to generate a valid string")
def brown_corpus() -> str: