{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[autoreload of text_recognizer.data.emnist failed: Traceback (most recent call last):\n",
" File \"/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 245, in check\n",
" superreload(m, reload, self.old_objects)\n",
" File \"/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 410, in superreload\n",
" update_generic(old_obj, new_obj)\n",
" File \"/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 347, in update_generic\n",
" update(a, b)\n",
" File \"/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 302, in update_class\n",
" if update_generic(old_obj, new_obj): continue\n",
" File \"/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 347, in update_generic\n",
" update(a, b)\n",
" File \"/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 266, in update_function\n",
" setattr(old, name, getattr(new, name))\n",
"ValueError: prepare_data() requires a code object with 1 free vars, not 0\n",
"]\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from PIL import Image\n",
"import torch\n",
"from importlib.util import find_spec\n",
"if find_spec(\"text_recognizer\") is None:\n",
" import sys\n",
" sys.path.append('..')\n",
"from text_recognizer.data.emnist_lines import EMNISTLines"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-03-28 20:17:54.375 | INFO | text_recognizer.data.emnist_lines:setup:103 - EMNISTLinesDataset loading data from HDF5...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/aktersnurra/projects/text-recognizer/data/processed/emnist_lines/ml_32_o0.000000_0.330000_ntr10000_ntv2000_nte2000.h5\n",
"EMNISTLines2 Dataset\n",
"Min overlap: 0.0\n",
"Max overlap: 0.33\n",
"Num classes: 83\n",
"Dims: (1, 56, 1024)\n",
"Output dims: (89, 1)\n",
"Train/val/test sizes: 10000, 2000, 2000\n",
"Batch x stats: (torch.Size([128, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0152), tensor(0.0954), tensor(0.9960))\n",
"Batch y stats: (torch.Size([128, 89]), torch.int64, tensor(1), tensor(66))\n",
"\n",
"Mapping: ['<b>', '<s>', '<e>', '<p>', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '!', '\"', '#', '&', \"'\", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '?']\n"
]
}
],
"source": [
"# Create the EMNIST Lines data module and run its prepare and setup steps.\n",
"dataset = EMNISTLines()\n",
"dataset.prepare_data()\n",
"dataset.setup()\n",
"\n",
"# Print the dataset summary, then the mapping used to decode label indices.\n",
"print(dataset)\n",
"print('Mapping:', dataset.mapping)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def convert_y_label_to_string(y, dataset=dataset, pad_index=3):\n",
"    \"\"\"Decode a sequence of label indices into a string, skipping padding.\n",
"\n",
"    Args:\n",
"        y: Iterable of integer class indices, e.g. a 1-D tensor or a list.\n",
"        dataset: Object whose `mapping` translates an index to its character.\n",
"            The default is bound to the `dataset` that exists when this cell\n",
"            runs, so re-run the cell after recreating `dataset`.\n",
"        pad_index: Index to drop while decoding. Defaults to 3, the value\n",
"            that fills the tail of the label tensors in this notebook.\n",
"\n",
"    Returns:\n",
"        The characters of every index other than `pad_index`, concatenated.\n",
"    \"\"\"\n",
"    # Tensors and arrays expose `tolist()`; converting once gives plain ints\n",
"    # rather than one 0-d tensor per element when indexing the mapping.\n",
"    indices = y.tolist() if hasattr(y, \"tolist\") else y\n",
"    return ''.join(dataset.mapping[i] for i in indices if i != pad_index)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([ 1, 45, 60, 53, 43, 58, 66, 42, 54, 52, 55, 51, 40, 48, 53, 48, 53, 46,\n",
" 66, 40, 41, 54, 60, 59, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
" 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
" 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
" 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]) torch.Size([89])\n"
]
},
{
"data": {
"text/plain": [
"'funds complaining about'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Element 1 of the first training item is the label tensor (y).\n",
"y_example = dataset.data_train[0][1]\n",
"\n",
"# Show the raw indices and their shape, then decode them into text.\n",
"print(y_example, y_example.shape)\n",
"convert_y_label_to_string(y_example)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"