{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "import torch\n",
    "from importlib.util import find_spec\n",
    "if find_spec(\"text_recognizer\") is None:\n",
    "    import sys\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from text_recognizer.datasets import EmnistDataLoaders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_loaders = EmnistDataLoaders(splits=[\"val\"], sample_to_balance=True,\n",
    "        subsample_fraction = None,\n",
    "        transform = None,\n",
    "        target_transform = None,\n",
    "        batch_size = 1,\n",
    "        shuffle = True,\n",
    "        num_workers  = 0,\n",
    "        cuda = False,\n",
    "        seed = 4711)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset EMNIST\n",
       "    Number of datapoints: 55908\n",
       "    Root location: /home/akternurra/Documents/projects/quest-for-general-artifical-intelligence/projects/text-recognizer/data\n",
       "    Split: Test\n",
       "    StandardTransform\n",
       "Transform: Compose(\n",
       "               <text_recognizer.datasets.util.Transpose object at 0x7fc764a10eb0>\n",
       "               ToTensor()\n",
       "           )"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_loaders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from text_recognizer.datasets import EmnistDataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = EmnistDataset()\n",
    "dataset.load_emnist_dataset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([3, 28, 28])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.data[[1, 2, 3]].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([18, 36,  0,  ..., 28,  0,  5])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: '0',\n",
       " 1: '1',\n",
       " 2: '2',\n",
       " 3: '3',\n",
       " 4: '4',\n",
       " 5: '5',\n",
       " 6: '6',\n",
       " 7: '7',\n",
       " 8: '8',\n",
       " 9: '9',\n",
       " 10: 'A',\n",
       " 11: 'B',\n",
       " 12: 'C',\n",
       " 13: 'D',\n",
       " 14: 'E',\n",
       " 15: 'F',\n",
       " 16: 'G',\n",
       " 17: 'H',\n",
       " 18: 'I',\n",
       " 19: 'J',\n",
       " 20: 'K',\n",
       " 21: 'L',\n",
       " 22: 'M',\n",
       " 23: 'N',\n",
       " 24: 'O',\n",
       " 25: 'P',\n",
       " 26: 'Q',\n",
       " 27: 'R',\n",
       " 28: 'S',\n",
       " 29: 'T',\n",
       " 30: 'U',\n",
       " 31: 'V',\n",
       " 32: 'W',\n",
       " 33: 'X',\n",
       " 34: 'Y',\n",
       " 35: 'Z',\n",
       " 36: 'a',\n",
       " 37: 'b',\n",
       " 38: 'c',\n",
       " 39: 'd',\n",
       " 40: 'e',\n",
       " 41: 'f',\n",
       " 42: 'g',\n",
       " 43: 'h',\n",
       " 44: 'i',\n",
       " 45: 'j',\n",
       " 46: 'k',\n",
       " 47: 'l',\n",
       " 48: 'm',\n",
       " 49: 'n',\n",
       " 50: 'o',\n",
       " 51: 'p',\n",
       " 52: 'q',\n",
       " 53: 'r',\n",
       " 54: 's',\n",
       " 55: 't',\n",
       " 56: 'u',\n",
       " 57: 'v',\n",
       " 58: 'w',\n",
       " 59: 'x',\n",
       " 60: 'y',\n",
       " 61: 'z',\n",
       " 62: ' ',\n",
       " 63: '!',\n",
       " 64: '\"',\n",
       " 65: '#',\n",
       " 66: '&',\n",
       " 67: \"'\",\n",
       " 68: '(',\n",
       " 69: ')',\n",
       " 70: '*',\n",
       " 71: '+',\n",
       " 72: ',',\n",
       " 73: '-',\n",
       " 74: '.',\n",
       " 75: '/',\n",
       " 76: ':',\n",
       " 77: ';',\n",
       " 78: '?',\n",
       " 79: '_'}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([18, 36,  0,  ..., 28,  0,  5])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.randint(0, len(data_loader(\"val\").dataset.data), 4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_loader(\"val\").dataset.targets[np.random.randint(0, len(data_loader(\"val\").dataset.data), 4)].numpy()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "len(data_loader(\"val\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = data_loader(\"val\").dataset.data[np.random.randint(0, len(data_loader(\"val\").dataset.data), 4)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"accuracy\" in \"val_accuracy\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x[0].dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x[0].type(\"torch.FloatTensor\") / 255"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy().reshape(28, 28).swapaxes(0, 1)\n",
    "plt.imshow(x, cmap='gray')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fix below with new data loader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "classes = load_emnist_mapping()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_dl_images(dl, batch_size, classes):\n",
    "    fig = plt.figure(figsize=(9, 9))\n",
    "    batch = next(iter(dl))\n",
    "    for i in range(batch_size):\n",
    "        x, y = batch[0][i], batch[1][i]\n",
    "        ax = fig.add_subplot(3, 3, i + 1)\n",
    "        x = x.squeeze(0).numpy()\n",
    "        ax.imshow(x, cmap='gray')\n",
    "        ax.set_xticks([])\n",
    "        ax.set_yticks([])\n",
    "        ax.set_title(classes[int(y)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_images(dataset, classes, shift=0):\n",
    "    fig = plt.figure(figsize=(9, 9))\n",
    "    for i in range(9):\n",
    "        x, y = dataset[i + shift]\n",
    "        ax = fig.add_subplot(3, 3, i + 1)\n",
    "        x = x.squeeze(0).numpy()\n",
    "        ax.imshow(x, cmap='gray')\n",
    "        ax.set_xticks([])\n",
    "        ax.set_yticks([])\n",
    "        ax.set_title(classes[int(y)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if a:\n",
    "    print(\"afaf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "classes = load_emnist_mapping()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display_images(dataset, classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display_dl_images(dl, 9, classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display_dl_images(dl, 9, classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display_dl_images(dl, 9, classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(9, 9))\n",
    "for i, j in enumerate([5, 7, 9]):\n",
    "    x, y = dataset[j]\n",
    "    ax = fig.add_subplot(3, 3, i + 1)\n",
    "    x = x.numpy().reshape(28, 28).swapaxes(0, 1)\n",
    "    ax.imshow(x, cmap='gray')\n",
    "    ax.set_xticks([])\n",
    "    ax.set_yticks([])\n",
    "    ax.set_title(classes[int(y)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}