From bd4bd443f339e95007bfdabf3e060db720f4d4b9 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm <gustaf.rydholm@gmail.com>
Date: Tue, 3 Aug 2021 18:18:48 +0200
Subject: Training working, multiple bug fixes

---
 notebooks/00-scratch-pad.ipynb | 304 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 291 insertions(+), 13 deletions(-)

(limited to 'notebooks/00-scratch-pad.ipynb')

diff --git a/notebooks/00-scratch-pad.ipynb b/notebooks/00-scratch-pad.ipynb
index 0350727..a193107 100644
--- a/notebooks/00-scratch-pad.ipynb
+++ b/notebooks/00-scratch-pad.ipynb
@@ -2,18 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2\n",
@@ -33,8 +24,295 @@
     "\n",
     "from text_recognizer.networks.transformer.vit import ViT\n",
     "from text_recognizer.networks.transformer.transformer import Transformer\n",
-    "from text_recognizer.networks.transformer.layers import Decoder\n",
-    "from text_recognizer.networks.transformer.nystromer.nystromer import Nystromer"
+    "from text_recognizer.networks.transformer.layers import Decoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.cuda.is_available()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss = nn.CrossEntropyLoss()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "o = torch.randn((4, 5, 4))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = torch.randint(0, 5, (4, 4))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([4, 5, 4])"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "o.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([4, 4])"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[0, 1, 3, 2],\n",
+       "        [1, 4, 4, 4],\n",
+       "        [1, 4, 2, 1],\n",
+       "        [2, 0, 4, 4]])"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[[ 0.0647, -1.3831,  0.0266,  0.8528],\n",
+       "         [ 1.4976,  0.4153,  1.0353,  0.0154],\n",
+       "         [ 1.4562, -0.3568,  0.3599, -0.6222],\n",
+       "         [ 0.2773,  0.4563,  0.9282, -2.1445],\n",
+       "         [ 0.5191,  0.3683, -0.3469,  0.1355]],\n",
+       "\n",
+       "        [[ 0.0424, -0.3215,  0.5662, -0.4217],\n",
+       "         [ 2.0793,  1.2817,  0.1559, -0.6900],\n",
+       "         [-1.1751, -0.3359,  1.7875, -0.3671],\n",
+       "         [-0.4553, -0.3952, -0.8633,  0.1538],\n",
+       "         [-1.3862,  0.4255, -2.2948,  0.0312]],\n",
+       "\n",
+       "        [[-1.4257,  2.2662,  0.2670, -0.4330],\n",
+       "         [-0.3244, -0.8669, -0.2571,  0.8028],\n",
+       "         [ 0.9109, -0.2289, -1.2095, -0.9761],\n",
+       "         [-0.0156,  1.2403, -1.1967,  0.6841],\n",
+       "         [-0.8185,  0.2967, -2.1639, -0.7903]],\n",
+       "\n",
+       "        [[-1.0425,  0.1426,  0.1383,  0.9784],\n",
+       "         [-1.2853,  1.4123, -0.2272, -0.3335],\n",
+       "         [ 1.5751, -0.7663,  0.9610,  0.5686],\n",
+       "         [ 0.9697, -1.5515, -0.8658, -0.5882],\n",
+       "         [-1.2467,  0.0539,  0.1208, -1.0297]]])"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "o"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(1.8355)"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "loss(o, t)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "unsupported operand type(s) for |: 'int' and 'Tensor'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_9275/1867668791.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for |: 'int' and 'Tensor'"
+     ]
+    }
+   ],
+   "source": [
+    "t[:, 2] == 2 | t[:, 2] == 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([4, 1])"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.argmax(o, dim=-1)[:, -1:].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LabelSmoothingLossCanonical(nn.Module):\n",
+    "    def __init__(self, smoothing=0.0, dim=-1):\n",
+    "        super(LabelSmoothingLossCanonical, self).__init__()\n",
+    "        self.confidence = 1.0 - smoothing\n",
+    "        self.smoothing = smoothing\n",
+    "        self.dim = dim\n",
+    "\n",
+    "    def forward(self, pred, target):\n",
+    "        pred = pred.log_softmax(dim=self.dim)\n",
+    "        with torch.no_grad():\n",
+    "            # true_dist = pred.data.clone()\n",
+    "            true_dist = torch.zeros_like(pred)\n",
+    "            print(true_dist.shape)\n",
+    "            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)\n",
+    "            print(true_dist.shape)\n",
+    "            print(true_dist)\n",
+    "            true_dist.masked_fill_((target == 4).unsqueeze(1), 0)\n",
+    "            print(true_dist)\n",
+    "            true_dist += self.smoothing / pred.size(self.dim)\n",
+    "        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = LabelSmoothingLossCanonical(0.1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([1, 5, 4])\n",
+      "torch.Size([1, 5, 4])\n",
+      "tensor([[[0.0000, 0.0000, 0.0000, 0.0000],\n",
+      "         [0.0000, 0.0000, 0.0000, 0.0000],\n",
+      "         [0.9000, 0.9000, 0.0000, 0.9000],\n",
+      "         [0.0000, 0.0000, 0.0000, 0.0000],\n",
+      "         [0.0000, 0.0000, 0.9000, 0.0000]]])\n",
+      "tensor([[[0.0000, 0.0000, 0.0000, 0.0000],\n",
+      "         [0.0000, 0.0000, 0.0000, 0.0000],\n",
+      "         [0.9000, 0.9000, 0.0000, 0.9000],\n",
+      "         [0.0000, 0.0000, 0.0000, 0.0000],\n",
+      "         [0.0000, 0.0000, 0.0000, 0.0000]]])\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "tensor(0.9438)"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "l(o, t)"
    ]
   },
   {
-- 
cgit v1.2.3-70-g09d2