author    Gustaf Rydholm <gustaf.rydholm@gmail.com>    2021-08-03 18:18:48 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com>    2021-08-03 18:18:48 +0200
commit    bd4bd443f339e95007bfdabf3e060db720f4d4b9 (patch)
tree      e55cb3744904f7c2a0348b100c7e92a65e538a16 /notebooks/00-scratch-pad.ipynb
parent    75801019981492eedf9280cb352eea3d8e99b65f (diff)
Training working, multiple bug fixes
Diffstat (limited to 'notebooks/00-scratch-pad.ipynb')
-rw-r--r--  notebooks/00-scratch-pad.ipynb | 304
1 file changed, 291 insertions(+), 13 deletions(-)
diff --git a/notebooks/00-scratch-pad.ipynb b/notebooks/00-scratch-pad.ipynb
index 0350727..a193107 100644
--- a/notebooks/00-scratch-pad.ipynb
+++ b/notebooks/00-scratch-pad.ipynb
@@ -2,18 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 1,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The autoreload extension is already loaded. To reload it, use:\n",
- " %reload_ext autoreload\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
@@ -33,8 +24,295 @@
"\n",
"from text_recognizer.networks.transformer.vit import ViT\n",
"from text_recognizer.networks.transformer.transformer import Transformer\n",
- "from text_recognizer.networks.transformer.layers import Decoder\n",
- "from text_recognizer.networks.transformer.nystromer.nystromer import Nystromer"
+ "from text_recognizer.networks.transformer.layers import Decoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "torch.cuda.is_available()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss = nn.CrossEntropyLoss()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "o = torch.randn((4, 5, 4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = torch.randint(0, 5, (4, 4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([4, 5, 4])"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "o.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([4, 4])"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "t.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[0, 1, 3, 2],\n",
+ " [1, 4, 4, 4],\n",
+ " [1, 4, 2, 1],\n",
+ " [2, 0, 4, 4]])"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "t"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[[ 0.0647, -1.3831, 0.0266, 0.8528],\n",
+ " [ 1.4976, 0.4153, 1.0353, 0.0154],\n",
+ " [ 1.4562, -0.3568, 0.3599, -0.6222],\n",
+ " [ 0.2773, 0.4563, 0.9282, -2.1445],\n",
+ " [ 0.5191, 0.3683, -0.3469, 0.1355]],\n",
+ "\n",
+ " [[ 0.0424, -0.3215, 0.5662, -0.4217],\n",
+ " [ 2.0793, 1.2817, 0.1559, -0.6900],\n",
+ " [-1.1751, -0.3359, 1.7875, -0.3671],\n",
+ " [-0.4553, -0.3952, -0.8633, 0.1538],\n",
+ " [-1.3862, 0.4255, -2.2948, 0.0312]],\n",
+ "\n",
+ " [[-1.4257, 2.2662, 0.2670, -0.4330],\n",
+ " [-0.3244, -0.8669, -0.2571, 0.8028],\n",
+ " [ 0.9109, -0.2289, -1.2095, -0.9761],\n",
+ " [-0.0156, 1.2403, -1.1967, 0.6841],\n",
+ " [-0.8185, 0.2967, -2.1639, -0.7903]],\n",
+ "\n",
+ " [[-1.0425, 0.1426, 0.1383, 0.9784],\n",
+ " [-1.2853, 1.4123, -0.2272, -0.3335],\n",
+ " [ 1.5751, -0.7663, 0.9610, 0.5686],\n",
+ " [ 0.9697, -1.5515, -0.8658, -0.5882],\n",
+ " [-1.2467, 0.0539, 0.1208, -1.0297]]])"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "o"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor(1.8355)"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "loss(o, t)"
+ ]
+ },
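The cell above works because nn.CrossEntropyLoss follows PyTorch's (N, C, d1, ...) convention: with logits o of shape (4, 5, 4), the class dimension is dim 1 (5 classes), matched position-wise against the integer targets t of shape (4, 4). A minimal self-contained sketch of the same computation, assuming the shapes used in the cells above:

    import torch
    import torch.nn as nn

    loss = nn.CrossEntropyLoss()
    o = torch.randn(4, 5, 4)         # (batch, num_classes, seq_len) logits
    t = torch.randint(0, 5, (4, 4))  # (batch, seq_len) integer class indices
    print(loss(o, t))                # scalar: mean cross entropy over all positions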
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "TypeError",
+ "evalue": "unsupported operand type(s) for |: 'int' and 'Tensor'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/tmp/ipykernel_9275/1867668791.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for |: 'int' and 'Tensor'"
+ ]
+ }
+ ],
+ "source": [
+ "t[:, 2] == 2 | t[:, 2] == 1"
+ ]
+ },
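The TypeError above is an operator-precedence issue rather than a PyTorch limitation: | binds tighter than ==, so the expression parses as t[:, 2] == (2 | t[:, 2]) == 1, and the failing subexpression is 2 | t[:, 2]. Parenthesizing each comparison produces the intended element-wise mask; a minimal sketch:

    # each parenthesized side is a boolean tensor; | is then element-wise OR
    mask = (t[:, 2] == 2) | (t[:, 2] == 1)
    # equivalent spelling that does not rely on precedence at all:
    mask = torch.logical_or(t[:, 2] == 2, t[:, 2] == 1)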
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([4, 1])"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "torch.argmax(o, dim=-1)[:, -1:].shape"
+ ]
+ },
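torch.argmax(o, dim=-1) reduces the last dimension, so the (4, 5, 4) tensor becomes (4, 5), and [:, -1:] keeps only the final position as a (4, 1) column, ready to be concatenated onto a running sequence. This is the usual greedy step of autoregressive decoding; a hypothetical sketch (the random logits here are a stand-in for a decoder's output, not code from this repository):

    tokens = torch.zeros(4, 1, dtype=torch.long)       # running sequences, one start token each
    logits = torch.randn(4, 5, 4)                      # stand-in for (batch, seq_len, vocab) decoder output
    next_token = torch.argmax(logits, dim=-1)[:, -1:]  # (4, 1): most likely class at the last position
    tokens = torch.cat([tokens, next_token], dim=-1)   # sequence grows by one token per step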
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class LabelSmoothingLossCanonical(nn.Module):\n",
+ " def __init__(self, smoothing=0.0, dim=-1):\n",
+ " super(LabelSmoothingLossCanonical, self).__init__()\n",
+ " self.confidence = 1.0 - smoothing\n",
+ " self.smoothing = smoothing\n",
+ " self.dim = dim\n",
+ "\n",
+ " def forward(self, pred, target):\n",
+ " pred = pred.log_softmax(dim=self.dim)\n",
+ " with torch.no_grad():\n",
+ " # true_dist = pred.data.clone()\n",
+ " true_dist = torch.zeros_like(pred)\n",
+ " print(true_dist.shape)\n",
+ " true_dist.scatter_(1, target.unsqueeze(1), self.confidence)\n",
+ " print(true_dist.shape)\n",
+ " print(true_dist)\n",
+ " true_dist.masked_fill_((target == 4).unsqueeze(1), 0)\n",
+ " print(true_dist)\n",
+ " true_dist += self.smoothing / pred.size(self.dim)\n",
+ " return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))"
+ ]
+ },
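One thing worth flagging in the class above: scatter_ hardcodes dim 1 as the class dimension, while log_softmax and the final sum use self.dim (default -1). The two coincide for 2-D (batch, classes) input but diverge for the 3-D (batch, classes, seq_len) tensors used in this notebook. A sketch that normalizes and scatters over a single configurable class dimension, and also withholds the smoothing mass from padded positions (class 4 is assumed to be the padding index, as in the masked_fill_ above):

    class LabelSmoothingLoss(nn.Module):
        def __init__(self, smoothing=0.0, pad_index=4, class_dim=1):
            super().__init__()
            self.confidence = 1.0 - smoothing
            self.smoothing = smoothing
            self.pad_index = pad_index
            self.class_dim = class_dim

        def forward(self, pred, target):
            # normalize over the same dimension the targets are scattered into
            pred = pred.log_softmax(dim=self.class_dim)
            with torch.no_grad():
                true_dist = torch.zeros_like(pred)
                true_dist.scatter_(self.class_dim, target.unsqueeze(self.class_dim), self.confidence)
                true_dist += self.smoothing / pred.size(self.class_dim)
                # zero the whole distribution at padded positions *after* smoothing,
                # so padding contributes nothing to the loss
                true_dist.masked_fill_((target == self.pad_index).unsqueeze(self.class_dim), 0)
            return torch.mean(torch.sum(-true_dist * pred, dim=self.class_dim))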
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "l = LabelSmoothingLossCanonical(0.1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "torch.Size([1, 5, 4])\n",
+ "torch.Size([1, 5, 4])\n",
+ "tensor([[[0.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.9000, 0.9000, 0.0000, 0.9000],\n",
+ " [0.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 0.0000, 0.9000, 0.0000]]])\n",
+ "tensor([[[0.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.9000, 0.9000, 0.0000, 0.9000],\n",
+ " [0.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 0.0000, 0.0000, 0.0000]]])\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "tensor(0.9438)"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "l(o, t)"
]
},
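Reading the printed tensors above: 0.9 = 1 - smoothing is scattered onto the target class at every sequence position, and the masked_fill_ then zeroes the one column whose target is class 4 (treated as padding). The uniform smoothing mass, 0.1 / pred.size(-1) = 0.1 / 4 = 0.025 per entry, is added after the prints, so the true class ends up with 0.9 + 0.025 = 0.925 and every other class with 0.025; because the mask is applied before that addition, padded columns also receive 0.025 per class rather than staying at zero.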
{