From bd4bd443f339e95007bfdabf3e060db720f4d4b9 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Tue, 3 Aug 2021 18:18:48 +0200 Subject: Training working, multiple bug fixes --- notebooks/00-scratch-pad.ipynb | 304 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 291 insertions(+), 13 deletions(-) (limited to 'notebooks/00-scratch-pad.ipynb') diff --git a/notebooks/00-scratch-pad.ipynb b/notebooks/00-scratch-pad.ipynb index 0350727..a193107 100644 --- a/notebooks/00-scratch-pad.ipynb +++ b/notebooks/00-scratch-pad.ipynb @@ -2,18 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -33,8 +24,295 @@ "\n", "from text_recognizer.networks.transformer.vit import ViT\n", "from text_recognizer.networks.transformer.transformer import Transformer\n", - "from text_recognizer.networks.transformer.layers import Decoder\n", - "from text_recognizer.networks.transformer.nystromer.nystromer import Nystromer" + "from text_recognizer.networks.transformer.layers import Decoder" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.cuda.is_available()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "loss = nn.CrossEntropyLoss()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "o = torch.randn((4, 5, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "t = torch.randint(0, 5, (4, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([4, 5, 4])" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "o.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([4, 4])" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0, 1, 3, 2],\n", + " [1, 4, 4, 4],\n", + " [1, 4, 2, 1],\n", + " [2, 0, 4, 4]])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[ 0.0647, -1.3831, 0.0266, 0.8528],\n", + " [ 1.4976, 0.4153, 1.0353, 0.0154],\n", + " [ 1.4562, -0.3568, 0.3599, -0.6222],\n", + " [ 0.2773, 0.4563, 0.9282, -2.1445],\n", + " [ 0.5191, 0.3683, -0.3469, 0.1355]],\n", + "\n", + " [[ 0.0424, -0.3215, 0.5662, -0.4217],\n", + " [ 2.0793, 1.2817, 0.1559, -0.6900],\n", + " [-1.1751, -0.3359, 1.7875, -0.3671],\n", + " [-0.4553, -0.3952, -0.8633, 0.1538],\n", + " [-1.3862, 0.4255, -2.2948, 0.0312]],\n", + "\n", + " [[-1.4257, 2.2662, 0.2670, -0.4330],\n", + " [-0.3244, -0.8669, -0.2571, 0.8028],\n", + " [ 0.9109, -0.2289, -1.2095, -0.9761],\n", + " [-0.0156, 1.2403, -1.1967, 0.6841],\n", + " [-0.8185, 0.2967, -2.1639, -0.7903]],\n", + "\n", + " [[-1.0425, 0.1426, 0.1383, 0.9784],\n", + " [-1.2853, 1.4123, -0.2272, -0.3335],\n", + " [ 1.5751, -0.7663, 0.9610, 0.5686],\n", + " [ 0.9697, -1.5515, -0.8658, -0.5882],\n", + " [-1.2467, 0.0539, 0.1208, -1.0297]]])" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "o" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(1.8355)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loss(o, t)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for |: 'int' and 'Tensor'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_9275/1867668791.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for |: 'int' and 'Tensor'" + ] + } + ], + "source": [ + "t[:, 2] == 2 | t[:, 2] == 1" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([4, 1])" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.argmax(o, dim=-1)[:, -1:].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "class LabelSmoothingLossCanonical(nn.Module):\n", + " def __init__(self, smoothing=0.0, dim=-1):\n", + " super(LabelSmoothingLossCanonical, self).__init__()\n", + " self.confidence = 1.0 - smoothing\n", + " self.smoothing = smoothing\n", + " self.dim = dim\n", + "\n", + " def forward(self, pred, target):\n", + " pred = pred.log_softmax(dim=self.dim)\n", + " with torch.no_grad():\n", + " # true_dist = pred.data.clone()\n", + " true_dist = torch.zeros_like(pred)\n", + " print(true_dist.shape)\n", + " true_dist.scatter_(1, target.unsqueeze(1), self.confidence)\n", + " print(true_dist.shape)\n", + " print(true_dist)\n", + " true_dist.masked_fill_((target == 4).unsqueeze(1), 0)\n", + " print(true_dist)\n", + " true_dist += self.smoothing / pred.size(self.dim)\n", + " return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "l = LabelSmoothingLossCanonical(0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1, 5, 4])\n", + "torch.Size([1, 5, 4])\n", + "tensor([[[0.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.9000, 0.9000, 0.0000, 0.9000],\n", + " [0.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.0000, 0.0000, 0.9000, 0.0000]]])\n", + "tensor([[[0.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.9000, 0.9000, 0.0000, 0.9000],\n", + " [0.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.0000, 0.0000, 0.0000, 0.0000]]])\n" + ] + }, + { + "data": { + "text/plain": [ + "tensor(0.9438)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l(o, t)" ] }, { -- cgit v1.2.3-70-g09d2