From 09f9eab02ef40b1ca26e4693ad77f1f2df79a945 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm <gustaf.rydholm@gmail.com>
Date: Sat, 2 Sep 2023 01:52:15 +0200
Subject: Update in notebooks

---
 notebooks/03-look-at-iam-lines.ipynb | 153 +++++++++++++++++++++++++++++++++--
 1 file changed, 146 insertions(+), 7 deletions(-)

(limited to 'notebooks/03-look-at-iam-lines.ipynb')

diff --git a/notebooks/03-look-at-iam-lines.ipynb b/notebooks/03-look-at-iam-lines.ipynb
index 3d71c3c..9e9b24c 100644
--- a/notebooks/03-look-at-iam-lines.ipynb
+++ b/notebooks/03-look-at-iam-lines.ipynb
@@ -64,12 +64,12 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_20508/3921102733.py:2: UserWarning: \n",
+      "/tmp/ipykernel_3955/3921102733.py:2: UserWarning: \n",
       "The version_base parameter is not specified.\n",
       "Please specify a compatability version level, or None.\n",
       "Will assume defaults for version 1.1\n",
       "  with initialize(config_path=str(path.parent)):\n",
-      "/tmp/ipykernel_20508/3921102733.py:2: UserWarning: \n",
+      "/tmp/ipykernel_3955/3921102733.py:2: UserWarning: \n",
       "The version_base parameter is not specified.\n",
       "Please specify a compatability version level, or None.\n",
       "Will assume defaults for version 1.1\n",
@@ -108,8 +108,8 @@
       "Input dims: (1, 56, 1024)\n",
       "Output dims: (89, 1)\n",
       "Train/val/test sizes: 10255, 1140, 1958\n",
-      "Train Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0388), tensor(0.1288), tensor(1.))\n",
-      "Train Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(52))\n",
+      "Train Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0361), tensor(0.1067), tensor(1.))\n",
+      "Train Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(56))\n",
       "Test Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0333), tensor(0.0951), tensor(0.8627))\n",
       "Test Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(52))\n",
       "\n"
@@ -125,7 +125,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -134,7 +134,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 22,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[[0., 0., 0.,  ..., 0., 0., 0.],\n",
+       "          [0., 0., 0.,  ..., 0., 0., 0.],\n",
+       "          [0., 0., 0.,  ..., 0., 0., 0.],\n",
+       "          ...,\n",
+       "          [0., 0., 0.,  ..., 0., 0., 0.],\n",
+       "          [0., 0., 0.,  ..., 0., 0., 0.],\n",
+       "          [0., 0., 0.,  ..., 0., 0., 0.]]]),\n",
+       " tensor([ 1, 32, 27, 14, 33, 16, 21, 22, 27, 20, 40, 14, 33, 40, 33, 21, 18, 40,\n",
+       "         29, 31, 28, 29, 40, 21, 28, 25, 17, 22, 27, 20, 40, 33, 21, 18, 40, 15,\n",
+       "         14, 31, 20, 18, 45, 32,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,\n",
+       "          3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,\n",
+       "          3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3]))"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "datamodule.data_val[16]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -143,7 +176,7 @@
        "'union would be prepared to reach'"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -155,6 +188,112 @@
     "convert_y_label_to_string(dataset[0][1])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['<b>',\n",
+       " '<s>',\n",
+       " '<e>',\n",
+       " '<p>',\n",
+       " '0',\n",
+       " '1',\n",
+       " '2',\n",
+       " '3',\n",
+       " '4',\n",
+       " '5',\n",
+       " '6',\n",
+       " '7',\n",
+       " '8',\n",
+       " '9',\n",
+       " 'a',\n",
+       " 'b',\n",
+       " 'c',\n",
+       " 'd',\n",
+       " 'e',\n",
+       " 'f',\n",
+       " 'g',\n",
+       " 'h',\n",
+       " 'i',\n",
+       " 'j',\n",
+       " 'k',\n",
+       " 'l',\n",
+       " 'm',\n",
+       " 'n',\n",
+       " 'o',\n",
+       " 'p',\n",
+       " 'q',\n",
+       " 'r',\n",
+       " 's',\n",
+       " 't',\n",
+       " 'u',\n",
+       " 'v',\n",
+       " 'w',\n",
+       " 'x',\n",
+       " 'y',\n",
+       " 'z',\n",
+       " ' ',\n",
+       " '!',\n",
+       " '\"',\n",
+       " '#',\n",
+       " '&',\n",
+       " \"'\",\n",
+       " '(',\n",
+       " ')',\n",
+       " '*',\n",
+       " '+',\n",
+       " ',',\n",
+       " '-',\n",
+       " '.',\n",
+       " '/',\n",
+       " ':',\n",
+       " ';',\n",
+       " '?',\n",
+       " '\\n']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "datamodule.tokenizer.mapping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([ 1, 34, 27, 22, 28, 27, 40, 36, 28, 34, 25, 17, 40, 15, 18, 40, 29, 31,\n",
+       "        18, 29, 14, 31, 18, 17, 40, 33, 28, 40, 31, 18, 14, 16, 21,  2,  3,  3,\n",
+       "         3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,\n",
+       "         3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,\n",
+       "         3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3])"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x = dataset[0][1]\n",
+    "x"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 10,
-- 
cgit v1.2.3-70-g09d2