From 09f9eab02ef40b1ca26e4693ad77f1f2df79a945 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Sat, 2 Sep 2023 01:52:15 +0200 Subject: Update in notebooks --- notebooks/03-look-at-iam-lines.ipynb | 153 +++++++++++++++++++++++++++++++++-- 1 file changed, 146 insertions(+), 7 deletions(-) (limited to 'notebooks/03-look-at-iam-lines.ipynb') diff --git a/notebooks/03-look-at-iam-lines.ipynb b/notebooks/03-look-at-iam-lines.ipynb index 3d71c3c..9e9b24c 100644 --- a/notebooks/03-look-at-iam-lines.ipynb +++ b/notebooks/03-look-at-iam-lines.ipynb @@ -64,12 +64,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_20508/3921102733.py:2: UserWarning: \n", + "/tmp/ipykernel_3955/3921102733.py:2: UserWarning: \n", "The version_base parameter is not specified.\n", "Please specify a compatability version level, or None.\n", "Will assume defaults for version 1.1\n", " with initialize(config_path=str(path.parent)):\n", - "/tmp/ipykernel_20508/3921102733.py:2: UserWarning: \n", + "/tmp/ipykernel_3955/3921102733.py:2: UserWarning: \n", "The version_base parameter is not specified.\n", "Please specify a compatability version level, or None.\n", "Will assume defaults for version 1.1\n", @@ -108,8 +108,8 @@ "Input dims: (1, 56, 1024)\n", "Output dims: (89, 1)\n", "Train/val/test sizes: 10255, 1140, 1958\n", - "Train Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0388), tensor(0.1288), tensor(1.))\n", - "Train Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(52))\n", + "Train Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0361), tensor(0.1067), tensor(1.))\n", + "Train Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(56))\n", "Test Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0333), tensor(0.0951), tensor(0.8627))\n", "Test Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(52))\n", "\n" @@ -125,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -134,7 +134,40 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor([[[0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " ...,\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.]]]),\n", + " tensor([ 1, 32, 27, 14, 33, 16, 21, 22, 27, 20, 40, 14, 33, 40, 33, 21, 18, 40,\n", + " 29, 31, 28, 29, 40, 21, 28, 25, 17, 22, 27, 20, 40, 33, 21, 18, 40, 15,\n", + " 14, 31, 20, 18, 45, 32, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n", + " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n", + " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]))" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datamodule.data_val[16]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -143,7 +176,7 @@ "'union would be prepared to reach'" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -155,6 +188,112 @@ "convert_y_label_to_string(dataset[0][1])" ] }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['',\n", + " '',\n", + " '',\n", + " '

',\n", + " '0',\n", + " '1',\n", + " '2',\n", + " '3',\n", + " '4',\n", + " '5',\n", + " '6',\n", + " '7',\n", + " '8',\n", + " '9',\n", + " 'a',\n", + " 'b',\n", + " 'c',\n", + " 'd',\n", + " 'e',\n", + " 'f',\n", + " 'g',\n", + " 'h',\n", + " 'i',\n", + " 'j',\n", + " 'k',\n", + " 'l',\n", + " 'm',\n", + " 'n',\n", + " 'o',\n", + " 'p',\n", + " 'q',\n", + " 'r',\n", + " 's',\n", + " 't',\n", + " 'u',\n", + " 'v',\n", + " 'w',\n", + " 'x',\n", + " 'y',\n", + " 'z',\n", + " ' ',\n", + " '!',\n", + " '\"',\n", + " '#',\n", + " '&',\n", + " \"'\",\n", + " '(',\n", + " ')',\n", + " '*',\n", + " '+',\n", + " ',',\n", + " '-',\n", + " '.',\n", + " '/',\n", + " ':',\n", + " ';',\n", + " '?',\n", + " '\\n']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datamodule.tokenizer.mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([ 1, 34, 27, 22, 28, 27, 40, 36, 28, 34, 25, 17, 40, 15, 18, 40, 29, 31,\n", + " 18, 29, 14, 31, 18, 17, 40, 33, 28, 40, 31, 18, 14, 16, 21, 2, 3, 3,\n", + " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n", + " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n", + " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x =dataset[0][1]\n", + "x[" + ] + }, { "cell_type": "code", "execution_count": 10, -- cgit v1.2.3-70-g09d2