summaryrefslogtreecommitdiff
path: root/notebooks/03-look-at-iam-lines.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'notebooks/03-look-at-iam-lines.ipynb')
-rw-r--r--notebooks/03-look-at-iam-lines.ipynb153
1 files changed, 146 insertions, 7 deletions
diff --git a/notebooks/03-look-at-iam-lines.ipynb b/notebooks/03-look-at-iam-lines.ipynb
index 3d71c3c..9e9b24c 100644
--- a/notebooks/03-look-at-iam-lines.ipynb
+++ b/notebooks/03-look-at-iam-lines.ipynb
@@ -64,12 +64,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_20508/3921102733.py:2: UserWarning: \n",
+ "/tmp/ipykernel_3955/3921102733.py:2: UserWarning: \n",
"The version_base parameter is not specified.\n",
"Please specify a compatability version level, or None.\n",
"Will assume defaults for version 1.1\n",
" with initialize(config_path=str(path.parent)):\n",
- "/tmp/ipykernel_20508/3921102733.py:2: UserWarning: \n",
+ "/tmp/ipykernel_3955/3921102733.py:2: UserWarning: \n",
"The version_base parameter is not specified.\n",
"Please specify a compatability version level, or None.\n",
"Will assume defaults for version 1.1\n",
@@ -108,8 +108,8 @@
"Input dims: (1, 56, 1024)\n",
"Output dims: (89, 1)\n",
"Train/val/test sizes: 10255, 1140, 1958\n",
- "Train Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0388), tensor(0.1288), tensor(1.))\n",
- "Train Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(52))\n",
+ "Train Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0361), tensor(0.1067), tensor(1.))\n",
+ "Train Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(56))\n",
"Test Batch x stats: (torch.Size([8, 1, 56, 1024]), torch.float32, tensor(0.), tensor(0.0333), tensor(0.0951), tensor(0.8627))\n",
"Test Batch y stats: (torch.Size([8, 89]), torch.int64, tensor(1), tensor(52))\n",
"\n"
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -134,7 +134,40 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 22,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(tensor([[[0., 0., 0., ..., 0., 0., 0.],\n",
+ " [0., 0., 0., ..., 0., 0., 0.],\n",
+ " [0., 0., 0., ..., 0., 0., 0.],\n",
+ " ...,\n",
+ " [0., 0., 0., ..., 0., 0., 0.],\n",
+ " [0., 0., 0., ..., 0., 0., 0.],\n",
+ " [0., 0., 0., ..., 0., 0., 0.]]]),\n",
+ " tensor([ 1, 32, 27, 14, 33, 16, 21, 22, 27, 20, 40, 14, 33, 40, 33, 21, 18, 40,\n",
+ " 29, 31, 28, 29, 40, 21, 28, 25, 17, 22, 27, 20, 40, 33, 21, 18, 40, 15,\n",
+ " 14, 31, 20, 18, 45, 32, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
+ " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
+ " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]))"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "datamodule.data_val[16]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -143,7 +176,7 @@
"'union would be prepared to reach'"
]
},
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -157,6 +190,112 @@
},
{
"cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['<b>',\n",
+ " '<s>',\n",
+ " '<e>',\n",
+ " '<p>',\n",
+ " '0',\n",
+ " '1',\n",
+ " '2',\n",
+ " '3',\n",
+ " '4',\n",
+ " '5',\n",
+ " '6',\n",
+ " '7',\n",
+ " '8',\n",
+ " '9',\n",
+ " 'a',\n",
+ " 'b',\n",
+ " 'c',\n",
+ " 'd',\n",
+ " 'e',\n",
+ " 'f',\n",
+ " 'g',\n",
+ " 'h',\n",
+ " 'i',\n",
+ " 'j',\n",
+ " 'k',\n",
+ " 'l',\n",
+ " 'm',\n",
+ " 'n',\n",
+ " 'o',\n",
+ " 'p',\n",
+ " 'q',\n",
+ " 'r',\n",
+ " 's',\n",
+ " 't',\n",
+ " 'u',\n",
+ " 'v',\n",
+ " 'w',\n",
+ " 'x',\n",
+ " 'y',\n",
+ " 'z',\n",
+ " ' ',\n",
+ " '!',\n",
+ " '\"',\n",
+ " '#',\n",
+ " '&',\n",
+ " \"'\",\n",
+ " '(',\n",
+ " ')',\n",
+ " '*',\n",
+ " '+',\n",
+ " ',',\n",
+ " '-',\n",
+ " '.',\n",
+ " '/',\n",
+ " ':',\n",
+ " ';',\n",
+ " '?',\n",
+ " '\\n']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "datamodule.tokenizer.mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([ 1, 34, 27, 22, 28, 27, 40, 36, 28, 34, 25, 17, 40, 15, 18, 40, 29, 31,\n",
+ " 18, 29, 14, 31, 18, 17, 40, 33, 28, 40, 31, 18, 14, 16, 21, 2, 3, 3,\n",
+ " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
+ " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
+ " 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x =dataset[0][1]\n",
+ "x["
+ ]
+ },
+ {
+ "cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [