2 files changed, 72 insertions, 98 deletions
diff --git a/notebooks/03-look-at-iam-paragraphs.ipynb b/notebooks/03-look-at-iam-paragraphs.ipynb
index 5e3a872..76ca6b1 100644
--- a/notebooks/03-look-at-iam-paragraphs.ipynb
+++ b/notebooks/03-look-at-iam-paragraphs.ipynb
@@ -5,7 +5,21 @@
    "execution_count": 1,
    "id": "6ce2519f",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'loguru.logger'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_3883/2979229631.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     16\u001b[0m     \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'..'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miam_paragraphs\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mIAMParagraphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     19\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miam_synthetic_paragraphs\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mIAMSyntheticParagraphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miam_extended_paragraphs\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mIAMExtendedParagraphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/projects/text-recognizer/text_recognizer/data/iam_paragraphs.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     20\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0memnist\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0memnist_mapping\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miam\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mIAM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmappings\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mWordPieceMapping\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     23\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransforms\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mWordPiece\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/projects/text-recognizer/text_recognizer/data/mappings.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mloguru\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogger\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mlog\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'loguru.logger'"
+     ]
+    }
+   ],
    "source": [
     "import os\n",
     "os.environ['CUDA_VISIBLE_DEVICE'] = ''\n",
@@ -31,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "726ac25b",
    "metadata": {},
    "outputs": [],
diff --git a/notebooks/05c-test-model-end-to-end.ipynb b/notebooks/05c-test-model-end-to-end.ipynb
index a96e484..b652bdd 100644
--- a/notebooks/05c-test-model-end-to-end.ipynb
+++ b/notebooks/05c-test-model-end-to-end.ipynb
@@ -26,16 +26,6 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "3e812a1e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import attr"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
    "id": "d3a6146b-94b1-4618-a4e4-00f8e23ffdb0",
    "metadata": {},
    "outputs": [],
@@ -47,193 +37,163 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "8741a844-3b97-47c4-a2a1-5a268d40923c",
+   "execution_count": 3,
+   "id": "6b722ca0-9c65-4f90-be4e-b7334ea81237",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "_target_: text_recognizer.data.mappings.WordPieceMapping\n",
-      "num_features: 1000\n",
-      "tokens: iamdb_1kwp_tokens_1000.txt\n",
-      "lexicon: iamdb_1kwp_lex_1000.txt\n",
-      "data_dir: null\n",
-      "use_words: false\n",
-      "prepend_wordsep: false\n",
-      "special_tokens:\n",
+      "mapping:\n",
+      "  _target_: text_recognizer.data.mappings.WordPieceMapping\n",
+      "  num_features: 1000\n",
+      "  tokens: iamdb_1kwp_tokens_1000.txt\n",
+      "  lexicon: iamdb_1kwp_lex_1000.txt\n",
+      "  data_dir: null\n",
+      "  use_words: false\n",
+      "  prepend_wordsep: false\n",
+      "  special_tokens:\n",
+      "  - <s>\n",
+      "  - <e>\n",
+      "  - <p>\n",
+      "  extra_symbols:\n",
+      "  - \\n\n",
+      "_target_: text_recognizer.models.transformer.TransformerLitModel\n",
+      "interval: step\n",
+      "monitor: val/loss\n",
+      "ignore_tokens:\n",
       "- <s>\n",
       "- <e>\n",
       "- <p>\n",
-      "extra_symbols:\n",
-      "- '\n",
+      "start_token: <s>\n",
+      "end_token: <e>\n",
+      "pad_token: <p>\n",
       "\n",
-      "  '\n",
-      "\n",
-      "{'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['<s>', '<e>', '<p>'], 'extra_symbols': ['\\n']}\n"
+      "{'mapping': {'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['<s>', '<e>', '<p>'], 'extra_symbols': ['\\\\n']}, '_target_': 'text_recognizer.models.transformer.TransformerLitModel', 'interval': 'step', 'monitor': 'val/loss', 'ignore_tokens': ['<s>', '<e>', '<p>'], 'start_token': '<s>', 'end_token': '<e>', 'pad_token': '<p>'}\n"
      ]
     }
    ],
    "source": [
     "# context initialization\n",
-    "with initialize(config_path=\"../training/conf/model/mapping\", job_name=\"test_app\"):\n",
-    "    cfg = compose(config_name=\"word_piece\")\n",
+    "with initialize(config_path=\"../training/conf/model/\", job_name=\"test_app\"):\n",
+    "    cfg = compose(config_name=\"lit_transformer\")\n",
     "    print(OmegaConf.to_yaml(cfg))\n",
     "    print(cfg)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "c9271d46-37b1-4d06-a603-46b5ed82f821",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2021-07-30 23:08:27.495 | DEBUG    | text_recognizer.data.mappings:__attrs_post_init__:89 - Using data dir: /home/aktersnurra/projects/text-recognizer/data/downloaded/iam/iamdb\n"
-     ]
-    }
-   ],
-   "source": [
-    "tt =instantiate(cfg)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "bf1b07ac-9de7-4d24-a36b-09847bc6bc6f",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "WordPieceMapping(extra_symbols={'\\n'}, mapping=['<b>', '<s>', '<e>', '<p>', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '!', '\"', '#', '&', \"'\", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '?', '\\n'], inverse_mapping={'<b>': 0, '<s>': 1, '<e>': 2, '<p>': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, 'A': 14, 'B': 15, 'C': 16, 'D': 17, 'E': 18, 'F': 19, 'G': 20, 'H': 21, 'I': 22, 'J': 23, 'K': 24, 'L': 25, 'M': 26, 'N': 27, 'O': 28, 'P': 29, 'Q': 30, 'R': 31, 'S': 32, 'T': 33, 'U': 34, 'V': 35, 'W': 36, 'X': 37, 'Y': 38, 'Z': 39, 'a': 40, 'b': 41, 'c': 42, 'd': 43, 'e': 44, 'f': 45, 'g': 46, 'h': 47, 'i': 48, 'j': 49, 'k': 50, 'l': 51, 'm': 52, 'n': 53, 'o': 54, 'p': 55, 'q': 56, 'r': 57, 's': 58, 't': 59, 'u': 60, 'v': 61, 'w': 62, 'x': 63, 'y': 64, 'z': 65, ' ': 66, '!': 67, '\"': 68, '#': 69, '&': 70, \"'\": 71, '(': 72, ')': 73, '*': 74, '+': 75, ',': 76, '-': 77, '.': 78, '/': 79, ':': 80, ';': 81, '?': 82, '\\n': 83}, input_size=[28, 28], data_dir=PosixPath('/home/aktersnurra/projects/text-recognizer/data/downloaded/iam/iamdb'), num_features=1000, tokens='iamdb_1kwp_tokens_1000.txt', lexicon='iamdb_1kwp_lex_1000.txt', use_words=False, prepend_wordsep=False, special_tokens={'<p>', '<s>', '<e>'}, wordpiece_processor=<text_recognizer.data.iam_preprocessor.Preprocessor object at 0x7fa4ec7ea610>)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tt"
-   ]
-  },
-  {
-   "cell_type": "code",
    "execution_count": null,
-   "id": "2452e8f4-cc5f-4763-9a25-4fa27b7f143e",
+   "id": "9c797159-845e-42c6-bd65-1c976ad627cd",
    "metadata": {},
    "outputs": [],
    "source": [
-    "tt.mapping"
+    "# context initialization\n",
+    "with initialize(config_path=\"../training/conf/network/\", job_name=\"test_app\"):\n",
+    "    cfg = compose(config_name=\"conv_transformer\")\n",
+    "    print(OmegaConf.to_yaml(cfg))\n",
+    "    print(cfg)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6b722ca0-9c65-4f90-be4e-b7334ea81237",
+   "id": "af2c8cfa-0b45-4681-b671-0f97ace62516",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# context initialization\n",
-    "with initialize(config_path=\"../training/conf/model/\", job_name=\"test_app\"):\n",
-    "    cfg = compose(config_name=\"lit_transformer\")\n",
-    "    print(OmegaConf.to_yaml(cfg))\n",
-    "    print(cfg)"
+    "net = instantiate(cfg)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9c797159-845e-42c6-bd65-1c976ad627cd",
-   "metadata": {},
+   "id": "8f0742ad-5e2f-42d5-83e7-6e46398b4f0f",
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
-    "# context initialization\n",
-    "with initialize(config_path=\"../training/conf/network/\", job_name=\"test_app\"):\n",
-    "    cfg = compose(config_name=\"conv_transformer\")\n",
-    "    print(OmegaConf.to_yaml(cfg))\n",
-    "    print(cfg)"
+    "net"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "dcfbe2ab-6775-4aa4-acf4-57203a3f5511",
+   "id": "40be59bc-db79-4af1-9df4-e280f7a56481",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from importlib import import_module"
+    "img = torch.rand(4, 1, 576, 640)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e3d4c70e-509d-457a-ac81-2bac27cb95d2",
+   "id": "d5a8f10b-edf5-4a18-9747-f016db72c384",
    "metadata": {},
    "outputs": [],
    "source": [
-    "x = import_module(\"text_recognizer.networks.transformer.attention\")"
+    "y = torch.randint(0, 1006, (4, 451))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "039d4a7f-f70d-43a1-8b5f-7e766ac01010",
+   "id": "19423ef1-3d98-4af3-8748-fdd3bb817300",
    "metadata": {},
    "outputs": [],
    "source": [
-    "y = partial(getattr(x, \"Attention\"), dim=16, num_heads=2, **cfg.decoder.attn_kwargs)"
+    "y.shape"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9be1d661-bfac-4826-ab8d-453557713f68",
+   "id": "0712ee7e-4f66-4fb1-bc91-d8a127eb7ac7",
    "metadata": {},
    "outputs": [],
    "source": [
-    "y().causal"
+    "net = net.cuda()\n",
+    "img = img.cuda()\n",
+    "y = y.cuda()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "54b35e6f-35db-4769-8bc5-ed1764768cf2",
+   "id": "719154b4-47db-4c91-bae4-8c572c4a4536",
    "metadata": {},
    "outputs": [],
    "source": [
-    "y(causal=True)"
+    "net(img, y).shape"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "af2c8cfa-0b45-4681-b671-0f97ace62516",
+   "id": "bcb7db0f-0afe-44eb-9bb7-b988fbead95a",
    "metadata": {},
    "outputs": [],
    "source": [
-    "net = instantiate(cfg)"
+    "from torchsummary import summary"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8f0742ad-5e2f-42d5-83e7-6e46398b4f0f",
+   "id": "31af8ee1-28d3-46b8-a847-6506d29bc45c",
    "metadata": {},
    "outputs": [],
    "source": [
-    "net"
+    "summary(net, [(1, 576, 640), (451,)], device=\"cpu\", depth=2)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "709be6cc-6708-4561-ad45-28f433612a0d",
+   "id": "4d6d836f-d169-48b4-92e6-ca17179e6f85",
    "metadata": {},
    "outputs": [],
    "source": []