{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1e40a88b",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline\n",
    "\n",
    "# Standard library\n",
    "import sys\n",
    "from importlib.util import find_spec\n",
    "\n",
    "# Third-party\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import torch\n",
    "from PIL import Image\n",
    "from torch import nn\n",
    "\n",
    "# If text_recognizer cannot be found on the current import path, add the\n",
    "# parent directory so the package can be imported from there.\n",
    "if find_spec(\"text_recognizer\") is None:\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3e812a1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import attr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d3a6146b-94b1-4618-a4e4-00f8e23ffdb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from hydra import compose, initialize\n",
    "from omegaconf import OmegaConf\n",
    "from hydra.utils import instantiate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9c797159-845e-42c6-bd65-1c976ad627cd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "encoder:\n",
      "  _target_: text_recognizer.networks.encoders.efficientnet.EfficientNet\n",
      "  arch: b0\n",
      "  out_channels: 1280\n",
      "  stochastic_dropout_rate: 0.2\n",
      "  bn_momentum: 0.99\n",
      "  bn_eps: 0.001\n",
      "decoder:\n",
      "  _target_: text_recognizer.networks.transformer.Decoder\n",
      "  dim: 256\n",
      "  depth: 2\n",
      "  num_heads: 8\n",
      "  attn_fn: text_recognizer.networks.transformer.attention.Attention\n",
      "  attn_kwargs:\n",
      "    num_heads: 8\n",
      "    dim_head: 64\n",
      "    dropout_rate: 0.2\n",
      "  norm_fn: torch.nn.LayerNorm\n",
      "  ff_fn: text_recognizer.networks.transformer.mlp.FeedForward\n",
      "  ff_kwargs:\n",
      "    dim: 256\n",
      "    dim_out: null\n",
      "    expansion_factor: 4\n",
      "    glu: true\n",
      "    dropout_rate: 0.2\n",
      "  rotary_emb: null\n",
      "  rotary_emb_dim: null\n",
      "  cross_attend: true\n",
      "  pre_norm: true\n",
      "_target_: text_recognizer.networks.conv_transformer.ConvTransformer\n",
      "input_dims:\n",
      "- 1\n",
      "- 576\n",
      "- 640\n",
      "hidden_dim: 256\n",
      "dropout_rate: 0.2\n",
      "max_output_len: 682\n",
      "num_classes: 1004\n",
      "start_token: <s>\n",
      "end_token: <e>\n",
      "pad_token: <p>\n",
      "\n",
      "{'encoder': {'_target_': 'text_recognizer.networks.encoders.efficientnet.EfficientNet', 'arch': 'b0', 'out_channels': 1280, 'stochastic_dropout_rate': 0.2, 'bn_momentum': 0.99, 'bn_eps': 0.001}, 'decoder': {'_target_': 'text_recognizer.networks.transformer.Decoder', 'dim': 256, 'depth': 2, 'num_heads': 8, 'attn_fn': 'text_recognizer.networks.transformer.attention.Attention', 'attn_kwargs': {'num_heads': 8, 'dim_head': 64, 'dropout_rate': 0.2}, 'norm_fn': 'torch.nn.LayerNorm', 'ff_fn': 'text_recognizer.networks.transformer.mlp.FeedForward', 'ff_kwargs': {'dim': 256, 'dim_out': None, 'expansion_factor': 4, 'glu': True, 'dropout_rate': 0.2}, 'rotary_emb': None, 'rotary_emb_dim': None, 'cross_attend': True, 'pre_norm': True}, '_target_': 'text_recognizer.networks.conv_transformer.ConvTransformer', 'input_dims': [1, 576, 640], 'hidden_dim': 256, 'dropout_rate': 0.2, 'max_output_len': 682, 'num_classes': 1004, 'start_token': '<s>', 'end_token': '<e>', 'pad_token': '<p>'}\n"
     ]
    }
   ],
   "source": [
    "# Compose the conv_transformer network config from ../training/conf/network/.\n",
    "# It is kept under its own name because the following cell assigns `cfg` to\n",
    "# the mapping config, which would otherwise replace this one.\n",
    "with initialize(config_path=\"../training/conf/network/\", job_name=\"test_app\"):\n",
    "    network_cfg = compose(config_name=\"conv_transformer\")\n",
    "    print(OmegaConf.to_yaml(network_cfg))\n",
    "    print(network_cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "cdb895b6-8949-4318-8a40-06fb5ed5e8d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_target_: text_recognizer.data.mappings.WordPieceMapping\n",
      "num_features: 1000\n",
      "tokens: iamdb_1kwp_tokens_1000.txt\n",
      "lexicon: iamdb_1kwp_lex_1000.txt\n",
      "data_dir: null\n",
      "use_words: false\n",
      "prepend_wordsep: false\n",
      "special_tokens:\n",
      "- <s>\n",
      "- <e>\n",
      "- <p>\n",
      "extra_symbols:\n",
      "- '\n",
      "\n",
      "  '\n",
      "\n",
      "{'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['<s>', '<e>', '<p>'], 'extra_symbols': ['\\n']}\n"
     ]
    }
   ],
   "source": [
    "# Compose the word_piece mapping config from ../training/conf/mapping/.\n",
    "with initialize(config_path=\"../training/conf/mapping/\", job_name=\"test_app\"):\n",
    "    cfg = compose(config_name=\"word_piece\")\n",
    "\n",
    "# Show the YAML rendering first, then the plain repr of the composed config.\n",
    "print(OmegaConf.to_yaml(cfg))\n",
    "print(cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b6181656-580a-4d96-8495-b6bb510944cc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'_target_': 'text_recognizer.data.mappings.WordPieceMapping', 'num_features': 1000, 'tokens': 'iamdb_1kwp_tokens_1000.txt', 'lexicon': 'iamdb_1kwp_lex_1000.txt', 'data_dir': None, 'use_words': False, 'prepend_wordsep': False, 'special_tokens': ['<s>', '<e>', '<p>'], 'extra_symbols': ['\\n']}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Echo the config currently bound to `cfg` (the word_piece mapping composed above).\n",
    "cfg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cd80d84-3ae5-4bb4-bc00-0dac7b22e134",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0c123c76-ed90-49fa-903b-70ad60a33f16",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-07-29 23:02:56.650 | DEBUG    | text_recognizer.data.mappings:_configure_wordpiece_processor:104 - Using data dir: /home/aktersnurra/projects/text-recognizer/data/downloaded/iam/iamdb\n"
     ]
    }
   ],
   "source": [
    "# Build the object described by `cfg`; its `_target_` is\n",
    "# text_recognizer.data.mappings.WordPieceMapping according to the printed config.\n",
    "mapping = instantiate(cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ff6c57f0-3c96-418e-8192-cd12bf79c073",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1002])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index of the `<p>` special token (listed as pad_token in the network config).\n",
    "mapping.get_index(\"<p>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "348391ec-0cf7-49f6-bac2-26bc8c966705",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1006"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): the recorded value here is 1006, while the network config\n",
    "# printed earlier has num_classes: 1004 -- confirm the two are meant to differ.\n",
    "len(mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "67673bf2-79c6-4010-93dd-9c9ba8f9a90e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1003])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index of the newline character, the only entry under extra_symbols in the config.\n",
    "mapping.get_index(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8923ea1e-b571-42ee-bfd7-4984aa70644f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}