{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "7c02ae76-b540-4b16-9492-e9210b3b9249",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Hide every GPU from CUDA. The variable CUDA reads is the plural\n",
    "# CUDA_VISIBLE_DEVICES; it has to be set before CUDA is initialised, so keep\n",
    "# this ahead of the other imports.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''\n",
    "import random\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import numpy as np\n",
    "from omegaconf import OmegaConf\n",
    "\n",
    "# Re-import edited project modules automatically before each cell runs.\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "# If text_recognizer is not importable, add the parent directory to the\n",
    "# module search path so it can be picked up from there.\n",
    "from importlib.util import find_spec\n",
    "if find_spec(\"text_recognizer\") is None:\n",
    "    import sys\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "ccdb6dde-47e5-429a-88f2-0764fb7e259a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from hydra import compose, initialize\n",
    "from omegaconf import OmegaConf\n",
    "from hydra.utils import instantiate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "3cf50475-39f2-4642-a7d1-5bcbc0a036f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# YAML config of the network to inspect; the relative path is resolved\n",
    "# against the kernel's current working directory.\n",
    "path = \"../training/conf/network/convnext.yaml\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "e52ecb01-c975-4e55-925d-1182c7aea473",
   "metadata": {},
   "outputs": [],
   "source": [
    "# OmegaConf.load accepts a file path and opens the YAML itself, so there is\n",
    "# no need to open a binary handle by hand.\n",
    "cfg = OmegaConf.load(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "f939aa37-7b1d-45cc-885c-323c4540bda1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'_target_': 'text_recognizer.networks.convnext.ConvNext', 'dim': 16, 'dim_mults': [2, 4, 8], 'depths': [3, 3, 6], 'downsampling_factors': [[2, 2], [2, 2], [2, 2]]}"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the loaded config to check its contents.\n",
    "cfg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "aaeab329-aeb0-4a1b-aa35-5a2aab81b1d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the network from the config: Hydra's instantiate imports the class\n",
    "# named under `_target_` and calls it with the remaining keys as arguments.\n",
    "net = instantiate(cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "618b997c-e6a6-4487-b70c-9d260cb556d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from torchinfo import summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "25759b7b-8deb-4163-b75d-a1357c9fe88f",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "====================================================================================================\n",
       "Layer (type:depth-idx)                             Output Shape              Param #\n",
       "====================================================================================================\n",
       "ConvNext                                           [2, 128, 7, 128]          --\n",
       "├─Conv2d: 1-1                                      [2, 16, 56, 1024]         800\n",
       "├─ModuleList: 1-2                                  --                        --\n",
       "│    └─ModuleList: 2-1                             --                        --\n",
       "│    │    └─ConvNextBlock: 3-1                     [2, 16, 56, 1024]         --\n",
       "│    │    │    └─Conv2d: 4-1                       [2, 16, 56, 1024]         800\n",
       "│    │    │    └─Sequential: 4-2                   [2, 16, 56, 1024]         9,280\n",
       "│    │    │    └─Identity: 4-3                     [2, 16, 56, 1024]         --\n",
       "│    │    └─ModuleList: 3-2                        --                        --\n",
       "│    │    │    └─ConvNextBlock: 4-4                [2, 16, 56, 1024]         10,080\n",
       "│    │    │    └─ConvNextBlock: 4-5                [2, 16, 56, 1024]         10,080\n",
       "│    │    │    └─ConvNextBlock: 4-6                [2, 16, 56, 1024]         10,080\n",
       "│    │    └─Downsample: 3-3                        [2, 32, 28, 512]          --\n",
       "│    │    │    └─Sequential: 4-7                   [2, 32, 28, 512]          2,080\n",
       "│    └─ModuleList: 2-2                             --                        --\n",
       "│    │    └─ConvNextBlock: 3-4                     [2, 32, 28, 512]          --\n",
       "│    │    │    └─Conv2d: 4-8                       [2, 32, 28, 512]          1,600\n",
       "│    │    │    └─Sequential: 4-9                   [2, 32, 28, 512]          36,992\n",
       "│    │    │    └─Identity: 4-10                    [2, 32, 28, 512]          --\n",
       "│    │    └─ModuleList: 3-5                        --                        --\n",
       "│    │    │    └─ConvNextBlock: 4-11               [2, 32, 28, 512]          38,592\n",
       "│    │    │    └─ConvNextBlock: 4-12               [2, 32, 28, 512]          38,592\n",
       "│    │    │    └─ConvNextBlock: 4-13               [2, 32, 28, 512]          38,592\n",
       "│    │    └─Downsample: 3-6                        [2, 64, 14, 256]          --\n",
       "│    │    │    └─Sequential: 4-14                  [2, 64, 14, 256]          8,256\n",
       "│    └─ModuleList: 2-3                             --                        --\n",
       "│    │    └─ConvNextBlock: 3-7                     [2, 64, 14, 256]          --\n",
       "│    │    │    └─Conv2d: 4-15                      [2, 64, 14, 256]          3,200\n",
       "│    │    │    └─Sequential: 4-16                  [2, 64, 14, 256]          147,712\n",
       "│    │    │    └─Identity: 4-17                    [2, 64, 14, 256]          --\n",
       "│    │    └─ModuleList: 3-8                        --                        --\n",
       "│    │    │    └─ConvNextBlock: 4-18               [2, 64, 14, 256]          150,912\n",
       "│    │    │    └─ConvNextBlock: 4-19               [2, 64, 14, 256]          150,912\n",
       "│    │    │    └─ConvNextBlock: 4-20               [2, 64, 14, 256]          150,912\n",
       "│    │    │    └─ConvNextBlock: 4-21               [2, 64, 14, 256]          150,912\n",
       "│    │    │    └─ConvNextBlock: 4-22               [2, 64, 14, 256]          150,912\n",
       "│    │    │    └─ConvNextBlock: 4-23               [2, 64, 14, 256]          150,912\n",
       "│    │    └─Downsample: 3-9                        [2, 128, 7, 128]          --\n",
       "│    │    │    └─Sequential: 4-24                  [2, 128, 7, 128]          32,896\n",
       "├─LayerNorm: 1-3                                   [2, 128, 7, 128]          128\n",
       "====================================================================================================\n",
       "Total params: 1,295,232\n",
       "Trainable params: 1,295,232\n",
       "Non-trainable params: 0\n",
       "Total mult-adds (G): 16.88\n",
       "====================================================================================================\n",
       "Input size (MB): 0.46\n",
       "Forward/backward pass size (MB): 598.21\n",
       "Params size (MB): 5.18\n",
       "Estimated Total Size (MB): 603.85\n",
       "===================================================================================================="
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-layer summary, down to nesting depth 4, for an input of size\n",
    "# (2, 1, 56, 1024), evaluated on the CPU.\n",
    "summary(\n",
    "    net,\n",
    "    input_size=(2, 1, 56, 1024),\n",
    "    depth=4,\n",
    "    device=\"cpu\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}