{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Residual network building blocks\n",
    "\n",
    "Scratch checks for the classes imported from `text_recognizer.networks.residual_network`: each block is instantiated, some are fed a dummy tensor of ones to inspect the output shape, and an `Encoder` is built for inspection."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "import torch\n",
    "from importlib.util import find_spec\n",
    "\n",
    "# Fall back to the parent directory when text_recognizer cannot be found\n",
    "# on the current import path.\n",
    "if find_spec(\"text_recognizer\") is None:\n",
    "    import sys\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "from text_recognizer.networks.residual_network import (\n",
    "    IdentityBlock,\n",
    "    ResidualBlock,\n",
    "    BasicBlock,\n",
    "    BottleNeckBlock,\n",
    "    ResidualLayer,\n",
    "    Encoder,\n",
    "    ResidualNetwork,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the module repr of an IdentityBlock built with arguments (32, 64).\n",
    "IdentityBlock(32, 64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the module repr of a ResidualBlock built with arguments (32, 64).\n",
    "ResidualBlock(32, 64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dummy = torch.ones((1, 32, 224, 224))\n",
    "\n",
    "block = BasicBlock(32, 64)\n",
    "# Print the shape explicitly: only the last expression of a cell is\n",
    "# auto-displayed, so a bare `block(dummy).shape` placed before\n",
    "# `print(block)` would be computed and then silently dropped.\n",
    "print(block(dummy).shape)\n",
    "print(block)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dummy = torch.ones((1, 32, 10, 10))\n",
    "\n",
    "block = BottleNeckBlock(32, 64)\n",
    "# Print the shape explicitly: only the last expression of a cell is\n",
    "# auto-displayed, so a bare `block(dummy).shape` placed before\n",
    "# `print(block)` would be computed and then silently dropped.\n",
    "print(block(dummy).shape)\n",
    "print(block)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dummy = torch.ones((1, 64, 48, 48))\n",
    "\n",
    "layer = ResidualLayer(64, 128, block=BasicBlock, num_blocks=3)\n",
    "# Last expression of the cell, so the output shape is displayed.\n",
    "layer(dummy).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair every size with its successor: (64, 128), (128, 256), (256, 512).\n",
    "blocks_sizes = [64, 128, 256, 512]\n",
    "list(zip(blocks_sizes, blocks_sizes[1:]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Kept under the name `e`: the torchsummary cell further down reads it.\n",
    "e = Encoder(depths=[2, 1], block_sizes=[96, 128])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
"text/plain": [ "Encoder(\n", " (gate): Sequential(\n", " (0): Conv2d(1, 96, kernel_size=(3, 3), stride=(2, 2), padding=(3, 3), bias=False)\n", " (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " (2): ReLU(inplace=True)\n", " (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", " )\n", " (blocks): Sequential(\n", " (0): ResidualLayer(\n", " (blocks): Sequential(\n", " (0): BasicBlock(\n", " (blocks): Sequential(\n", " (0): Sequential(\n", " (0): Conv2dAuto(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", " (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " (1): ReLU(inplace=True)\n", " (2): Sequential(\n", " (0): Conv2dAuto(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", " (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " )\n", " (activation_fn): ReLU(inplace=True)\n", " (shortcut): None\n", " )\n", " (1): BasicBlock(\n", " (blocks): Sequential(\n", " (0): Sequential(\n", " (0): Conv2dAuto(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", " (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " (1): ReLU(inplace=True)\n", " (2): Sequential(\n", " (0): Conv2dAuto(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", " (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " )\n", " (activation_fn): ReLU(inplace=True)\n", " (shortcut): None\n", " )\n", " )\n", " )\n", " (1): ResidualLayer(\n", " (blocks): Sequential(\n", " (0): BasicBlock(\n", " (blocks): Sequential(\n", " (0): Sequential(\n", " (0): Conv2dAuto(96, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " (1): ReLU(inplace=True)\n", " (2): 
Sequential(\n", " (0): Conv2dAuto(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " )\n", " (activation_fn): ReLU(inplace=True)\n", " (shortcut): Sequential(\n", " (0): Conv2d(96, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " )\n", " )\n", " )\n", " )\n", ")" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Encoder(**{\"depths\": [2, 1], \"block_sizes\": [96, 128]})" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "from torchsummary import summary" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------------------------------------\n", " Layer (type) Output Shape Param #\n", "================================================================\n", " Conv2d-1 [-1, 96, 16, 16] 864\n", " BatchNorm2d-2 [-1, 96, 16, 16] 192\n", " ReLU-3 [-1, 96, 16, 16] 0\n", " MaxPool2d-4 [-1, 96, 8, 8] 0\n", " Conv2dAuto-5 [-1, 96, 8, 8] 82,944\n", " BatchNorm2d-6 [-1, 96, 8, 8] 192\n", " ReLU-7 [-1, 96, 8, 8] 0\n", " ReLU-8 [-1, 96, 8, 8] 0\n", " Conv2dAuto-9 [-1, 96, 8, 8] 82,944\n", " BatchNorm2d-10 [-1, 96, 8, 8] 192\n", " ReLU-11 [-1, 96, 8, 8] 0\n", " ReLU-12 [-1, 96, 8, 8] 0\n", " BasicBlock-13 [-1, 96, 8, 8] 0\n", " Conv2dAuto-14 [-1, 96, 8, 8] 82,944\n", " BatchNorm2d-15 [-1, 96, 8, 8] 192\n", " ReLU-16 [-1, 96, 8, 8] 0\n", " ReLU-17 [-1, 96, 8, 8] 0\n", " Conv2dAuto-18 [-1, 96, 8, 8] 82,944\n", " BatchNorm2d-19 [-1, 96, 8, 8] 192\n", " ReLU-20 [-1, 96, 8, 8] 0\n", " ReLU-21 [-1, 96, 8, 8] 0\n", " BasicBlock-22 [-1, 96, 8, 8] 0\n", " ResidualLayer-23 [-1, 96, 8, 8] 0\n", " Conv2d-24 [-1, 128, 4, 4] 12,288\n", " BatchNorm2d-25 [-1, 128, 4, 
4] 256\n", " Conv2dAuto-26 [-1, 128, 4, 4] 110,592\n", " BatchNorm2d-27 [-1, 128, 4, 4] 256\n", " ReLU-28 [-1, 128, 4, 4] 0\n", " ReLU-29 [-1, 128, 4, 4] 0\n", " Conv2dAuto-30 [-1, 128, 4, 4] 147,456\n", " BatchNorm2d-31 [-1, 128, 4, 4] 256\n", " ReLU-32 [-1, 128, 4, 4] 0\n", " ReLU-33 [-1, 128, 4, 4] 0\n", " BasicBlock-34 [-1, 128, 4, 4] 0\n", " ResidualLayer-35 [-1, 128, 4, 4] 0\n", "================================================================\n", "Total params: 604,704\n", "Trainable params: 604,704\n", "Non-trainable params: 0\n", "----------------------------------------------------------------\n", "Input size (MB): 0.00\n", "Forward/backward pass size (MB): 1.69\n", "Params size (MB): 2.31\n", "Estimated Total Size (MB): 4.00\n", "----------------------------------------------------------------\n" ] } ], "source": [ "summary(e, (1, 28, 28), device=\"cpu\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" } }, "nbformat": 4, "nbformat_minor": 4 }