Efficientnet working

author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-06-25 01:19:12 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-06-25 01:19:12 +0200
commit: e9753c0c1476d4c5aa614e1f65a8dd4302a1ce5b (patch)
tree: c2ccfe314e61f0bd24b23acf046e733e723919a2
parent: 1d7f674236d0622addc243d15c05a1dd30ca8121 (diff)
4 files changed, 243 insertions, 194 deletions
diff --git a/notebooks/00-scratch-pad.ipynb b/notebooks/00-scratch-pad.ipynb
index 1816021..32f96d1 100644
--- a/notebooks/00-scratch-pad.ipynb
+++ b/notebooks/00-scratch-pad.ipynb
@@ -2,18 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2\n",
@@ -39,13 +30,180 @@
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from text_recognizer.networks.encoders.efficientnet.efficientnet import EfficientNet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "en = EfficientNet(\"b0\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "==========================================================================================\n",
+      "Layer (type:depth-idx)                   Output Shape              Param #\n",
+      "==========================================================================================\n",
+      "├─Sequential: 1-1                        [-1, 32, 111, 111]        --\n",
+      "|    └─Conv2d: 2-1                       [-1, 32, 111, 111]        288\n",
+      "|    └─BatchNorm2d: 2-2                  [-1, 32, 111, 111]        64\n",
+      "|    └─Mish: 2-3                         [-1, 32, 111, 111]        --\n",
+      "├─ModuleList: 1                          []                        --\n",
+      "|    └─MBConvBlock: 2-4                  [-1, 16, 111, 111]        --\n",
+      "|    |    └─Sequential: 3-1              [-1, 32, 111, 111]        352\n",
+      "|    |    └─Sequential: 3-2              [-1, 32, 111, 111]        552\n",
+      "|    |    └─Sequential: 3-3              [-1, 16, 111, 111]        544\n",
+      "|    └─MBConvBlock: 2-5                  [-1, 24, 55, 55]          --\n",
+      "|    |    └─Sequential: 3-4              [-1, 96, 111, 111]        1,728\n",
+      "|    |    └─Sequential: 3-5              [-1, 96, 55, 55]          1,056\n",
+      "|    |    └─Sequential: 3-6              [-1, 96, 55, 55]          4,728\n",
+      "|    |    └─Sequential: 3-7              [-1, 24, 55, 55]          2,352\n",
+      "|    └─MBConvBlock: 2-6                  [-1, 24, 55, 55]          --\n",
+      "|    |    └─Sequential: 3-8              [-1, 144, 55, 55]         3,744\n",
+      "|    |    └─Sequential: 3-9              [-1, 144, 55, 55]         1,584\n",
+      "|    |    └─Sequential: 3-10             [-1, 144, 55, 55]         10,548\n",
+      "|    |    └─Sequential: 3-11             [-1, 24, 55, 55]          3,504\n",
+      "|    └─MBConvBlock: 2-7                  [-1, 40, 27, 27]          --\n",
+      "|    |    └─Sequential: 3-12             [-1, 144, 55, 55]         3,744\n",
+      "|    |    └─Sequential: 3-13             [-1, 144, 27, 27]         3,888\n",
+      "|    |    └─Sequential: 3-14             [-1, 144, 27, 27]         10,548\n",
+      "|    |    └─Sequential: 3-15             [-1, 40, 27, 27]          5,840\n",
+      "|    └─MBConvBlock: 2-8                  [-1, 40, 27, 27]          --\n",
+      "|    |    └─Sequential: 3-16             [-1, 240, 27, 27]         10,080\n",
+      "|    |    └─Sequential: 3-17             [-1, 240, 27, 27]         6,480\n",
+      "|    |    └─Sequential: 3-18             [-1, 240, 27, 27]         29,100\n",
+      "|    |    └─Sequential: 3-19             [-1, 40, 27, 27]          9,680\n",
+      "|    └─MBConvBlock: 2-9                  [-1, 80, 13, 13]          --\n",
+      "|    |    └─Sequential: 3-20             [-1, 240, 27, 27]         10,080\n",
+      "|    |    └─Sequential: 3-21             [-1, 240, 13, 13]         2,640\n",
+      "|    |    └─Sequential: 3-22             [-1, 240, 13, 13]         29,100\n",
+      "|    |    └─Sequential: 3-23             [-1, 80, 13, 13]          19,360\n",
+      "|    └─MBConvBlock: 2-10                 [-1, 80, 13, 13]          --\n",
+      "|    |    └─Sequential: 3-24             [-1, 480, 13, 13]         39,360\n",
+      "|    |    └─Sequential: 3-25             [-1, 480, 13, 13]         5,280\n",
+      "|    |    └─Sequential: 3-26             [-1, 480, 13, 13]         115,800\n",
+      "|    |    └─Sequential: 3-27             [-1, 80, 13, 13]          38,560\n",
+      "|    └─MBConvBlock: 2-11                 [-1, 80, 13, 13]          --\n",
+      "|    |    └─Sequential: 3-28             [-1, 480, 13, 13]         39,360\n",
+      "|    |    └─Sequential: 3-29             [-1, 480, 13, 13]         5,280\n",
+      "|    |    └─Sequential: 3-30             [-1, 480, 13, 13]         115,800\n",
+      "|    |    └─Sequential: 3-31             [-1, 80, 13, 13]          38,560\n",
+      "|    └─MBConvBlock: 2-12                 [-1, 112, 13, 13]         --\n",
+      "|    |    └─Sequential: 3-32             [-1, 480, 13, 13]         39,360\n",
+      "|    |    └─Sequential: 3-33             [-1, 480, 13, 13]         12,960\n",
+      "|    |    └─Sequential: 3-34             [-1, 480, 13, 13]         115,800\n",
+      "|    |    └─Sequential: 3-35             [-1, 112, 13, 13]         53,984\n",
+      "|    └─MBConvBlock: 2-13                 [-1, 112, 13, 13]         --\n",
+      "|    |    └─Sequential: 3-36             [-1, 672, 13, 13]         76,608\n",
+      "|    |    └─Sequential: 3-37             [-1, 672, 13, 13]         18,144\n",
+      "|    |    └─Sequential: 3-38             [-1, 672, 13, 13]         226,632\n",
+      "|    |    └─Sequential: 3-39             [-1, 112, 13, 13]         75,488\n",
+      "|    └─MBConvBlock: 2-14                 [-1, 112, 13, 13]         --\n",
+      "|    |    └─Sequential: 3-40             [-1, 672, 13, 13]         76,608\n",
+      "|    |    └─Sequential: 3-41             [-1, 672, 13, 13]         18,144\n",
+      "|    |    └─Sequential: 3-42             [-1, 672, 13, 13]         226,632\n",
+      "|    |    └─Sequential: 3-43             [-1, 112, 13, 13]         75,488\n",
+      "|    └─MBConvBlock: 2-15                 [-1, 192, 6, 6]           --\n",
+      "|    |    └─Sequential: 3-44             [-1, 672, 13, 13]         76,608\n",
+      "|    |    └─Sequential: 3-45             [-1, 672, 6, 6]           18,144\n",
+      "|    |    └─Sequential: 3-46             [-1, 672, 6, 6]           226,632\n",
+      "|    |    └─Sequential: 3-47             [-1, 192, 6, 6]           129,408\n",
+      "|    └─MBConvBlock: 2-16                 [-1, 192, 6, 6]           --\n",
+      "|    |    └─Sequential: 3-48             [-1, 1152, 6, 6]          223,488\n",
+      "|    |    └─Sequential: 3-49             [-1, 1152, 6, 6]          31,104\n",
+      "|    |    └─Sequential: 3-50             [-1, 1152, 6, 6]          664,992\n",
+      "|    |    └─Sequential: 3-51             [-1, 192, 6, 6]           221,568\n",
+      "|    └─MBConvBlock: 2-17                 [-1, 192, 6, 6]           --\n",
+      "|    |    └─Sequential: 3-52             [-1, 1152, 6, 6]          223,488\n",
+      "|    |    └─Sequential: 3-53             [-1, 1152, 6, 6]          31,104\n",
+      "|    |    └─Sequential: 3-54             [-1, 1152, 6, 6]          664,992\n",
+      "|    |    └─Sequential: 3-55             [-1, 192, 6, 6]           221,568\n",
+      "|    └─MBConvBlock: 2-18                 [-1, 192, 6, 6]           --\n",
+      "|    |    └─Sequential: 3-56             [-1, 1152, 6, 6]          223,488\n",
+      "|    |    └─Sequential: 3-57             [-1, 1152, 6, 6]          31,104\n",
+      "|    |    └─Sequential: 3-58             [-1, 1152, 6, 6]          664,992\n",
+      "|    |    └─Sequential: 3-59             [-1, 192, 6, 6]           221,568\n",
+      "|    └─MBConvBlock: 2-19                 [-1, 320, 6, 6]           --\n",
+      "|    |    └─Sequential: 3-60             [-1, 1152, 6, 6]          223,488\n",
+      "|    |    └─Sequential: 3-61             [-1, 1152, 6, 6]          12,672\n",
+      "|    |    └─Sequential: 3-62             [-1, 1152, 6, 6]          664,992\n",
+      "|    |    └─Sequential: 3-63             [-1, 320, 6, 6]           369,280\n",
+      "├─Sequential: 1-2                        [-1, 1280, 6, 6]          --\n",
+      "|    └─Conv2d: 2-20                      [-1, 1280, 6, 6]          409,600\n",
+      "|    └─BatchNorm2d: 2-21                 [-1, 1280, 6, 6]          2,560\n",
+      "==========================================================================================\n",
+      "Total params: 7,142,272\n",
+      "Trainable params: 7,142,272\n",
+      "Non-trainable params: 0\n",
+      "Total mult-adds (M): 657.05\n",
+      "==========================================================================================\n",
+      "Input size (MB): 0.19\n",
+      "Forward/backward pass size (MB): 115.14\n",
+      "Params size (MB): 27.25\n",
+      "Estimated Total Size (MB): 142.58\n",
+      "==========================================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "summary(en, (1, 224, 224));"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(2, 2)"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(2,) * 2"
+   ]
+  },
+  {
+   "cell_type": "code",
    "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/aktersnurra/.cache/pypoetry/virtualenvs/text-recognizer-ejNaVa9M-py3.9/lib/python3.9/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at  /pytorch/c10/cuda/CUDAFunctions.cpp:115.)\n",
+      "  return torch._C._cuda_getDeviceCount() > 0\n"
+     ]
+    },
+    {
      "data": {
       "text/plain": [
-       "True"
+       "False"
       ]
      },
      "execution_count": 4,
@@ -195,21 +353,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'text_recognizer.networks.encoders.efficientnet.efficientnet'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-6-c209d858dfb1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtext_recognizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnetworks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoders\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mefficientnet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mefficientnet\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mEfficientNet\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'text_recognizer.networks.encoders.efficientnet.efficientnet'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from text_recognizer.networks.encoders.efficientnet.efficientnet import EfficientNet"
    ]
diff --git a/text_recognizer/networks/encoders/efficientnet/efficientnet.py b/text_recognizer/networks/encoders/efficientnet/efficientnet.py
index 98d58fd..b527d90 100644
--- a/text_recognizer/networks/encoders/efficientnet/efficientnet.py
+++ b/text_recognizer/networks/encoders/efficientnet/efficientnet.py
@@ -1,13 +1,9 @@
 """Efficient net."""
-from typing import Tuple
-
 from torch import nn, Tensor
 
 from .mbconv import MBConvBlock
 from .utils import (
     block_args,
-    calculate_output_image_size,
-    get_same_padding_conv2d,
     round_filters,
     round_repeats,
 )
@@ -28,11 +24,19 @@ class EfficientNet(nn.Module):
         "l2": (4.3, 5.3, 0.5),
     }
 
-    def __init__(self, arch: str, image_size: Tuple[int, int]) -> None:
+    def __init__(
+        self,
+        arch: str,
+        stochastic_dropout_rate: float = 0.2,
+        bn_momentum: float = 0.99,
+        bn_eps: float = 1.0e-3,
+    ) -> None:
         super().__init__()
         assert arch in self.archs, f"{arch} not a valid efficient net architecure!"
         self.arch = self.archs[arch]
-        self.image_size = image_size
+        self.stochastic_dropout_rate = stochastic_dropout_rate
+        self.bn_momentum = 1 - bn_momentum
+        self.bn_eps = bn_eps
         self._conv_stem: nn.Sequential = None
         self._blocks: nn.Sequential = None
         self._conv_head: nn.Sequential = None
@@ -42,57 +46,53 @@ class EfficientNet(nn.Module):
         _block_args = block_args()
         in_channels = 1  # BW
         out_channels = round_filters(32, self.arch)
-        Conv2d = get_same_padding_conv2d(image_size=self.image_size)
         self._conv_stem = nn.Sequential(
-            Conv2d(
+            nn.Conv2d(
                 in_channels=in_channels,
                 out_channels=out_channels,
                 kernel_size=3,
-                stride=2,
+                stride=(2, 2),
                 bias=False,
             ),
-            nn.BatchNorm2d(num_features=out_channels, momentum=bn_momentum, eps=bn_eps),
+            nn.BatchNorm2d(
+                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
+            ),
             nn.Mish(inplace=True),
         )
-        image_size = calculate_output_image_size(self.image_size, 2)
         self._blocks = nn.ModuleList([])
         for args in _block_args:
             args.in_channels = round_filters(args.in_channels, self.arch)
             args.out_channels = round_filters(args.out_channels, self.arch)
-            args.num_repeat = round_repeats(args.num_repeat, self.arch)
-
-            self._blocks.append(
-                MBConvBlock(
-                    **args,
-                    bn_momentum=bn_momentum,
-                    bn_eps=bn_eps,
-                    image_size=image_size,
-                )
-            )
-            image_size = calculate_output_image_size(image_size, args.stride)
-            if args.num_repeat > 1:
-                args.in_channels = args.out_channels
-                args.stride = 1
-            for _ in range(args.num_repeat - 1):
+            args.num_repeats = round_repeats(args.num_repeats, self.arch)
+            for _ in range(args.num_repeats):
                 self._blocks.append(
                     MBConvBlock(
                         **args,
-                        bn_momentum=bn_momentum,
-                        bn_eps=bn_eps,
-                        image_size=image_size,
+                        bn_momentum=self.bn_momentum,
+                        bn_eps=self.bn_eps,
                     )
                 )
+                args.in_channels = args.out_channels
+                args.stride = 1
 
-        in_channels = args.out_channels
+        in_channels = round_filters(320, self.arch)
         out_channels = round_filters(1280, self.arch)
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
         self._conv_head = nn.Sequential(
-            Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
-            nn.BatchNorm2d(num_features=out_channels, momentum=bn_momentum, eps=bn_eps),
+            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
+            nn.BatchNorm2d(
+                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
+            ),
         )
 
     def extract_features(self, x: Tensor) -> Tensor:
         x = self._conv_stem(x)
+        for i, block in enumerate(self._blocks):
+            stochastic_dropout_rate = self.stochastic_dropout_rate
+            if self.stochastic_dropout_rate:
+                stochastic_dropout_rate *= i / len(self._blocks)  # grows with depth
+            x = block(x, stochastic_dropout_rate=stochastic_dropout_rate)
+        x = self._conv_head(x)  # the head output is the returned feature map
+        return x
 
     def forward(self, x: Tensor) -> Tensor:
-        pass
+        return self.extract_features(x)
diff --git a/text_recognizer/networks/encoders/efficientnet/mbconv.py b/text_recognizer/networks/encoders/efficientnet/mbconv.py
index fbb3f22..e43771a 100644
--- a/text_recognizer/networks/encoders/efficientnet/mbconv.py
+++ b/text_recognizer/networks/encoders/efficientnet/mbconv.py
@@ -1,11 +1,11 @@
 """Mobile inverted residual block."""
-from typing import Any, Optional, Tuple
+from typing import Any, Optional, Union, Tuple
 
 import torch
 from torch import nn, Tensor
-from torch.nn import functional as F
+import torch.nn.functional as F
 
-from .utils import calculate_output_image_size, drop_connection, get_same_padding_conv2d
+from .utils import stochastic_depth
 
 
 class MBConvBlock(nn.Module):
@@ -16,22 +16,30 @@ class MBConvBlock(nn.Module):
         in_channels: int,
         out_channels: int,
         kernel_size: int,
-        stride: int,
+        stride: Union[Tuple[int, int], int],
         bn_momentum: float,
         bn_eps: float,
         se_ratio: float,
         expand_ratio: int,
-        image_size: Optional[Tuple[int, int]],
         *args: Any,
         **kwargs: Any,
     ) -> None:
         super().__init__()
         self.kernel_size = kernel_size
+        self.stride = (stride, ) * 2 if isinstance(stride, int) else stride
         self.bn_momentum = bn_momentum
         self.bn_eps = bn_eps
-        self.in_channels = self.in_channels
+        self.in_channels = in_channels
         self.out_channels = out_channels
 
+        if self.stride == (2, 2):
+            self.pad = [
+                (self.kernel_size - 1) // 2 - 1,
+                (self.kernel_size - 1) // 2,
+            ] * 2
+        else:
+            self.pad = [(self.kernel_size - 1) // 2] * 4
+
         # Placeholders for layers.
         self._inverted_bottleneck: nn.Sequential = None
         self._depthwise: nn.Sequential = None
@@ -39,7 +47,6 @@ class MBConvBlock(nn.Module):
         self._pointwise: nn.Sequential = None
 
         self._build(
-            image_size=image_size,
             in_channels=in_channels,
             out_channels=out_channels,
             kernel_size=kernel_size,
@@ -50,11 +57,10 @@ class MBConvBlock(nn.Module):
 
     def _build(
         self,
-        image_size: Optional[Tuple[int, int]],
         in_channels: int,
         out_channels: int,
         kernel_size: int,
-        stride: int,
+        stride: Union[Tuple[int, int], int],
         expand_ratio: int,
         se_ratio: float,
     ) -> None:
@@ -62,7 +68,6 @@ class MBConvBlock(nn.Module):
         inner_channels = in_channels * expand_ratio
         self._inverted_bottleneck = (
             self._configure_inverted_bottleneck(
-                image_size=image_size,
                 in_channels=in_channels,
                 out_channels=inner_channels,
             )
@@ -71,7 +76,6 @@ class MBConvBlock(nn.Module):
         )
 
         self._depthwise = self._configure_depthwise(
-            image_size=image_size,
             in_channels=inner_channels,
             out_channels=inner_channels,
             groups=inner_channels,
@@ -79,7 +83,6 @@ class MBConvBlock(nn.Module):
             stride=stride,
         )
 
-        image_size = calculate_output_image_size(image_size, stride)
         self._squeeze_excite = (
             self._configure_squeeze_excite(
                 in_channels=inner_channels,
@@ -91,19 +94,17 @@ class MBConvBlock(nn.Module):
         )
 
         self._pointwise = self._configure_pointwise(
-            image_size=image_size, in_channels=inner_channels, out_channels=out_channels
+            in_channels=inner_channels, out_channels=out_channels
         )
 
     def _configure_inverted_bottleneck(
         self,
-        image_size: Optional[Tuple[int, int]],
         in_channels: int,
         out_channels: int,
     ) -> nn.Sequential:
         """Expansion phase."""
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
         return nn.Sequential(
-            Conv2d(
+            nn.Conv2d(
                 in_channels=in_channels,
                 out_channels=out_channels,
                 kernel_size=1,
@@ -117,16 +118,14 @@ class MBConvBlock(nn.Module):
 
     def _configure_depthwise(
         self,
-        image_size: Optional[Tuple[int, int]],
         in_channels: int,
         out_channels: int,
         groups: int,
         kernel_size: int,
-        stride: int,
+        stride: Union[Tuple[int, int], int],
     ) -> nn.Sequential:
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
         return nn.Sequential(
-            Conv2d(
+            nn.Conv2d(
                 in_channels=in_channels,
                 out_channels=out_channels,
                 kernel_size=kernel_size,
@@ -143,16 +142,15 @@ class MBConvBlock(nn.Module):
     def _configure_squeeze_excite(
         self, in_channels: int, out_channels: int, se_ratio: float
     ) -> nn.Sequential:
-        Conv2d = get_same_padding_conv2d(image_size=(1, 1))
         num_squeezed_channels = max(1, int(in_channels * se_ratio))
         return nn.Sequential(
-            Conv2d(
+            nn.Conv2d(
                 in_channels=in_channels,
                 out_channels=num_squeezed_channels,
                 kernel_size=1,
             ),
             nn.Mish(inplace=True),
-            Conv2d(
+            nn.Conv2d(
                 in_channels=num_squeezed_channels,
                 out_channels=out_channels,
                 kernel_size=1,
@@ -160,11 +158,10 @@ class MBConvBlock(nn.Module):
         )
 
     def _configure_pointwise(
-        self, image_size: Optional[Tuple[int, int]], in_channels: int, out_channels: int
+        self, in_channels: int, out_channels: int
     ) -> nn.Sequential:
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
         return nn.Sequential(
-            Conv2d(
+            nn.Conv2d(
                 in_channels=in_channels,
                 out_channels=out_channels,
                 kernel_size=1,
@@ -176,20 +173,23 @@ class MBConvBlock(nn.Module):
         )
 
     def _stochastic_depth(
-        self, x: Tensor, residual: Tensor, drop_connection_rate: Optional[float]
+        self, x: Tensor, residual: Tensor, stochastic_dropout_rate: Optional[float]
     ) -> Tensor:
-        if self.id_skip and self.stride == 1 and self.in_channels == self.out_channels:
-            if drop_connection_rate:
-                x = drop_connection(x, p=drop_connection_rate, training=self.training)
+        if self.stride == (1, 1) and self.in_channels == self.out_channels:
+            if stochastic_dropout_rate:
+                x = stochastic_depth(
+                    x, p=stochastic_dropout_rate, training=self.training
+                )
             x += residual
         return x
 
     def forward(
-        self, x: Tensor, drop_connection_rate: Optional[float] = None
+        self, x: Tensor, stochastic_dropout_rate: Optional[float] = None
     ) -> Tensor:
         residual = x
         if self._inverted_bottleneck is not None:
             x = self._inverted_bottleneck(x)
+        x = F.pad(x, self.pad)
 
         x = self._depthwise(x)
 
@@ -201,5 +201,5 @@ class MBConvBlock(nn.Module):
         x = self._pointwise(x)
 
         # Stochastic depth
-        x = self._stochastic_depth(x, residual, drop_connection_rate)
+        x = self._stochastic_depth(x, residual, stochastic_dropout_rate)
         return x
diff --git a/text_recognizer/networks/encoders/efficientnet/utils.py b/text_recognizer/networks/encoders/efficientnet/utils.py
index ff52485..6f293db 100644
--- a/text_recognizer/networks/encoders/efficientnet/utils.py
+++ b/text_recognizer/networks/encoders/efficientnet/utils.py
@@ -1,27 +1,15 @@
 """Util functions for efficient net."""
 from functools import partial
 import math
-from typing import Any, Optional, Tuple, Type
+from typing import Any, Optional, Union, Tuple, Type
 
 from omegaconf import OmegaConf
 import torch
-from torch import nn, Tensor
-import torch.functional as F
+from torch import Tensor
 
 
-def calculate_output_image_size(
-    image_size: Optional[Tuple[int, int]], stride: int
-) -> Optional[Tuple[int, int]]:
-    """Calculates the output image size when using conv2d with same padding."""
-    if image_size is None:
-        return None
-    height = int(math.ceil(image_size[0] / stride))
-    width = int(math.ceil(image_size[1] / stride))
-    return height, width
-
-
-def drop_connection(x: Tensor, p: float, training: bool) -> Tensor:
-    """Drop connection.
+def stochastic_depth(x: Tensor, p: float, training: bool) -> Tensor:
+    """Stochastic depth.
 
     Drops the entire convolution with a given survival probability.
 
@@ -57,91 +45,6 @@ def drop_connection(x: Tensor, p: float, training: bool) -> Tensor:
     return out
 
 
-def get_same_padding_conv2d(image_size: Optional[Tuple[int, int]]) -> Type[nn.Conv2d]:
-    if image_size is None:
-        return Conv2dDynamicSamePadding
-    return partial(Conv2dStaticSamePadding, image_size=image_size)
-
-
-class Conv2dDynamicSamePadding(nn.Conv2d):
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        kernel_size: int,
-        stride: int = 1,
-        dilation: int = 1,
-        groups: int = 1,
-        bias: bool = True,
-    ) -> None:
-        super().__init__(
-            in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias
-        )
-        self.stride = [self.stride] * 2
-
-    def forward(self, x: Tensor) -> Tensor:
-        ih, iw = x.shape[-2:]
-        kh, kw = self.weight.shape[-2:]
-        sh, sw = self.stride
-        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
-        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
-        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
-        if pad_h > 0 or pad_w > 0:
-            x = F.pad(
-                x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]
-            )
-        return F.conv2d(
-            x,
-            self.weight,
-            self.bias,
-            self.stride,
-            self.padding,
-            self.dilation,
-            self.groups,
-        )
-
-
-class Conv2dStaticSamePadding(nn.Conv2d):
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        kernel_size: int,
-        image_size: Tuple[int, int],
-        stride: int = 1,
-        **kwargs: Any
-    ):
-        super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
-        self.stride = [self.stride] * 2
-
-        # Calculate padding based on image size and save it.
-        ih, iw = image_size
-        kh, kw = self.weight.shape[-2:]
-        sh, sw = self.stride
-        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
-        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
-        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
-        if pad_h > 0 or pad_w > 0:
-            self.static_padding = nn.ZeroPad2d(
-                (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
-            )
-        else:
-            self.static_padding = nn.Identity()
-
-    def forward(self, x: Tensor) -> Tensor:
-        x = self.static_padding(x)
-        x = F.pad(
-            x,
-            self.weight,
-            self.bias,
-            self.stride,
-            self.padding,
-            self.dilation,
-            self.groups,
-        )
-        return x
-
-
 def round_filters(filters: int, arch: Tuple[float, float, float]) -> int:
     multiplier = arch[0]
     divisor = 8
@@ -160,7 +63,7 @@ def block_args():
     keys = [
         "num_repeats",
         "kernel_size",
-        "strides",
+        "stride",
         "expand_ratio",
         "in_channels",
         "out_channels",
author	Gustaf Rydholm <gustaf.rydholm@gmail.com>	2021-06-25 01:19:12 +0200
committer	Gustaf Rydholm <gustaf.rydholm@gmail.com>	2021-06-25 01:19:12 +0200
commit	e9753c0c1476d4c5aa614e1f65a8dd4302a1ce5b (patch)
tree	c2ccfe314e61f0bd24b23acf046e733e723919a2
parent	1d7f674236d0622addc243d15c05a1dd30ca8121 (diff)