From 85953dcbf4893653311d9a45b127d74e76af4ad3 Mon Sep 17 00:00:00 2001
From: aktersnurra
Date: Wed, 16 Jun 2021 20:12:03 +0200
Subject: Working on MBconvblock

---
 .../networks/encoders/efficient_net/__init__.py    |   0
 .../networks/encoders/efficient_net/block.py       | 160 --------------------
 .../networks/encoders/efficientnet/__init__.py     |   0
 .../networks/encoders/efficientnet/mbconv_block.py | 163 +++++++++++++++++++++
 4 files changed, 163 insertions(+), 160 deletions(-)
 delete mode 100644 text_recognizer/networks/encoders/efficient_net/__init__.py
 delete mode 100644 text_recognizer/networks/encoders/efficient_net/block.py
 create mode 100644 text_recognizer/networks/encoders/efficientnet/__init__.py
 create mode 100644 text_recognizer/networks/encoders/efficientnet/mbconv_block.py

diff --git a/text_recognizer/networks/encoders/efficient_net/__init__.py b/text_recognizer/networks/encoders/efficient_net/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/text_recognizer/networks/encoders/efficient_net/block.py b/text_recognizer/networks/encoders/efficient_net/block.py
deleted file mode 100644
index d9a0416..0000000
--- a/text_recognizer/networks/encoders/efficient_net/block.py
+++ /dev/null
@@ -1,160 +0,0 @@
-"""Mobile inverted residual block."""
-from typing import Tuple
-
-import torch
-from torch import nn, Tensor
-from torch.nn import functional as F
-
-from .utils import get_same_padding_conv2d
-
-
-class MBConvBlock(nn.Module):
-    """Mobile Inverted Residual Bottleneck block."""
-
-    def __init__(
-        self,
-        in_channels: int,
-        kernel_size: int,
-        stride: int,
-        bn_momentum: float,
-        bn_eps: float,
-        se_ratio: float,
-        id_skip: bool,
-        expand_ratio: int,
-        image_size: Tuple[int, int],
-    ) -> None:
-        super().__init__()
-        self.kernel_size = kernel_size
-        self.bn_momentum = bn_momentum
-        self.bn_eps = bn_eps
-        self.has_se = se_ratio is not None and 0.0 < se_ratio < 1.0
-
-        out_channels = in_channels * expand_ratio
-        self._inverted_bottleneck = (
-            self._configure_inverted_bottleneck(
-                image_size=image_size,
-                in_channels=in_channels,
-                out_channels=out_channels,
-            )
-            if expand_ratio != 1
-            else None
-        )
-
-        self._depthwise = self._configure_depthwise(
-            image_size=image_size,
-            in_channels=in_channels,
-            out_channels=out_channels,
-            groups=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-        )
-
-        image_size = calculate_output_image_size(image_size, stride)
-        self._squeeze_excite = (
-            self._configure_squeeze_excite(
-                in_channels=out_channels, out_channels=out_channels, se_ratio=se_ratio
-            )
-            if self.has_se
-            else None
-        )
-
-        self._pointwise = self._configure_pointwise(
-            image_size=image_size, in_channels=out_channels, out_channels=out_channels
-        )
-
-    def _configure_inverted_bottleneck(
-        self,
-        image_size: Tuple[int, int],
-        in_channels: int,
-        out_channels: int,
-    ) -> nn.Sequential:
-        """Expansion phase."""
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
-        return nn.Sequential(
-            Conv2d(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                bias=False,
-            ),
-            nn.BatchNorm2d(
-                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
-            ),
-            nn.SiLU(inplace=True),
-        )
-
-    def _configure_depthwise(
-        self,
-        image_size: Tuple[int, int],
-        in_channels: int,
-        out_channels: int,
-        groups: int,
-        kernel_size: int,
-        stride: int,
-    ) -> nn.Sequential:
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
-        return nn.Sequential(
-            Conv2d(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                groups=groups,
-                bias=False,
-            ),
-            nn.BatchNorm2d(
-                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
-            ),
-            nn.SiLU(inplace=True),
-        )
-
-    def _configure_squeeze_excite(
-        self, in_channels: int, out_channels: int, se_ratio: float
-    ) -> nn.Sequential:
-        Conv2d = get_same_padding_conv2d(image_size=(1, 1))
-        num_squeezed_channels = max(1, int(in_channels * se_ratio))
-        return nn.Sequential(
-            Conv2d(
-                in_channels=in_channels,
-                out_channels=num_squeezed_channels,
-                kernel_size=1,
-            ),
-            nn.SiLU(inplace=True),
-            Conv2d(
-                in_channels=num_squeezed_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-            ),
-        )
-
-    def _configure_pointwise(
-        self, image_size: Tuple[int, int], in_channels: int, out_channels: int
-    ) -> nn.Sequential:
-        Conv2d = get_same_padding_conv2d(image_size=image_size)
-        return nn.Sequential(
-            Conv2d(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                bias=False,
-            ),
-            nn.BatchNorm2d(
-                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
-            ),
-        )
-
-    def forward(self, x: Tensor, drop_connection_rate: Optional[float]) -> Tensor:
-        residual = x
-        if self._inverted_bottleneck is not None:
-            x = self._inverted_bottleneck(x)
-
-        x = self._depthwise(x)
-
-        if self._squeeze_excite is not None:
-            x_squeezed = F.adaptive_avg_pool2d(x, 1)
-            x_squeezed = self._squeeze_excite(x)
-            x = torch.sigmoid(x_squeezed) * x
-
-        x = self._pointwise(x)
-
-        # Stochastic depth
diff --git a/text_recognizer/networks/encoders/efficientnet/__init__.py b/text_recognizer/networks/encoders/efficientnet/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/text_recognizer/networks/encoders/efficientnet/mbconv_block.py b/text_recognizer/networks/encoders/efficientnet/mbconv_block.py
new file mode 100644
index 0000000..0384cd9
--- /dev/null
+++ b/text_recognizer/networks/encoders/efficientnet/mbconv_block.py
@@ -0,0 +1,163 @@
+"""Mobile inverted residual block."""
+from typing import Optional, Tuple
+
+import torch
+from torch import nn, Tensor
+from torch.nn import functional as F
+
+from .utils import calculate_output_image_size, get_same_padding_conv2d
+
+
+class MBConvBlock(nn.Module):
+    """Mobile Inverted Residual Bottleneck block."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        stride: int,
+        bn_momentum: float,
+        bn_eps: float,
+        se_ratio: float,
+        id_skip: bool,
+        expand_ratio: int,
+        image_size: Tuple[int, int],
+    ) -> None:
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.bn_momentum = bn_momentum
+        self.bn_eps = bn_eps
+        self.id_skip = id_skip
+        self.has_se = se_ratio is not None and 0.0 < se_ratio < 1.0
+        self._build(
+            image_size=image_size,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            expand_ratio=expand_ratio,
+            se_ratio=se_ratio,
+        )
+
+    def _build(
+        self,
+        image_size: Tuple[int, int],
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        stride: int,
+        expand_ratio: int,
+        se_ratio: float,
+    ) -> None:
+        """Configure the sub-layers of the block."""
+        inner_channels = in_channels * expand_ratio
+        self._inverted_bottleneck = (
+            self._configure_inverted_bottleneck(
+                image_size=image_size,
+                in_channels=in_channels,
+                out_channels=inner_channels,
+            )
+            if expand_ratio != 1
+            else None
+        )
+
+        self._depthwise = self._configure_depthwise(
+            image_size=image_size,
+            in_channels=inner_channels,
+            out_channels=inner_channels,
+            groups=inner_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+        )
+
+        image_size = calculate_output_image_size(image_size, stride)
+        self._squeeze_excite = (
+            self._configure_squeeze_excite(
+                in_channels=inner_channels,
+                out_channels=inner_channels,
+                se_ratio=se_ratio,
+            )
+            if self.has_se
+            else None
+        )
+
+        self._pointwise = self._configure_pointwise(
+            image_size=image_size,
+            in_channels=inner_channels,
+            out_channels=out_channels,
+        )
+
+    def _configure_inverted_bottleneck(
+        self,
+        image_size: Tuple[int, int],
+        in_channels: int,
+        out_channels: int,
+    ) -> nn.Sequential:
+        """Expansion phase."""
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        return nn.Sequential(
+            Conv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                bias=False,
+            ),
+            nn.BatchNorm2d(
+                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
+            ),
+            nn.SiLU(inplace=True),
+        )
+
+    def _configure_depthwise(
+        self,
+        image_size: Tuple[int, int],
+        in_channels: int,
+        out_channels: int,
+        groups: int,
+        kernel_size: int,
+        stride: int,
+    ) -> nn.Sequential:
+        """Depthwise convolution phase."""
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        return nn.Sequential(
+            Conv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=groups,
+                bias=False,
+            ),
+            nn.BatchNorm2d(
+                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
+            ),
+            nn.SiLU(inplace=True),
+        )
+
+    def _configure_squeeze_excite(
+        self, in_channels: int, out_channels: int, se_ratio: float
+    ) -> nn.Sequential:
+        """Squeeze-and-excitation phase."""
+        Conv2d = get_same_padding_conv2d(image_size=(1, 1))
+        num_squeezed_channels = max(1, int(in_channels * se_ratio))
+        return nn.Sequential(
+            Conv2d(
+                in_channels=in_channels,
+                out_channels=num_squeezed_channels,
+                kernel_size=1,
+            ),
+            nn.SiLU(inplace=True),
+            Conv2d(
+                in_channels=num_squeezed_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+            ),
+        )
+
+    def _configure_pointwise(
+        self, image_size: Tuple[int, int], in_channels: int, out_channels: int
+    ) -> nn.Sequential:
+        """Output projection phase."""
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        return nn.Sequential(
+            Conv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                bias=False,
+            ),
+            nn.BatchNorm2d(
+                num_features=out_channels, momentum=self.bn_momentum, eps=self.bn_eps
+            ),
+        )
+
+    def forward(
+        self, x: Tensor, drop_connection_rate: Optional[float] = None
+    ) -> Tensor:
+        """Forward pass with optional stochastic depth."""
+        residual = x
+        if self._inverted_bottleneck is not None:
+            x = self._inverted_bottleneck(x)
+
+        x = self._depthwise(x)
+
+        if self._squeeze_excite is not None:
+            x_squeezed = F.adaptive_avg_pool2d(x, 1)
+            x_squeezed = self._squeeze_excite(x_squeezed)
+            x = torch.sigmoid(x_squeezed) * x
+
+        x = self._pointwise(x)
+
+        # Stochastic depth: when the block can act as an identity mapping,
+        # randomly drop its output per sample during training and add the
+        # residual connection.
+        if self.id_skip and self.stride == 1 and self.in_channels == self.out_channels:
+            if drop_connection_rate is not None and self.training:
+                keep_prob = 1.0 - drop_connection_rate
+                mask = (
+                    torch.rand(x.shape[0], 1, 1, 1, device=x.device, dtype=x.dtype)
+                    < keep_prob
+                ).to(x.dtype)
+                x = x * mask / keep_prob
+            x = x + residual
+        return x
--
cgit v1.2.3-70-g09d2
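
A minimal usage sketch of the new block follows. It is illustrative only: it assumes the constructor signature shown in the patch above (including the out_channels argument) and that get_same_padding_conv2d and calculate_output_image_size are available in the sibling utils module, as the imports in mbconv_block.py expect. The concrete channel counts, image size, and hyperparameters are arbitrary.

    import torch

    from text_recognizer.networks.encoders.efficientnet.mbconv_block import MBConvBlock

    # Illustrative hyperparameters; not taken from any particular EfficientNet config.
    block = MBConvBlock(
        in_channels=32,
        out_channels=16,
        kernel_size=3,
        stride=1,
        bn_momentum=0.01,  # PyTorch-style BatchNorm momentum (1 - TF momentum).
        bn_eps=1e-3,
        se_ratio=0.25,
        id_skip=True,
        expand_ratio=1,
        image_size=(224, 224),
    )

    x = torch.randn(8, 32, 224, 224)  # (batch, channels, height, width)
    out = block(x, drop_connection_rate=0.2)
    print(out.shape)  # Expected: torch.Size([8, 16, 224, 224])

With stride=1 and same padding, the spatial size is preserved; because in_channels differs from out_channels here, the identity skip and stochastic depth branch are not exercised in this example.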