summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/text_recognizer/networks/unet.py134
1 files changed, 134 insertions, 0 deletions
diff --git a/src/text_recognizer/networks/unet.py b/src/text_recognizer/networks/unet.py
new file mode 100644
index 0000000..eb4188b
--- /dev/null
+++ b/src/text_recognizer/networks/unet.py
@@ -0,0 +1,134 @@
+"""UNet for segmentation."""
+from typing import List, Tuple
+
+import torch
+from torch import nn
+from torch import Tensor
+
+from text_recognizer.networks.util import activation_function
+
+
+class ConvBlock(nn.Module):
+ """Basic UNet convolutional block."""
+
+ def __init__(self, channels: List[int], activation: str) -> None:
+ super().__init__()
+ self.channels = channels
+ self.activation = activation_function(activation)
+ self.block = self._configure_block()
+
+ def _configure_block(self) -> nn.Sequential:
+ block = []
+ for i in range(len(self.channels) - 1):
+ block += [
+ nn.Conv2d(
+ self.channels[i], self.channels[i + 1], kernel_size=3, padding=1
+ ),
+ nn.BatchNorm2d(self.channels[i + 1]),
+ self.activation,
+ ]
+
+ return nn.Sequential(*block)
+
+ def forward(self, x: Tensor) -> Tensor:
+ """Apply the convolutional block."""
+ return self.block(x)
+
+
+class DownSamplingBlock(nn.Module):
+ """Basic down sampling block."""
+
+ def __init__(
+ self, channels: List[int], activation: str, pooling_kernel: int = 2
+ ) -> None:
+ super().__init__()
+ self.conv_block = ConvBlock(channels, activation)
+ self.down_sampling = nn.MaxPool2d(pooling_kernel)
+
+ def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]:
+ """Return the convolutional block output and a down sampled tensor."""
+ x = self.conv_block(x)
+ return self.down_sampling(x), x
+
+
+class UpSamplingBlock(nn.Module):
+ """The upsampling block of the UNet."""
+
+ def __init__(
+ self, channels: List[int], activation: str, scale_factor: int = 2
+ ) -> None:
+ super().__init__()
+ self.conv_block = ConvBlock(channels, activation)
+ self.up_sampling = nn.Upsample(
+ scale_factor=scale_factor, mode="bilinear", align_corners=True
+ )
+
+ def forward(self, x: Tensor, x_skip: Tensor) -> Tensor:
+ """Apply the up sampling and convolutional block."""
+ x = self.up_sampling(x)
+ x = torch.cat((x, x_skip), dim=1)
+ return self.conv_block(x)
+
+
+class UNet(nn.Module):
+ """UNet architecture."""
+
+ def __init__(
+ self,
+ in_channels: int = 1,
+ base_channels: int = 64,
+ depth: int = 4,
+ out_channels: int = 3,
+ activation: str = "relu",
+ pooling_kernel: int = 2,
+ scale_factor: int = 2,
+ ) -> None:
+ super().__init__()
+ channels = [base_channels * 2 ** i for i in range(depth)]
+ self.down_sampling_blocks = self._configure_down_sampling_blocks(
+ channels, activation, pooling_kernel
+ )
+ self.up_sampling_blocks = self._configure_up_sampling_blocks(
+ channels, activation, scale_factor
+ )
+
+ def _configure_down_sampling_blocks(
+ self, channels: List[int], activation: str, pooling_kernel: int
+ ) -> nn.ModuleList:
+ return nn.ModuleList(
+ [
+ DownSamplingBlock(
+ [channels[i], channels[i + 1], channels[i + 1]],
+ activation,
+ pooling_kernel,
+ )
+ for i in range(len(channels))
+ ]
+ )
+
+ def _configure_up_sampling_blocks(
+ self,
+ channels: List[int],
+ activation: str,
+ scale_factor: int,
+ ) -> nn.ModuleList:
+ return nn.ModuleList(
+ [
+ UpSamplingBlock(
+ [channels[i], channels[i + 1], channels[i + 1]],
+ activation,
+ scale_factor,
+ )
+ ]
+ for i in range(len(channels))
+ )
+
+ def down_sampling(self, x: Tensor) -> List[Tensor]:
+ x_skips = []
+ for block in self.down_sampling_blocks:
+ x, x_skip = block(x)
+ x_skips.append(x_skip)
+ return x, x_skips
+
+ def up_sampling(self, x: Tensor, x_skips: List[Tensor]) -> Tensor:
+ pass