diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-08-04 22:15:36 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-08-04 22:15:36 +0200 |
commit | 1bccf71cf4eec335001b50a8fbc0c991d0e6d13a (patch) | |
tree | dd58219f94836a857dfbe794585d6f68ee28dbdc /text_recognizer/networks/vqvae/attention.py | |
parent | efe850821b88306481ab5aa2a5f79a2581e4458c (diff) |
Add conv attention, up and downsampling to vqvae module
Diffstat (limited to 'text_recognizer/networks/vqvae/attention.py')
-rw-r--r-- | text_recognizer/networks/vqvae/attention.py | 74 |
1 files changed, 74 insertions, 0 deletions
"""Convolutional attention block."""
import attr
import torch
from torch import nn, Tensor
import torch.nn.functional as F

from text_recognizer.networks.vqvae.norm import Normalize


# eq=False: by default attrs generates __eq__ and sets __hash__ to None, which
# makes instances unhashable. nn.Module relies on identity hashing (e.g.
# named_modules() tracks visited modules in a set), so the default would break
# .parameters(), .to(), optimizers, etc.
@attr.s(eq=False)
class Attention(nn.Module):
    """Convolutional self-attention over the spatial positions of a feature map.

    Queries, keys and values are produced by 1x1 convolutions of the
    normalized input. Every spatial position attends to every other position
    (single head, scaled dot-product), the result goes through a final 1x1
    projection and is added back to the input as a residual.

    Args:
        in_channels: Number of channels of the input feature map; the output
            has the same number of channels.
    """

    in_channels: int = attr.ib()
    q: nn.Conv2d = attr.ib(init=False)
    k: nn.Conv2d = attr.ib(init=False)
    v: nn.Conv2d = attr.ib(init=False)
    proj: nn.Conv2d = attr.ib(init=False)
    norm: Normalize = attr.ib(init=False)

    def __attrs_post_init__(self) -> None:
        """Post init configuration."""
        # nn.Module must be initialized before any submodule is assigned.
        super().__init__()
        # Registration order (q, k, v, norm, proj) is kept stable so that
        # state_dict key order and seeded weight initialization do not change.
        self.q = self._pointwise_conv()
        self.k = self._pointwise_conv()
        self.v = self._pointwise_conv()
        # NOTE(review): Normalize is defined in vqvae/norm.py (not visible
        # here); presumably a channel-wise normalization layer -- confirm.
        self.norm = Normalize(num_channels=self.in_channels)
        self.proj = self._pointwise_conv()

    def _pointwise_conv(self) -> nn.Conv2d:
        """Build a channel-preserving 1x1 convolution."""
        return nn.Conv2d(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )

    def forward(self, x: Tensor) -> Tensor:
        """Applies attention to feature maps.

        Args:
            x: Feature map of shape [B, C, H, W] with C == in_channels.

        Returns:
            Tensor of shape [B, C, H, W]: the projected attention output plus
            the unnormalized input (residual connection).
        """
        residual = x
        x = self.norm(x)
        q = self.q(x)
        k = self.k(x)
        v = self.v(x)

        # Attention
        B, C, H, W = q.shape
        q = q.reshape(B, C, H * W).permute(0, 2, 1)  # [B, HW, C]
        k = k.reshape(B, C, H * W)  # [B, C, HW]
        # energy[b, i, j] = <q_i, k_j> / sqrt(C); softmax over keys j.
        energy = torch.bmm(q, k) * (C ** -0.5)  # [B, HW, HW]
        attention = F.softmax(energy, dim=2)

        # Compute attention to which values.
        # Keep v channel-major and contract over the key axis:
        # out[b, c, i] = sum_j v[b, c, j] * attention[b, i, j].
        # (Multiplying a [B, HW, C] value tensor by the [B, HW, HW] attention
        # map, as done previously, has mismatched inner dimensions.)
        v = v.reshape(B, C, H * W)  # [B, C, HW]
        out = torch.bmm(v, attention.permute(0, 2, 1))  # [B, C, HW]
        out = out.reshape(B, C, H, W)
        out = self.proj(out)
        return out + residual