From 805d5726c17b83e00dcea0d2608dcd83a91f723d Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Thu, 6 May 2021 22:28:25 +0200 Subject: Working on attention layer configuration --- .../networks/transformer/attention_layers.py | 19 +++++++++++++++++++ .../networks/transformer/nystromer/nystromer.py | 8 ++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 text_recognizer/networks/transformer/attention_layers.py diff --git a/text_recognizer/networks/transformer/attention_layers.py b/text_recognizer/networks/transformer/attention_layers.py new file mode 100644 index 0000000..721fa27 --- /dev/null +++ b/text_recognizer/networks/transformer/attention_layers.py @@ -0,0 +1,19 @@ +"""Generates the attention layer architecture.""" +from typing import Type + +import torch +from torch import nn, Tensor + + +class AttentionLayers(nn.Module): + def __init__( + self, + dim: int, + depth: int, + num_heads: int, + norm_layer: Type[nn.Module], + causal: bool = False, + cross_attend: bool = False, + only_cross: bool = False, + ) -> None: + pass diff --git a/text_recognizer/networks/transformer/nystromer/nystromer.py b/text_recognizer/networks/transformer/nystromer/nystromer.py index 7cc889e..799a811 100644 --- a/text_recognizer/networks/transformer/nystromer/nystromer.py +++ b/text_recognizer/networks/transformer/nystromer/nystromer.py @@ -26,7 +26,8 @@ class Nystromer(nn.Module): residual: bool = True, residual_conv_kernel: int = 33, dropout_rate: float = 0.0, - ): + glu: bool = True, + ) -> None: super().__init__() self.layers = nn.ModuleList( [ @@ -45,7 +46,10 @@ class Nystromer(nn.Module): dropout_rate=dropout_rate, ), ), - PreNorm(dim, FeedForward(dim=dim, dropout_rate=dropout_rate)), + PreNorm( + dim, + FeedForward(dim=dim, glu=glu, dropout_rate=dropout_rate), + ), ] ) for _ in range(depth) -- cgit v1.2.3-70-g09d2