diff options
Diffstat (limited to 'text_recognizer/networks/transformer')
4 files changed, 9 insertions, 6 deletions
diff --git a/text_recognizer/networks/transformer/__init__.py b/text_recognizer/networks/transformer/__init__.py
index a3f3011..d9e63ef 100644
--- a/text_recognizer/networks/transformer/__init__.py
+++ b/text_recognizer/networks/transformer/__init__.py
@@ -1 +1,3 @@
 """Transformer modules."""
+from .nystromer.nystromer import Nystromer
+from .vit import ViT
diff --git a/text_recognizer/networks/transformer/layers.py b/text_recognizer/networks/transformer/layers.py
index b2c703f..a44a525 100644
--- a/text_recognizer/networks/transformer/layers.py
+++ b/text_recognizer/networks/transformer/layers.py
@@ -1,8 +1,6 @@
 """Generates the attention layer architecture."""
 from functools import partial
-from typing import Any, Dict, Optional, Type
-
-from click.types import Tuple
+from typing import Any, Dict, Optional, Tuple, Type
 
 from torch import nn, Tensor
@@ -30,6 +28,7 @@ class AttentionLayers(nn.Module):
         pre_norm: bool = True,
     ) -> None:
         super().__init__()
+        self.dim = dim
         attn_fn = partial(attn_fn, dim=dim, num_heads=num_heads, **attn_kwargs)
         norm_fn = partial(norm_fn, dim)
         ff_fn = partial(ff_fn, dim=dim, **ff_kwargs)
diff --git a/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py b/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py
index 9466f6e..7140537 100644
--- a/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py
+++ b/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py
@@ -1,4 +1,5 @@
 """Absolute positional embedding."""
+import torch
 
 from torch import nn, Tensor
diff --git a/text_recognizer/networks/transformer/transformer.py b/text_recognizer/networks/transformer/transformer.py
index 60ab1ce..31088b4 100644
--- a/text_recognizer/networks/transformer/transformer.py
+++ b/text_recognizer/networks/transformer/transformer.py
@@ -19,7 +19,9 @@ class Transformer(nn.Module):
         emb_dropout: float = 0.0,
         use_pos_emb: bool = True,
     ) -> None:
+
         super().__init__()
         dim = attn_layers.dim
+        self.attn_layers = attn_layers
         emb_dim = emb_dim if emb_dim is not None else dim
         self.max_seq_len = max_seq_len
@@ -32,7 +34,6 @@ class Transformer(nn.Module):
         )
         self.project_emb = nn.Linear(emb_dim, dim) if emb_dim != dim else nn.Identity()
-        self.attn_layers = attn_layers
         self.norm = nn.LayerNorm(dim)
         self._init_weights()
@@ -45,12 +46,12 @@ class Transformer(nn.Module):
     def forward(
         self,
         x: Tensor,
-        mask: Optional[Tensor],
+        mask: Optional[Tensor] = None,
         return_embeddings: bool = False,
         **kwargs: Any
    ) -> Tensor:
         b, n, device = *x.shape, x.device
-        x += self.token_emb(x)
+        x = self.token_emb(x)
         if self.pos_emb is not None:
             x += self.pos_emb(x)
         x = self.emb_dropout(x)