path: root/text_recognizer/networks/transformer
author    Gustaf Rydholm <gustaf.rydholm@gmail.com>  2021-06-06 23:19:35 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com>  2021-06-06 23:19:35 +0200
commit    01d6e5fc066969283df99c759609df441151e9c5
tree      ecd1459e142356d0c7f50a61307b760aca813248  /text_recognizer/networks/transformer
parent    f4688482b4898c0b342d6ae59839dc27fbf856c6

Working on fixing decoder transformer
Diffstat (limited to 'text_recognizer/networks/transformer')
-rw-r--r--  text_recognizer/networks/transformer/__init__.py                                 | 2 ++
-rw-r--r--  text_recognizer/networks/transformer/layers.py                                    | 5 ++---
-rw-r--r--  text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py  | 1 +
-rw-r--r--  text_recognizer/networks/transformer/transformer.py                              | 7 ++++---

4 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/text_recognizer/networks/transformer/__init__.py b/text_recognizer/networks/transformer/__init__.py
index a3f3011..d9e63ef 100644
--- a/text_recognizer/networks/transformer/__init__.py
+++ b/text_recognizer/networks/transformer/__init__.py
@@ -1 +1,3 @@
"""Transformer modules."""
+from .nystromer.nystromer import Nystromer
+from .vit import ViT
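
With these re-exports in place, the attention backbones can be imported straight from the package root instead of from their submodules. A minimal usage sketch (assuming the repository is on the Python path):

    # Hypothetical usage of the new package-level re-exports.
    from text_recognizer.networks.transformer import Nystromer, ViT
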
diff --git a/text_recognizer/networks/transformer/layers.py b/text_recognizer/networks/transformer/layers.py
index b2c703f..a44a525 100644
--- a/text_recognizer/networks/transformer/layers.py
+++ b/text_recognizer/networks/transformer/layers.py
@@ -1,8 +1,6 @@
"""Generates the attention layer architecture."""
from functools import partial
-from typing import Any, Dict, Optional, Type
-
-from click.types import Tuple
+from typing import Any, Dict, Optional, Tuple, Type
from torch import nn, Tensor
@@ -30,6 +28,7 @@ class AttentionLayers(nn.Module):
         pre_norm: bool = True,
     ) -> None:
         super().__init__()
+        self.dim = dim
         attn_fn = partial(attn_fn, dim=dim, num_heads=num_heads, **attn_kwargs)
         norm_fn = partial(norm_fn, dim)
         ff_fn = partial(ff_fn, dim=dim, **ff_kwargs)
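
Exposing self.dim on the layer stack matters because the Transformer wrapper in transformer.py (further down this page) reads attn_layers.dim in its constructor. A tiny stand-in, not the repo's real classes, sketching that contract:

    # Stand-in illustration only (not the repo's classes): the wrapper just needs
    # the layer stack to expose .dim, which is what this commit adds.
    from torch import nn

    class TinyStack(nn.Module):
        def __init__(self, dim: int) -> None:
            super().__init__()
            self.dim = dim                   # the attribute the wrapper relies on
            self.block = nn.Linear(dim, dim)

    class TinyWrapper(nn.Module):
        def __init__(self, layers: TinyStack) -> None:
            super().__init__()
            dim = layers.dim                 # mirrors dim = attn_layers.dim below
            self.layers = layers
            self.norm = nn.LayerNorm(dim)

    wrapper = TinyWrapper(TinyStack(dim=512))
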
diff --git a/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py b/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py
index 9466f6e..7140537 100644
--- a/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py
+++ b/text_recognizer/networks/transformer/positional_encodings/absolute_embedding.py
@@ -1,4 +1,5 @@
"""Absolute positional embedding."""
+import torch
from torch import nn, Tensor
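
The new torch import is presumably needed for tensor helpers such as torch.arange inside the embedding's forward pass, which this hunk does not show. Purely as a hedged illustration of the usual shape of such a module (not this file's actual implementation):

    # Hedged sketch of a typical absolute positional embedding; the real module may differ.
    import torch
    from torch import nn, Tensor

    class AbsolutePositionalEmbedding(nn.Module):
        def __init__(self, dim: int, max_seq_len: int) -> None:
            super().__init__()
            self.emb = nn.Embedding(max_seq_len, dim)

        def forward(self, x: Tensor) -> Tensor:
            # One embedding vector per position; broadcasts over the batch dimension.
            positions = torch.arange(x.shape[1], device=x.device)
            return self.emb(positions)
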
diff --git a/text_recognizer/networks/transformer/transformer.py b/text_recognizer/networks/transformer/transformer.py
index 60ab1ce..31088b4 100644
--- a/text_recognizer/networks/transformer/transformer.py
+++ b/text_recognizer/networks/transformer/transformer.py
@@ -19,7 +19,9 @@ class Transformer(nn.Module):
         emb_dropout: float = 0.0,
         use_pos_emb: bool = True,
     ) -> None:
+        super().__init__()
         dim = attn_layers.dim
+        self.attn_layers = attn_layers
         emb_dim = emb_dim if emb_dim is not None else dim
         self.max_seq_len = max_seq_len
@@ -32,7 +34,6 @@
         )
         self.project_emb = nn.Linear(emb_dim, dim) if emb_dim != dim else nn.Identity()
-        self.attn_layers = attn_layers
         self.norm = nn.LayerNorm(dim)
         self._init_weights()
@@ -45,12 +46,12 @@
     def forward(
         self,
         x: Tensor,
-        mask: Optional[Tensor],
+        mask: Optional[Tensor] = None,
         return_embeddings: bool = False,
         **kwargs: Any
     ) -> Tensor:
         b, n, device = *x.shape, x.device
-        x += self.token_emb(x)
+        x = self.token_emb(x)
         if self.pos_emb is not None:
             x += self.pos_emb(x)
         x = self.emb_dropout(x)
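
Two of the fixes above are worth spelling out: super().__init__() now runs before any submodule is assigned (otherwise nn.Module raises "cannot assign module before Module.__init__() call"), and the token embedding is now assigned to x rather than added in place to the raw index tensor, whose shape and dtype would not match. A stand-in sketch, not the repo's real classes, demonstrating both points:

    # Stand-in (not the repo's real Transformer) showing the two fixes above.
    from typing import Optional

    import torch
    from torch import nn, Tensor

    class TinyDecoder(nn.Module):
        def __init__(self, num_tokens: int, dim: int) -> None:
            super().__init__()                              # must run before submodules are set
            self.token_emb = nn.Embedding(num_tokens, dim)  # would raise AttributeError otherwise

        def forward(self, x: Tensor, mask: Optional[Tensor] = None) -> Tensor:
            x = self.token_emb(x)   # assign; do not add to the raw index tensor
            return x

    model = TinyDecoder(num_tokens=1000, dim=512)
    out = model(torch.randint(0, 1000, (2, 32)))            # mask can now be omitted
    assert out.shape == (2, 32, 512)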