summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-05-06 22:28:25 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-05-06 22:28:25 +0200
commit 805d5726c17b83e00dcea0d2608dcd83a91f723d (patch)
tree 850af62a8e2d558d82aa4dbf5b8611755d256b89
parent 4defc734b681071e19dd86404abd416d24330b9a (diff)
Working on attention layer configuration
-rw-r--r-- text_recognizer/networks/transformer/attention_layers.py 19
-rw-r--r-- text_recognizer/networks/transformer/nystromer/nystromer.py 8
2 files changed, 25 insertions, 2 deletions
diff --git a/text_recognizer/networks/transformer/attention_layers.py b/text_recognizer/networks/transformer/attention_layers.py
new file mode 100644
index 0000000..721fa27
--- /dev/null
+++ b/text_recognizer/networks/transformer/attention_layers.py
@@ -0,0 +1,19 @@
+"""Generates the attention layer architecture."""
+from typing import Type
+
+import torch
+from torch import nn, Tensor
+
+
+class AttentionLayers(nn.Module):  # work-in-progress stub: only the constructor signature exists so far
+ def __init__(
+ self,
+ dim: int,
+ depth: int,
+ num_heads: int,
+ norm_layer: Type[nn.Module],
+ causal: bool = False,
+ cross_attend: bool = False,
+ only_cross: bool = False,
+ ) -> None:
+ pass  # NOTE(review): no super().__init__() call and no layers built; all arguments are currently unused
diff --git a/text_recognizer/networks/transformer/nystromer/nystromer.py b/text_recognizer/networks/transformer/nystromer/nystromer.py
index 7cc889e..799a811 100644
--- a/text_recognizer/networks/transformer/nystromer/nystromer.py
+++ b/text_recognizer/networks/transformer/nystromer/nystromer.py
@@ -26,7 +26,8 @@ class Nystromer(nn.Module):
residual: bool = True,
residual_conv_kernel: int = 33,
dropout_rate: float = 0.0,
- ):
+ glu: bool = True,
+ ) -> None:
super().__init__()
self.layers = nn.ModuleList(
[
@@ -45,7 +46,10 @@ class Nystromer(nn.Module):
dropout_rate=dropout_rate,
),
),
- PreNorm(dim, FeedForward(dim=dim, dropout_rate=dropout_rate)),
+ PreNorm(
+ dim,
+ FeedForward(dim=dim, glu=glu, dropout_rate=dropout_rate),
+ ),
]
)
for _ in range(depth)