author     Gustaf Rydholm <gustaf.rydholm@gmail.com>  2021-06-07 22:57:24 +0200
committer  Gustaf Rydholm <gustaf.rydholm@gmail.com>  2021-06-07 22:57:24 +0200
commit     87ea75c803400ca74c4f1b863c496165ed802fc2 (patch)
tree       6730c664b558691af0693a61d09d3aaf02366069 /text_recognizer/networks/transformer
parent     01d6e5fc066969283df99c759609df441151e9c5 (diff)
Working feedforward of full transformer arch in notebook
Diffstat (limited to 'text_recognizer/networks/transformer')
-rw-r--r--  text_recognizer/networks/transformer/attention.py            | 8
-rw-r--r--  text_recognizer/networks/transformer/nystromer/attention.py  | 1
-rw-r--r--  text_recognizer/networks/transformer/nystromer/nystromer.py  | 1
3 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index a3b53f0..7bafc58 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -91,12 +91,12 @@ class Attention(nn.Module):
     def forward(
         self,
         x: Tensor,
-        context: Optional[Tensor],
-        mask: Optional[Tensor],
-        context_mask: Optional[Tensor],
+        context: Optional[Tensor] = None,
+        mask: Optional[Tensor] = None,
+        context_mask: Optional[Tensor] = None,
         rotary_pos_emb: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
-        b, n, _, device = x.shape, x.device
+        b, n, _, device = *x.shape, x.device
         q, k, v = self.qkv_fn(x)
         q, k = (
             self._apply_rotary_emb(q, k, rotary_pos_emb)
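
The attention.py change does two things: giving context, mask, and context_mask a default of None lets callers invoke forward(x) with only the input tensor, and the added star fixes the shape unpacking, since without it the right-hand side is the two-tuple (x.shape, x.device), which cannot fill four targets. A minimal standalone sketch of the unpacking fix, assuming only torch (the tensor shape below is illustrative):

import torch

x = torch.randn(2, 16, 64)  # (batch, seq_len, dim)

# Old line: b, n, _, device = x.shape, x.device
# The right-hand side is the 2-tuple (torch.Size([2, 16, 64]), x.device), so
# unpacking it into four targets raises "not enough values to unpack".

# New line: the star splats the shape first, producing (2, 16, 64, device).
b, n, _, device = *x.shape, x.device
print(b, n, device)  # 2 16 cpu
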
diff --git a/text_recognizer/networks/transformer/nystromer/attention.py b/text_recognizer/networks/transformer/nystromer/attention.py
index 5ab19cf..695a0d7 100644
--- a/text_recognizer/networks/transformer/nystromer/attention.py
+++ b/text_recognizer/networks/transformer/nystromer/attention.py
@@ -47,6 +47,7 @@ class NystromAttention(nn.Module):
         dropout_rate: float = 0.0,
     ):
         super().__init__()
+        self.dim = dim
         self.residual = None
         self.eps = eps
         self.num_heads = num_heads
diff --git a/text_recognizer/networks/transformer/nystromer/nystromer.py b/text_recognizer/networks/transformer/nystromer/nystromer.py
index 799a811..2113f1f 100644
--- a/text_recognizer/networks/transformer/nystromer/nystromer.py
+++ b/text_recognizer/networks/transformer/nystromer/nystromer.py
@@ -29,6 +29,7 @@ class Nystromer(nn.Module):
         glu: bool = True,
     ) -> None:
         super().__init__()
+        self.dim = dim
         self.layers = nn.ModuleList(
             [
                 nn.ModuleList(
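
Both Nyströmer modules now keep their embedding width as self.dim. A hypothetical sketch of how a wrapper might use it (the Wrapper class and its projection head are assumptions for illustration, not code from this repository): downstream modules can size their layers from encoder.dim instead of having the value threaded through again.

import torch
from torch import nn


class Wrapper(nn.Module):
    """Hypothetical wrapper that reads the encoder's stored `dim` attribute."""

    def __init__(self, encoder: nn.Module, num_classes: int) -> None:
        super().__init__()
        self.encoder = encoder
        # Size the projection head from the encoder's stored embedding width.
        self.head = nn.Linear(encoder.dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.head(self.encoder(x))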