Working on attention

author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-05-02 14:10:53 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-05-02 14:10:53 +0200
commit: 1baeae6b414f71906bd1480d3ddc393ae878bd63 (patch)
tree: 288550c42a7dfea43e8464b00adfa7e47ef2bc5e /text_recognizer/networks/transformer
parent: 1d0977585f01c42e9f6280559a1a98037907a62e (diff)
1 files changed, 7 insertions, 0 deletions
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index e1324af..8724691 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -58,6 +58,7 @@ class Attention(nn.Module):
         context_mask: Optional[Tensor],
         rotary_pos_emb: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
+        b, n, _, device = x.shape, x.device
         q, k, v = self.qkv_fn(x)
         q, k = (
             self._apply_rotary_emb(q, k, rotary_pos_emb)
@@ -66,7 +67,13 @@ class Attention(nn.Module):
             k,
         )
 
+        input_mask = None
         if any(x is not None for x in (mask, context_mask)):
+            q_mask = (
+                mask
+                if mask is not None
+                else lambda: torch.ones((b, n), device=device).bool()
+            )
             pass
 
         # Compute the attention
author	Gustaf Rydholm <gustaf.rydholm@gmail.com>	2021-05-02 14:10:53 +0200
committer	Gustaf Rydholm <gustaf.rydholm@gmail.com>	2021-05-02 14:10:53 +0200
commit	1baeae6b414f71906bd1480d3ddc393ae878bd63 (patch)
tree	288550c42a7dfea43e8464b00adfa7e47ef2bc5e /text_recognizer/networks/transformer
parent	1d0977585f01c42e9f6280559a1a98037907a62e (diff)