summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- text_recognizer/networks/transformer/attention.py 7
-rw-r--r-- training/run_experiment.py 4
2 files changed, 9 insertions, 2 deletions
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index e1324af..8724691 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -58,6 +58,7 @@ class Attention(nn.Module):
context_mask: Optional[Tensor],
rotary_pos_emb: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor]:
+ b, n, _, device = x.shape, x.device
q, k, v = self.qkv_fn(x)
q, k = (
self._apply_rotary_emb(q, k, rotary_pos_emb)
@@ -66,7 +67,13 @@ class Attention(nn.Module):
k,
)
+ input_mask = None
if any(x is not None for x in (mask, context_mask)):
+ q_mask = (
+ mask
+ if mask is not None
+ else lambda: torch.ones((b, n), device=device).bool()
+ )
pass
# Compute the attention
diff --git a/training/run_experiment.py b/training/run_experiment.py
index 2b3ecab..4e045c7 100644
--- a/training/run_experiment.py
+++ b/training/run_experiment.py
@@ -149,9 +149,9 @@ def run(config: DictConfig) -> None:
@hydra.main(config_path="conf", config_name="config")
-def main(cfg: DictConfig) -> None:
+def main(config: DictConfig) -> None:
"""Loads config with hydra."""
- run(cfg)
+ run(config)
if __name__ == "__main__":