| author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-05-02 14:10:53 +0200 |
| --- | --- | --- |
| committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-05-02 14:10:53 +0200 |
| commit | 1baeae6b414f71906bd1480d3ddc393ae878bd63 (patch) | |
| tree | 288550c42a7dfea43e8464b00adfa7e47ef2bc5e | |
| parent | 1d0977585f01c42e9f6280559a1a98037907a62e (diff) | |
Working on attention
-rw-r--r-- | text_recognizer/networks/transformer/attention.py | 7 |
-rw-r--r-- | training/run_experiment.py | 4 |
2 files changed, 9 insertions, 2 deletions
```diff
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index e1324af..8724691 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -58,6 +58,7 @@ class Attention(nn.Module):
         context_mask: Optional[Tensor],
         rotary_pos_emb: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
+        b, n, _, device = x.shape, x.device
         q, k, v = self.qkv_fn(x)
         q, k = (
             self._apply_rotary_emb(q, k, rotary_pos_emb)
@@ -66,7 +67,13 @@ class Attention(nn.Module):
             k,
         )
 
+        input_mask = None
         if any(x is not None for x in (mask, context_mask)):
+            q_mask = (
+                mask
+                if mask is not None
+                else lambda: torch.ones((b, n), device=device).bool()
+            )
             pass
 
         # Compute the attention
diff --git a/training/run_experiment.py b/training/run_experiment.py
index 2b3ecab..4e045c7 100644
--- a/training/run_experiment.py
+++ b/training/run_experiment.py
@@ -149,9 +149,9 @@ def run(config: DictConfig) -> None:
 
 
 @hydra.main(config_path="conf", config_name="config")
-def main(cfg: DictConfig) -> None:
+def main(config: DictConfig) -> None:
     """Loads config with hydra."""
-    run(cfg)
+    run(config)
 
 
 if __name__ == "__main__":
```
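The attention change is still work in progress (the mask branch ends in `pass`). Below is a minimal sketch of how the query/key padding masks might eventually be combined, following the x-transformers-style convention the diff appears to mirror. The helper `build_input_mask` and its signature are hypothetical, not the repository's final code. Two details differ from the diff on purpose: `b, n, _, device = x.shape, x.device` as written would raise a `ValueError` (a 2-tuple cannot unpack into four names), so the sketch uses `*x.shape`; and the `lambda` in the `else` branch would leave `q_mask` as a callable rather than a tensor, so the sketch drops it.

```python
from typing import Optional

import torch
from torch import Tensor


def build_input_mask(
    x: Tensor,
    context: Optional[Tensor] = None,
    mask: Optional[Tensor] = None,
    context_mask: Optional[Tensor] = None,
) -> Optional[Tensor]:
    """Combine query and key padding masks into a (b, 1, n, m) boolean mask.

    Hypothetical helper; names follow the diff / x-transformers convention.
    """
    if mask is None and context_mask is None:
        return None

    # `*x.shape` is needed here; `x.shape, x.device` alone is only a 2-tuple.
    b, n, _, device = *x.shape, x.device

    # Default to "all positions valid" when no query mask is given.
    # No lambda: q_mask must be a Tensor, not a callable.
    q_mask = mask if mask is not None else torch.ones((b, n), device=device).bool()

    if context is None:
        # Self-attention: keys share the query mask.
        k_mask = q_mask
    else:
        m = context.shape[1]
        k_mask = (
            context_mask
            if context_mask is not None
            else torch.ones((b, m), device=device).bool()
        )

    # Broadcast to (b, 1, n, m) so the mask can be applied to attention
    # logits of shape (b, heads, n, m).
    return q_mask[:, None, :, None] & k_mask[:, None, None, :]


if __name__ == "__main__":
    x = torch.randn(2, 5, 16)
    mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]]).bool()
    print(build_input_mask(x, mask=mask).shape)  # torch.Size([2, 1, 5, 5])
```

The resulting boolean mask can then be used to fill masked positions of the attention logits with a large negative value before the softmax.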