| author    | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-06-07 22:57:24 +0200 |
|-----------|-------------------------------------------|---------------------------|
| committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-06-07 22:57:24 +0200 |
| commit    | 87ea75c803400ca74c4f1b863c496165ed802fc2 (patch) | |
| tree      | 6730c664b558691af0693a61d09d3aaf02366069 /text_recognizer/networks/transformer/attention.py | |
| parent    | 01d6e5fc066969283df99c759609df441151e9c5 (diff) | |
Working feedforward of full transformer arch in notebook
Diffstat (limited to 'text_recognizer/networks/transformer/attention.py')
| -rw-r--r-- | text_recognizer/networks/transformer/attention.py | 8 |

1 file changed, 4 insertions(+), 4 deletions(-)
```diff
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index a3b53f0..7bafc58 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -91,12 +91,12 @@ class Attention(nn.Module):
     def forward(
         self,
         x: Tensor,
-        context: Optional[Tensor],
-        mask: Optional[Tensor],
-        context_mask: Optional[Tensor],
+        context: Optional[Tensor] = None,
+        mask: Optional[Tensor] = None,
+        context_mask: Optional[Tensor] = None,
         rotary_pos_emb: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
-        b, n, _, device = x.shape, x.device
+        b, n, _, device = *x.shape, x.device
         q, k, v = self.qkv_fn(x)
         q, k = (
             self._apply_rotary_emb(q, k, rotary_pos_emb)
```
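The two changes make the attention module easier to call: the optional tensors now default to `None`, so plain self-attention can be invoked without passing explicit mask and context arguments, and the shape unpacking is corrected with a starred expression. Below is a minimal sketch of why the star matters, using an illustrative batch-first tensor; the shape values are arbitrary and not taken from the repository.

```python
import torch

x = torch.randn(2, 5, 64)  # illustrative (batch, seq_len, dim) tensor

# Without the star, the right-hand side is a 2-tuple (shape, device),
# so unpacking it into four names fails with
# "ValueError: not enough values to unpack (expected 4, got 2)":
# b, n, _, device = x.shape, x.device

# With the star, the three shape dimensions are unpacked individually
# and the device is appended as the fourth value.
b, n, _, device = *x.shape, x.device
print(b, n, device)  # 2 5 cpu
```

Without the star, `b` would silently have to absorb the whole `torch.Size` tuple, so the corrected line is what lets the subsequent per-dimension logic in `forward` work at all.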