diff options
Diffstat (limited to 'text_recognizer/networks/transformer')
3 files changed, 6 insertions, 4 deletions
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index a3b53f0..7bafc58 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -91,12 +91,12 @@ class Attention(nn.Module):
     def forward(
         self,
         x: Tensor,
-        context: Optional[Tensor],
-        mask: Optional[Tensor],
-        context_mask: Optional[Tensor],
+        context: Optional[Tensor] = None,
+        mask: Optional[Tensor] = None,
+        context_mask: Optional[Tensor] = None,
         rotary_pos_emb: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
-        b, n, _, device = x.shape, x.device
+        b, n, _, device = *x.shape, x.device
         q, k, v = self.qkv_fn(x)
         q, k = (
             self._apply_rotary_emb(q, k, rotary_pos_emb)
diff --git a/text_recognizer/networks/transformer/nystromer/attention.py b/text_recognizer/networks/transformer/nystromer/attention.py
index 5ab19cf..695a0d7 100644
--- a/text_recognizer/networks/transformer/nystromer/attention.py
+++ b/text_recognizer/networks/transformer/nystromer/attention.py
@@ -47,6 +47,7 @@ class NystromAttention(nn.Module):
         dropout_rate: float = 0.0,
     ):
         super().__init__()
+        self.dim = dim
         self.residual = None
         self.eps = eps
         self.num_heads = num_heads
diff --git a/text_recognizer/networks/transformer/nystromer/nystromer.py b/text_recognizer/networks/transformer/nystromer/nystromer.py
index 799a811..2113f1f 100644
--- a/text_recognizer/networks/transformer/nystromer/nystromer.py
+++ b/text_recognizer/networks/transformer/nystromer/nystromer.py
@@ -29,6 +29,7 @@ class Nystromer(nn.Module):
         glu: bool = True,
     ) -> None:
         super().__init__()
+        self.dim = dim
         self.layers = nn.ModuleList(
             [
                 nn.ModuleList(