diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-15 21:49:51 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2024-04-15 21:49:51 +0200 |
commit | b3fbfd72a8f647161685b28d20b4b61519d8a643 (patch) | |
tree | a5cac4e15186396aae35231d6d6fe266691b0186 /text_recognizer/network/transformer/decoder.py | |
parent | c7e5354ffa43eccfc4e411375ce2f531af7bbcff (diff) |
Update transformer model
Diffstat (limited to 'text_recognizer/network/transformer/decoder.py')
-rw-r--r-- | text_recognizer/network/transformer/decoder.py | 8 |
1 file changed, 6 insertions, 2 deletions
diff --git a/text_recognizer/network/transformer/decoder.py b/text_recognizer/network/transformer/decoder.py index 4ebdd2c..60e426a 100644 --- a/text_recognizer/network/transformer/decoder.py +++ b/text_recognizer/network/transformer/decoder.py @@ -1,9 +1,9 @@ """Transformer decoder module.""" from typing import Optional + from torch import Tensor, nn from .attention import Attention -from .embedding.rotary import RotaryEmbedding class Decoder(nn.Module): @@ -15,6 +15,7 @@ class Decoder(nn.Module): dim_head: int, depth: int, dropout_rate: float = 0.0, + one_kv_head: bool = False, ) -> None: super().__init__() self.norm = nn.LayerNorm(dim) @@ -31,7 +32,8 @@ class Decoder(nn.Module): dropout_rate=dropout_rate, use_flash=True, norm_context=False, - rotary_emb=RotaryEmbedding(dim_head), + use_rotary_emb=True, + one_kv_head=one_kv_head, ), Attention( dim=dim, @@ -42,6 +44,8 @@ class Decoder(nn.Module): dropout_rate=dropout_rate, use_flash=True, norm_context=False, + use_rotary_emb=False, + one_kv_head=one_kv_head, ), ] ) |