diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-05-06 22:28:25 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-05-06 22:28:25 +0200 |
commit | 805d5726c17b83e00dcea0d2608dcd83a91f723d (patch) | |
tree | 850af62a8e2d558d82aa4dbf5b8611755d256b89 /text_recognizer/networks/transformer/nystromer | |
parent | 4defc734b681071e19dd86404abd416d24330b9a (diff) |
Working on attention layer configuration
Diffstat (limited to 'text_recognizer/networks/transformer/nystromer')
-rw-r--r-- | text_recognizer/networks/transformer/nystromer/nystromer.py | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/text_recognizer/networks/transformer/nystromer/nystromer.py b/text_recognizer/networks/transformer/nystromer/nystromer.py
index 7cc889e..799a811 100644
--- a/text_recognizer/networks/transformer/nystromer/nystromer.py
+++ b/text_recognizer/networks/transformer/nystromer/nystromer.py
@@ -26,7 +26,8 @@ class Nystromer(nn.Module):
         residual: bool = True,
         residual_conv_kernel: int = 33,
         dropout_rate: float = 0.0,
-    ):
+        glu: bool = True,
+    ) -> None:
         super().__init__()
         self.layers = nn.ModuleList(
             [
@@ -45,7 +46,10 @@ class Nystromer(nn.Module):
                                 dropout_rate=dropout_rate,
                             ),
                         ),
-                        PreNorm(dim, FeedForward(dim=dim, dropout_rate=dropout_rate)),
+                        PreNorm(
+                            dim,
+                            FeedForward(dim=dim, glu=glu, dropout_rate=dropout_rate),
+                        ),
                     ]
                 )
                 for _ in range(depth)
```