diff options
Diffstat (limited to 'text_recognizer/networks/perceiver')
-rw-r--r-- | text_recognizer/networks/perceiver/__init__.py | 1 | ||||
-rw-r--r-- | text_recognizer/networks/perceiver/attention.py | 10 | ||||
-rw-r--r-- | text_recognizer/networks/perceiver/perceiver.py | 22 |
3 files changed, 19 insertions, 14 deletions
diff --git a/text_recognizer/networks/perceiver/__init__.py b/text_recognizer/networks/perceiver/__init__.py index e69de29..ac2c102 100644 --- a/text_recognizer/networks/perceiver/__init__.py +++ b/text_recognizer/networks/perceiver/__init__.py @@ -0,0 +1 @@ +from text_recognizer.networks.perceiver.perceiver import PerceiverIO diff --git a/text_recognizer/networks/perceiver/attention.py b/text_recognizer/networks/perceiver/attention.py index 66aeaa8..0ee51b1 100644 --- a/text_recognizer/networks/perceiver/attention.py +++ b/text_recognizer/networks/perceiver/attention.py @@ -36,11 +36,11 @@ class Attention(nn.Module): q, v, k = map(lambda t: rearrange(t, "b n (h d) -> (b h) n d", h=h), (q, k, v)) sim = einsum("b i d, b j d -> b i j", q, k) * self.scale - if mask is not None: - mask = rearrange(mask, "b ... -> b (...)") - max_neg_val = -torch.finfo(sim.dtype).max - mask = repeat(mask, "b j -> (b h) () j", h=h) - sim.masked_fill_(~mask, max_neg_val) + # if mask is not None: + # mask = rearrange(mask, "b ... -> b (...)") + # max_neg_val = -torch.finfo(sim.dtype).max + # mask = repeat(mask, "b j -> (b h) () j", h=h) + # sim.masked_fill_(~mask, max_neg_val) attn = sim.softmax(dim=-1) out = einsum("b i j, b j d -> b i d", attn, v) diff --git a/text_recognizer/networks/perceiver/perceiver.py b/text_recognizer/networks/perceiver/perceiver.py index 65ee20c..d4bca0b 100644 --- a/text_recognizer/networks/perceiver/perceiver.py +++ b/text_recognizer/networks/perceiver/perceiver.py @@ -2,9 +2,9 @@ A copy from lucidrains. """ -from itertools import repeat from typing import Optional +from einops import repeat, rearrange import torch from torch import nn, Tensor @@ -44,13 +44,17 @@ class PerceiverIO(nn.Module): self.layers = nn.ModuleList( [ - [ - PreNorm( - latent_dim, - Attention(latent_dim, heads=latent_heads, dim_head=latent_dim), - ), - PreNorm(latent_dim, FeedForward(latent_dim)), - ] + nn.ModuleList( + [ + PreNorm( + latent_dim, + Attention( + latent_dim, heads=latent_heads, dim_head=latent_dim + ), + ), + PreNorm(latent_dim, FeedForward(latent_dim)), + ] + ) for _ in range(depth) ] ) @@ -69,7 +73,7 @@ class PerceiverIO(nn.Module): self, data: Tensor, queries: Tensor, mask: Optional[Tensor] = None ) -> Tensor: b = data.shape[0] - x = repeat(self.latents, "nd -> bnd", b=b) + x = repeat(self.latents, "n d -> b n d", b=b) cross_attn, cross_ff = self.cross_attn_block |