From 49ca6ade1a19f7f9c702171537fe4be0dfcda66d Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm
Date: Fri, 25 Aug 2023 23:19:14 +0200
Subject: Rename and add flash atten

---
 .../network/transformer/embedding/__init__.py |  1 +
 .../network/transformer/embedding/absolute.py | 28 ++++++++++++++++++++++
 .../network/transformer/embedding/l2_norm.py  |  9 +++++++
 .../network/transformer/embedding/sincos.py   | 13 ++++++++++
 .../network/transformer/embedding/token.py    | 18 ++++++++++++++
 5 files changed, 69 insertions(+)
 create mode 100644 text_recognizer/network/transformer/embedding/__init__.py
 create mode 100644 text_recognizer/network/transformer/embedding/absolute.py
 create mode 100644 text_recognizer/network/transformer/embedding/l2_norm.py
 create mode 100644 text_recognizer/network/transformer/embedding/sincos.py
 create mode 100644 text_recognizer/network/transformer/embedding/token.py

(limited to 'text_recognizer/network/transformer/embedding')

diff --git a/text_recognizer/network/transformer/embedding/__init__.py b/text_recognizer/network/transformer/embedding/__init__.py
new file mode 100644
index 0000000..bb3f904
--- /dev/null
+++ b/text_recognizer/network/transformer/embedding/__init__.py
@@ -0,0 +1 @@
+"""Positional encodings for transformers."""
diff --git a/text_recognizer/network/transformer/embedding/absolute.py b/text_recognizer/network/transformer/embedding/absolute.py
new file mode 100644
index 0000000..08b2c2a
--- /dev/null
+++ b/text_recognizer/network/transformer/embedding/absolute.py
@@ -0,0 +1,28 @@
+from typing import Optional
+
+import torch
+from torch import nn, Tensor
+from text_recognizer.network.transformer.embedding.l2_norm import l2_norm
+
+
+class AbsolutePositionalEmbedding(nn.Module):
+    def __init__(self, dim: int, max_length: int, use_l2: bool = False) -> None:
+        super().__init__()
+        self.scale = dim**-0.5 if not use_l2 else 1.0
+        self.max_length = max_length
+        self.use_l2 = use_l2
+        self.to_embedding = nn.Embedding(max_length, dim)
+        if self.use_l2:
+            nn.init.normal_(self.to_embedding.weight, std=1e-5)
+
+    def forward(self, x: Tensor, pos: Optional[Tensor] = None) -> Tensor:
+        n, device = x.shape[1], x.device
+        assert (
+            n <= self.max_length
+        ), f"Sequence length {n} is greater than the maximum positional embedding {self.max_length}"
+
+        if pos is None:
+            pos = torch.arange(n, device=device)
+
+        embedding = self.to_embedding(pos) * self.scale
+        return l2_norm(embedding) if self.use_l2 else embedding
diff --git a/text_recognizer/network/transformer/embedding/l2_norm.py b/text_recognizer/network/transformer/embedding/l2_norm.py
new file mode 100644
index 0000000..0e48bca
--- /dev/null
+++ b/text_recognizer/network/transformer/embedding/l2_norm.py
@@ -0,0 +1,9 @@
+from einops import rearrange
+import torch.nn.functional as F
+from torch import Tensor
+
+
+def l2_norm(t: Tensor, groups=1) -> Tensor:
+    t = rearrange(t, "... (g d) -> ... g d", g=groups)
+    t = F.normalize(t, p=2, dim=-1)
+    return rearrange(t, "... g d -> ... (g d)")
diff --git a/text_recognizer/network/transformer/embedding/sincos.py b/text_recognizer/network/transformer/embedding/sincos.py
new file mode 100644
index 0000000..ed6b0ab
--- /dev/null
+++ b/text_recognizer/network/transformer/embedding/sincos.py
@@ -0,0 +1,13 @@
+import torch
+
+
+def sincos_2d(h, w, dim, temperature: int = 10000, dtype=torch.float32):
+    y, x = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
+    assert (dim % 4) == 0, "feature dimension must be multiple of 4 for sincos emb"
+    omega = torch.arange(dim // 4) / (dim // 4 - 1)
+    omega = 1.0 / (temperature**omega)
+
+    y = y.flatten()[:, None] * omega[None, :]
+    x = x.flatten()[:, None] * omega[None, :]
+    pe = torch.cat((x.sin(), x.cos(), y.sin(), y.cos()), dim=1)
+    return pe.type(dtype)
diff --git a/text_recognizer/network/transformer/embedding/token.py b/text_recognizer/network/transformer/embedding/token.py
new file mode 100644
index 0000000..1df2fd6
--- /dev/null
+++ b/text_recognizer/network/transformer/embedding/token.py
@@ -0,0 +1,18 @@
+from torch import nn, Tensor
+
+from text_recognizer.network.transformer.embedding.l2_norm import l2_norm
+
+
+class TokenEmbedding(nn.Module):
+    def __init__(self, num_tokens: int, dim: int, use_l2: bool = True) -> None:
+        super().__init__()
+        self.use_l2 = use_l2
+        self.to_embedding = nn.Embedding(num_tokens, dim)
+        if self.use_l2:
+            nn.init.normal_(self.to_embedding.weight, std=1e-5)
+        else:
+            nn.init.kaiming_normal_(self.to_embedding.weight)
+
+    def forward(self, x: Tensor) -> Tensor:
+        embedding = self.to_embedding(x)
+        return l2_norm(embedding) if self.use_l2 else embedding
--
cgit v1.2.3-70-g09d2