diff options
Diffstat (limited to 'text_recognizer/networks/transformer')
-rw-r--r-- | text_recognizer/networks/transformer/attention.py | 2 | ||||
-rw-r--r-- | text_recognizer/networks/transformer/norm.py | 4 |
2 files changed, 4 insertions, 2 deletions
diff --git a/text_recognizer/networks/transformer/attention.py b/text_recognizer/networks/transformer/attention.py
index b86636e..87792a9 100644
--- a/text_recognizer/networks/transformer/attention.py
+++ b/text_recognizer/networks/transformer/attention.py
@@ -20,7 +20,6 @@ class Attention(nn.Module):
     """Standard attention."""
 
     def __attrs_pre_init__(self) -> None:
-        """Pre init constructor."""
         super().__init__()
 
     dim: int = attr.ib()
@@ -34,7 +33,6 @@ class Attention(nn.Module):
     fc: nn.Linear = attr.ib(init=False)
 
     def __attrs_post_init__(self) -> None:
-        """Post init configuration."""
         self.scale = self.dim ** -0.5
         inner_dim = self.num_heads * self.dim_head
 
diff --git a/text_recognizer/networks/transformer/norm.py b/text_recognizer/networks/transformer/norm.py
index c59744a..98f4d7f 100644
--- a/text_recognizer/networks/transformer/norm.py
+++ b/text_recognizer/networks/transformer/norm.py
@@ -12,6 +12,8 @@ from torch import Tensor
 
 
 class ScaleNorm(nn.Module):
+    """Scaled normalization."""
+
     def __init__(self, normalized_shape: int, eps: float = 1.0e-5) -> None:
         super().__init__()
         self.scale = normalized_shape ** -0.5
@@ -25,6 +27,8 @@ class ScaleNorm(nn.Module):
 
 
 class PreNorm(nn.Module):
+    """Applies layer normalization then function."""
+
     def __init__(self, normalized_shape: int, fn: Type[nn.Module]) -> None:
         super().__init__()
         self.norm = nn.LayerNorm(normalized_shape)