diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-04-05 23:24:20 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-04-05 23:24:20 +0200 |
commit | dedf8deb025ac9efdad5e9baf9165ef63d6829ff (patch) | |
tree | 56b10fcaef479d8abe9b0e6c05e07ad5e02b9ab0 /text_recognizer/networks/image_transformer.py | |
parent | 532286b516b17d279c321358bf03dddc8adc8029 (diff) |
Pre-commit fixes, optimizer loading fix
Diffstat (limited to 'text_recognizer/networks/image_transformer.py')
-rw-r--r-- | text_recognizer/networks/image_transformer.py | 10 |
1 files changed, 4 insertions, 6 deletions
diff --git a/text_recognizer/networks/image_transformer.py b/text_recognizer/networks/image_transformer.py index aa024e0..85a84d2 100644 --- a/text_recognizer/networks/image_transformer.py +++ b/text_recognizer/networks/image_transformer.py @@ -1,9 +1,9 @@ """A Transformer with a cnn backbone. The network encodes a image with a convolutional backbone to a latent representation, -i.e. feature maps. A 2d positional encoding is applied to the feature maps for +i.e. feature maps. A 2d positional encoding is applied to the feature maps for spatial information. The resulting feature are then set to a transformer decoder -together with the target tokens. +together with the target tokens. TODO: Local attention for transformer.j @@ -107,9 +107,7 @@ class ImageTransformer(nn.Module): encoder_class = getattr(network_module, encoder.type) return encoder_class(**encoder.args) - def _configure_mapping( - self, mapping: str - ) -> Tuple[List[str], Dict[str, int]]: + def _configure_mapping(self, mapping: str) -> Tuple[List[str], Dict[str, int]]: """Configures mapping.""" if mapping == "emnist": mapping, inverse_mapping, _ = emnist_mapping() @@ -125,7 +123,7 @@ class ImageTransformer(nn.Module): Tensor: Image features. Shapes: - - image: :math: `(B, C, H, W)` + - image: :math: `(B, C, H, W)` - latent: :math: `(B, T, C)` """ |