summaryrefslogtreecommitdiff
path: root/text_recognizer/networks/image_transformer.py
diff options
context:
space:
mode:
authorGustaf Rydholm <gustaf.rydholm@gmail.com>2021-04-05 23:24:20 +0200
committerGustaf Rydholm <gustaf.rydholm@gmail.com>2021-04-05 23:24:20 +0200
commitdedf8deb025ac9efdad5e9baf9165ef63d6829ff (patch)
tree56b10fcaef479d8abe9b0e6c05e07ad5e02b9ab0 /text_recognizer/networks/image_transformer.py
parent532286b516b17d279c321358bf03dddc8adc8029 (diff)
Pre-commit fixes, optimizer loading fix
Diffstat (limited to 'text_recognizer/networks/image_transformer.py')
-rw-r--r--text_recognizer/networks/image_transformer.py10
1 files changed, 4 insertions, 6 deletions
diff --git a/text_recognizer/networks/image_transformer.py b/text_recognizer/networks/image_transformer.py
index aa024e0..85a84d2 100644
--- a/text_recognizer/networks/image_transformer.py
+++ b/text_recognizer/networks/image_transformer.py
@@ -1,9 +1,9 @@
"""A Transformer with a cnn backbone.
The network encodes an image with a convolutional backbone to a latent representation,
-i.e. feature maps. A 2d positional encoding is applied to the feature maps for
+i.e. feature maps. A 2d positional encoding is applied to the feature maps for
spatial information. The resulting features are then sent to a transformer decoder
-together with the target tokens.
+together with the target tokens.
TODO: Local attention for transformer.
@@ -107,9 +107,7 @@ class ImageTransformer(nn.Module):
encoder_class = getattr(network_module, encoder.type)
return encoder_class(**encoder.args)
- def _configure_mapping(
- self, mapping: str
- ) -> Tuple[List[str], Dict[str, int]]:
+ def _configure_mapping(self, mapping: str) -> Tuple[List[str], Dict[str, int]]:
"""Configures mapping."""
if mapping == "emnist":
mapping, inverse_mapping, _ = emnist_mapping()
@@ -125,7 +123,7 @@ class ImageTransformer(nn.Module):
Tensor: Image features.
Shapes:
- - image: :math: `(B, C, H, W)`
+ - image: :math: `(B, C, H, W)`
- latent: :math: `(B, T, C)`
"""