Merge branch 'master' of github.com:aktersnurra/text-recognizer

author: aktersnurra <grydholm@kth.se> 2020-12-02 23:48:52 +0100
committer: aktersnurra <grydholm@kth.se> 2020-12-02 23:48:52 +0100
commit: 5529e0fc9ca39e81fe0f08a54f257d32f0afe120 (patch)
tree: f2be992554e278857db7d56786dba54a76d439c7 /src/text_recognizer/networks/crnn.py
parent: e3b039c9adb4bce42ede4cb682a3ae71e797539a (diff)
parent: 8e3985c9cde6666e4314973312135ec1c7a025b9 (diff)
1 file changed, 7 insertions, 5 deletions
diff --git a/src/text_recognizer/networks/crnn.py b/src/text_recognizer/networks/crnn.py
index 9747429..778e232 100644
--- a/src/text_recognizer/networks/crnn.py
+++ b/src/text_recognizer/networks/crnn.py
@@ -1,4 +1,4 @@
-"""LSTM with CTC for handwritten text recognition within a line."""
+"""CRNN for handwritten text recognition."""
 from typing import Dict, Tuple
 
 from einops import rearrange, reduce
@@ -89,20 +89,22 @@ class ConvolutionalRecurrentNetwork(nn.Module):
 
             x = self.backbone(x)
 
-            # Avgerage pooling.
+            # Average pooling.
             if self.avg_pool:
                 x = reduce(x, "(b t) c h w -> t b c", "mean", b=b, t=t)
             else:
                 x = rearrange(x, "(b t) h -> t b h", b=b, t=t)
         else:
             # Encode the entire image with a CNN, and use the channels as temporal dimension.
-            b = x.shape[0]
             x = self.backbone(x)
-            x = rearrange(x, "b c h w -> c b (h w)", b=b)
+            x = rearrange(x, "b c h w -> b w c h")
+            if self.adaptive_pool is not None:
+                x = self.adaptive_pool(x)
+            x = x.squeeze(3)
 
         # Sequence predictions.
         x, _ = self.rnn(x)
 
-        # Sequence to classifcation layer.
+        # Sequence to classification layer.
         x = self.decoder(x)
         return x
author	aktersnurra <grydholm@kth.se>	2020-12-02 23:48:52 +0100
committer	aktersnurra <grydholm@kth.se>	2020-12-02 23:48:52 +0100
commit	5529e0fc9ca39e81fe0f08a54f257d32f0afe120 (patch)
tree	f2be992554e278857db7d56786dba54a76d439c7 /src/text_recognizer/networks/crnn.py
parent	e3b039c9adb4bce42ede4cb682a3ae71e797539a (diff)
parent	8e3985c9cde6666e4314973312135ec1c7a025b9 (diff)