summaryrefslogtreecommitdiff
path: root/src/text_recognizer/networks/line_lstm_ctc.py
diff options
context:
space:
mode:
authoraktersnurra <gustaf.rydholm@gmail.com>2020-09-14 22:15:47 +0200
committeraktersnurra <gustaf.rydholm@gmail.com>2020-09-14 22:15:47 +0200
commit3b06ef615a8db67a03927576e0c12fbfb2501f5f (patch)
treee1c2b1289971c8480327408de46152481e99b539 /src/text_recognizer/networks/line_lstm_ctc.py
parent2b63fd952bdc9c7c72edd501cbcdbf3231e98f00 (diff)
Fixed CTC loss.
Diffstat (limited to 'src/text_recognizer/networks/line_lstm_ctc.py')
-rw-r--r--src/text_recognizer/networks/line_lstm_ctc.py6
1 files changed, 5 insertions, 1 deletions
diff --git a/src/text_recognizer/networks/line_lstm_ctc.py b/src/text_recognizer/networks/line_lstm_ctc.py
index 988b615..5c57479 100644
--- a/src/text_recognizer/networks/line_lstm_ctc.py
+++ b/src/text_recognizer/networks/line_lstm_ctc.py
@@ -33,8 +33,9 @@ class LineRecurrentNetwork(nn.Module):
self.hidden_size = hidden_size
self.encoder = self._configure_encoder(encoder)
self.flatten = flatten
+ self.fc = nn.Linear(in_features=self.input_size, out_features=self.hidden_size)
self.rnn = nn.LSTM(
- input_size=self.input_size,
+ input_size=self.hidden_size,
hidden_size=self.hidden_size,
num_layers=num_layers,
)
@@ -73,6 +74,9 @@ class LineRecurrentNetwork(nn.Module):
# Average pooling.
x = reduce(x, "(b t) c h w -> t b c", "mean", b=b, t=t) if self.flatten else x
+ # Linear layer between CNN and RNN
+ x = self.fc(x)
+
# Sequence predictions.
x, _ = self.rnn(x)