summary refs log tree commit diff
path: root/text_recognizer/networks/conformer/conformer.py
diff options
context:
space:
mode:
Diffstat (limited to 'text_recognizer/networks/conformer/conformer.py')
-rw-r--r-- text_recognizer/networks/conformer/conformer.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/text_recognizer/networks/conformer/conformer.py b/text_recognizer/networks/conformer/conformer.py
index e2dce27..09aad55 100644
--- a/text_recognizer/networks/conformer/conformer.py
+++ b/text_recognizer/networks/conformer/conformer.py
@@ -11,6 +11,7 @@ class Conformer(nn.Module):
def __init__(
self,
dim: int,
+ dim_gru: int,
num_classes: int,
subsampler: Type[nn.Module],
block: ConformerBlock,
@@ -19,10 +20,16 @@ class Conformer(nn.Module):
super().__init__()
self.subsampler = subsampler
self.blocks = nn.ModuleList([deepcopy(block) for _ in range(depth)])
- self.fc = nn.Linear(dim, num_classes, bias=False)
+ self.gru = nn.GRU(
+ dim, dim_gru, 1, bidirectional=True, batch_first=True, bias=False
+ )
+ self.fc = nn.Linear(dim_gru, num_classes)
def forward(self, x: Tensor) -> Tensor:
x = self.subsampler(x)
+ B, T, C = x.shape
for fn in self.blocks:
x = fn(x)
+ x, _ = self.gru(x)
+ x = x.view(B, T, 2, -1).sum(2)
return self.fc(x)