summary | refs | log | tree | commit | diff
path: root/text_recognizer/model
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2023-09-03 01:14:41 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2023-09-03 01:14:41 +0200
commit 53cfc21cffa4e877ad0959170b47b690d2fdb40f (patch)
tree 88eaabb75a5ca6fba3c43655ccb754a9f0c5d649 /text_recognizer/model
parent d020059f2f71fe7c25765dde9d535195c09ece01 (diff)
Update lit models
Diffstat (limited to 'text_recognizer/model')
-rw-r--r-- text_recognizer/model/base.py 7
-rw-r--r-- text_recognizer/model/greedy_decoder.py 2
-rw-r--r-- text_recognizer/model/transformer.py 6
3 files changed, 4 insertions, 11 deletions
diff --git a/text_recognizer/model/base.py b/text_recognizer/model/base.py
index adcb8da..9a751bf 100644
--- a/text_recognizer/model/base.py
+++ b/text_recognizer/model/base.py
@@ -7,7 +7,6 @@ from loguru import logger as log
from omegaconf import DictConfig
import pytorch_lightning as L
from torch import nn, Tensor
-from torchmetrics import Accuracy
from text_recognizer.data.tokenizer import Tokenizer
@@ -24,17 +23,11 @@ class LitBase(L.LightningModule):
tokenizer: Tokenizer,
) -> None:
super().__init__()
-
self.network = network
self.loss_fn = loss_fn
self.optimizer_config = optimizer_config
self.lr_scheduler_config = lr_scheduler_config
self.tokenizer = tokenizer
- ignore_index = int(self.tokenizer.get_value("<p>"))
- # Placeholders
- self.train_acc = Accuracy(mdmc_reduce="samplewise", ignore_index=ignore_index)
- self.val_acc = Accuracy(mdmc_reduce="samplewise", ignore_index=ignore_index)
- self.test_acc = Accuracy(mdmc_reduce="samplewise", ignore_index=ignore_index)
def optimizer_zero_grad(
self,
diff --git a/text_recognizer/model/greedy_decoder.py b/text_recognizer/model/greedy_decoder.py
index 2c4c16e..8d55a02 100644
--- a/text_recognizer/model/greedy_decoder.py
+++ b/text_recognizer/model/greedy_decoder.py
@@ -34,7 +34,7 @@ class GreedyDecoder:
for i in range(1, self.max_output_len):
tokens = indecies[:, :i] # (B, Sy)
logits = self.network.decode(tokens, img_features) # [ B, N, C ]
- indecies_ = torch.argmax(logits, dim=2) # [ B, N ]
+ indecies_ = logits.argmax(dim=2) # [ B, N ]
indecies[:, i] = indecies_[:, -1]
# Early stopping of prediction loop if token is end or padding token.
diff --git a/text_recognizer/model/transformer.py b/text_recognizer/model/transformer.py
index ae6947c..598d995 100644
--- a/text_recognizer/model/transformer.py
+++ b/text_recognizer/model/transformer.py
@@ -8,7 +8,7 @@ from torchmetrics import CharErrorRate, WordErrorRate
from .greedy_decoder import GreedyDecoder
from text_recognizer.data.tokenizer import Tokenizer
-from text_recognizer.model.base import LitBase
+from .base import LitBase
class LitTransformer(LitBase):
@@ -45,7 +45,7 @@ class LitTransformer(LitBase):
logits = self.network(data, targets) # [B, N, C]
return logits.permute(0, 2, 1) # [B, C, N]
- def training_step(self, batch: Tuple[Tensor, Tensor], batch_idx: int) -> Tensor:
+ def training_step(self, batch: Tuple[Tensor, Tensor], batch_idx: int) -> dict:
"""Training step."""
data, targets = batch
logits = self.teacher_forward(data, targets[:, :-1])
@@ -61,7 +61,7 @@ class LitTransformer(LitBase):
), self.tokenizer.batch_decode(targets)
outputs.update({"predictions": preds, "ground_truths": gts})
- return loss
+ return outputs
def validation_step(self, batch: Tuple[Tensor, Tensor], batch_idx: int) -> dict:
"""Validation step."""