diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-09-19 21:05:08 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-09-19 21:05:08 +0200 |
commit | 99886b4a9664b0716319e54f361091e2bfdf4b8f (patch) | |
tree | c77d52b7663493bb79c91bf2ac2bc1b4a6482283 /text_recognizer/models | |
parent | 27acef8b998a8e8dff2c4258027cef779cefca77 (diff) |
Add weight for commitment loss in vq transformer
Diffstat (limited to 'text_recognizer/models')
-rw-r--r-- | text_recognizer/models/vq_transformer.py | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/text_recognizer/models/vq_transformer.py b/text_recognizer/models/vq_transformer.py
index 71ca2ef..a0d3892 100644
--- a/text_recognizer/models/vq_transformer.py
+++ b/text_recognizer/models/vq_transformer.py
@@ -1,11 +1,10 @@
 """PyTorch Lightning model for base Transformers."""
-from typing import Tuple, Type, Set
+from typing import Tuple
 
 import attr
 import torch
 from torch import Tensor
 
-from text_recognizer.models.metrics import CharacterErrorRate
 from text_recognizer.models.transformer import TransformerLitModel
 
 
@@ -13,6 +12,8 @@ from text_recognizer.models.transformer import TransformerLitModel
 class VqTransformerLitModel(TransformerLitModel):
     """A PyTorch Lightning model for transformer networks."""
 
+    alpha: float = attr.ib(default=1.0)
+
     def forward(self, data: Tensor) -> Tensor:
         """Forward pass with the transformer network."""
         return self.predict(data)
@@ -21,7 +22,7 @@ class VqTransformerLitModel(TransformerLitModel):
         """Training step."""
         data, targets = batch
         logits, commitment_loss = self.network(data, targets[:-1])
-        loss = self.loss_fn(logits, targets[1:]) + commitment_loss
+        loss = self.loss_fn(logits, targets[1:]) + self.alpha * commitment_loss
         self.log("train/loss", loss)
         self.log("train/commitment_loss", commitment_loss)
         return loss
@@ -32,7 +33,7 @@ class VqTransformerLitModel(TransformerLitModel):
 
         # Compute the loss.
         logits, commitment_loss = self.network(data, targets[:-1])
-        loss = self.loss_fn(logits, targets[1:]) + commitment_loss
+        loss = self.loss_fn(logits, targets[1:]) + self.alpha * commitment_loss
         self.log("val/loss", loss, prog_bar=True)
         self.log("val/commitment_loss", commitment_loss)
 