Diffstat (limited to 'text_recognizer')
-rw-r--r-- | text_recognizer/models/vqvae.py | 11
-rw-r--r-- | text_recognizer/networks/vqvae/quantizer.py | 2
2 files changed, 4 insertions, 9 deletions
diff --git a/text_recognizer/models/vqvae.py b/text_recognizer/models/vqvae.py
index 5890fd9..7f79b78 100644
--- a/text_recognizer/models/vqvae.py
+++ b/text_recognizer/models/vqvae.py
@@ -1,11 +1,8 @@
 """PyTorch Lightning model for base Transformers."""
-from typing import Any, Dict, Union, Tuple, Type
+from typing import Tuple

 import attr
-from omegaconf import DictConfig
-from torch import nn
 from torch import Tensor
-import wandb

 from text_recognizer.models.base import BaseLitModel

@@ -25,7 +22,7 @@ class VQVAELitModel(BaseLitModel):
         data, _ = batch
         reconstructions, vq_loss = self(data)
         loss = self.loss_fn(reconstructions, data)
-        loss += self.latent_loss_weight * vq_loss
+        loss = loss + self.latent_loss_weight * vq_loss
         self.log("train/loss", loss)
         return loss

@@ -34,7 +31,7 @@ class VQVAELitModel(BaseLitModel):
         data, _ = batch
         reconstructions, vq_loss = self(data)
         loss = self.loss_fn(reconstructions, data)
-        loss += self.latent_loss_weight * vq_loss
+        loss = loss + self.latent_loss_weight * vq_loss
         self.log("val/loss", loss, prog_bar=True)

     def test_step(self, batch: Tuple[Tensor, Tensor], batch_idx: int) -> None:
@@ -42,5 +39,5 @@
         data, _ = batch
         reconstructions, vq_loss = self(data)
         loss = self.loss_fn(reconstructions, data)
-        loss += self.latent_loss_weight * vq_loss
+        loss = loss + self.latent_loss_weight * vq_loss
         self.log("test/loss", loss)
diff --git a/text_recognizer/networks/vqvae/quantizer.py b/text_recognizer/networks/vqvae/quantizer.py
index 5e0b602..1b59e78 100644
--- a/text_recognizer/networks/vqvae/quantizer.py
+++ b/text_recognizer/networks/vqvae/quantizer.py
@@ -83,8 +83,6 @@ class VectorQuantizer(nn.Module):
     def compute_ema(self, one_hot_encoding: Tensor, latent: Tensor) -> None:
         batch_cluster_size = one_hot_encoding.sum(axis=0)
         batch_embedding_avg = (latent.t() @ one_hot_encoding).t()
-        print(batch_cluster_size.shape)
-        print(self.embedding._cluster_size.shape)
         self.embedding._cluster_size.data.mul_(self.decay).add_(
             batch_cluster_size, alpha=1 - self.decay
         )
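
The vqvae.py change swaps in-place tensor addition (loss += ...) for out-of-place addition in all three steps. The commit does not state a motivation, but a common reason is that mutating a tensor in place can invalidate values autograd saved for the backward pass. A minimal sketch of that failure mode, independent of this repository:

import torch

x = torch.ones(3, requires_grad=True)

# exp() saves its output for the backward pass; mutating that output
# in place bumps its version counter and breaks the graph.
y = x.exp()
y += 1
try:
    y.sum().backward()
except RuntimeError as err:
    print(err)  # "... modified by an inplace operation ..."

# Out-of-place addition allocates a new tensor and leaves the saved
# value untouched, so backward succeeds.
y = x.exp()
y = y + 1
y.sum().backward()

Whether the in-place form actually fails here depends on what self.loss_fn saves for backward, so the out-of-place form is the defensive choice.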
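The quantizer.py change only removes two debug prints from compute_ema, which performs the exponential-moving-average codebook update used by EMA vector quantization: N_i <- decay * N_i + (1 - decay) * n_i, where n_i counts how many latents in the batch were assigned to codebook entry i. A standalone sketch of that update; the variable names mirror the diff, but the setup values are illustrative:

import torch

decay = 0.99
num_embeddings, dim, batch = 8, 4, 16

cluster_size = torch.zeros(num_embeddings)  # running N_i per codebook entry
one_hot_encoding = torch.eye(num_embeddings)[
    torch.randint(0, num_embeddings, (batch,))
]                                           # (batch, num_embeddings) assignments
latent = torch.randn(batch, dim)

batch_cluster_size = one_hot_encoding.sum(axis=0)          # n_i for this batch
batch_embedding_avg = (latent.t() @ one_hot_encoding).t()  # per-entry latent sums

# The same in-place EMA as the diff's mul_/add_ chain:
cluster_size.mul_(decay).add_(batch_cluster_size, alpha=1 - decay)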