author     Gustaf Rydholm <gustaf.rydholm@gmail.com>   2021-08-04 15:15:26 +0200
committer  Gustaf Rydholm <gustaf.rydholm@gmail.com>   2021-08-04 15:15:26 +0200
commit     04c40f790e405ced6e6b90cf0a8aea268b9345c4 (patch)
tree       d5e05ee09fa99ee8d56d5373bde18626274a1fdd /text_recognizer
parent     d3afa310f77f47553586eeee58e3d3345a754e2c (diff)
Add char htr experiment, rename from ocr to htr, vqvae loss collapses
Diffstat (limited to 'text_recognizer')
-rw-r--r--  text_recognizer/models/vqvae.py              | 11
-rw-r--r--  text_recognizer/networks/vqvae/quantizer.py  |  2
2 files changed, 4 insertions, 9 deletions
diff --git a/text_recognizer/models/vqvae.py b/text_recognizer/models/vqvae.py
index 5890fd9..7f79b78 100644
--- a/text_recognizer/models/vqvae.py
+++ b/text_recognizer/models/vqvae.py
@@ -1,11 +1,8 @@
 """PyTorch Lightning model for base Transformers."""
-from typing import Any, Dict, Union, Tuple, Type
+from typing import Tuple
 
 import attr
-from omegaconf import DictConfig
-from torch import nn
 from torch import Tensor
-import wandb
 
 from text_recognizer.models.base import BaseLitModel
@@ -25,7 +22,7 @@ class VQVAELitModel(BaseLitModel):
         data, _ = batch
         reconstructions, vq_loss = self(data)
         loss = self.loss_fn(reconstructions, data)
-        loss += self.latent_loss_weight * vq_loss
+        loss = loss + self.latent_loss_weight * vq_loss
         self.log("train/loss", loss)
         return loss
@@ -34,7 +31,7 @@ class VQVAELitModel(BaseLitModel):
         data, _ = batch
         reconstructions, vq_loss = self(data)
         loss = self.loss_fn(reconstructions, data)
-        loss += self.latent_loss_weight * vq_loss
+        loss = loss + self.latent_loss_weight * vq_loss
         self.log("val/loss", loss, prog_bar=True)
 
     def test_step(self, batch: Tuple[Tensor, Tensor], batch_idx: int) -> None:
@@ -42,5 +39,5 @@ class VQVAELitModel(BaseLitModel):
         data, _ = batch
         reconstructions, vq_loss = self(data)
         loss = self.loss_fn(reconstructions, data)
-        loss += self.latent_loss_weight * vq_loss
+        loss = loss + self.latent_loss_weight * vq_loss
         self.log("test/loss", loss)
diff --git a/text_recognizer/networks/vqvae/quantizer.py b/text_recognizer/networks/vqvae/quantizer.py
index 5e0b602..1b59e78 100644
--- a/text_recognizer/networks/vqvae/quantizer.py
+++ b/text_recognizer/networks/vqvae/quantizer.py
@@ -83,8 +83,6 @@ class VectorQuantizer(nn.Module):
     def compute_ema(self, one_hot_encoding: Tensor, latent: Tensor) -> None:
         batch_cluster_size = one_hot_encoding.sum(axis=0)
         batch_embedding_avg = (latent.t() @ one_hot_encoding).t()
-        print(batch_cluster_size.shape)
-        print(self.embedding._cluster_size.shape)
         self.embedding._cluster_size.data.mul_(self.decay).add_(
             batch_cluster_size, alpha=1 - self.decay
         )
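
Note: the removed lines were debug `print` calls inside the EMA codebook update. Below is a minimal sketch of that exponential-moving-average update, assuming the buffers and `decay` attribute visible in the hunk; the standalone `ema_codebook_update` helper and the `embedding_avg` update line are illustrative assumptions, not code from this repo.

# Minimal sketch (not part of this repo): EMA update of codebook statistics,
# following the tensors shown in compute_ema above.
from torch import Tensor


def ema_codebook_update(
    cluster_size: Tensor,       # running counts per code, shape (num_embeddings,)
    embedding_avg: Tensor,      # running latent sums per code, shape (num_embeddings, dim)
    one_hot_encoding: Tensor,   # code assignments, shape (num_latents, num_embeddings)
    latent: Tensor,             # flattened encoder outputs, shape (num_latents, dim)
    decay: float = 0.99,
) -> None:
    batch_cluster_size = one_hot_encoding.sum(dim=0)           # (num_embeddings,)
    batch_embedding_avg = (latent.t() @ one_hot_encoding).t()  # (num_embeddings, dim)
    # new_stat = decay * old_stat + (1 - decay) * batch_stat
    cluster_size.mul_(decay).add_(batch_cluster_size, alpha=1 - decay)
    embedding_avg.mul_(decay).add_(batch_embedding_avg, alpha=1 - decay)

In the usual EMA variant of VQ-VAE these running statistics are later used to re-normalise the codebook (roughly embedding = embedding_avg / cluster_size), which is presumably where a collapsing cluster size would surface as the "vqvae loss collapses" noted in the commit message.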