From bd4bd443f339e95007bfdabf3e060db720f4d4b9 Mon Sep 17 00:00:00 2001
From: Gustaf Rydholm <gustaf.rydholm@gmail.com>
Date: Tue, 3 Aug 2021 18:18:48 +0200
Subject: Training working, multiple bug fixes

---
 training/callbacks/wandb_callbacks.py | 83 +++++++++++++++--------------------
 1 file changed, 36 insertions(+), 47 deletions(-)

(limited to 'training/callbacks')

diff --git a/training/callbacks/wandb_callbacks.py b/training/callbacks/wandb_callbacks.py
index 6379cc0..906531f 100644
--- a/training/callbacks/wandb_callbacks.py
+++ b/training/callbacks/wandb_callbacks.py
@@ -1,11 +1,10 @@
 """Weights and Biases callbacks."""
 from pathlib import Path
-from typing import List
 
-import attr
 import wandb
 from pytorch_lightning import Callback, LightningModule, Trainer
 from pytorch_lightning.loggers import LoggerCollection, WandbLogger
+from pytorch_lightning.utilities import rank_zero_only
 
 
 def get_wandb_logger(trainer: Trainer) -> WandbLogger:
@@ -22,31 +21,27 @@ def get_wandb_logger(trainer: Trainer) -> WandbLogger:
     raise Exception("Weight and Biases logger not found for some reason...")
 
 
-@attr.s
 class WatchModel(Callback):
     """Make W&B watch the model at the beginning of the run."""
 
-    log: str = attr.ib(default="gradients")
-    log_freq: int = attr.ib(default=100)
-
-    def __attrs_pre_init__(self) -> None:
-        super().__init__()
+    def __init__(self, log: str = "gradients", log_freq: int = 100) -> None:
+        self.log = log
+        self.log_freq = log_freq
 
+    @rank_zero_only
     def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
         """Watches model weights with wandb."""
         logger = get_wandb_logger(trainer)
         logger.watch(model=trainer.model, log=self.log, log_freq=self.log_freq)
 
 
-@attr.s
 class UploadCodeAsArtifact(Callback):
     """Upload all *.py files to W&B as an artifact, at the beginning of the run."""
 
-    project_dir: Path = attr.ib(converter=Path)
-
-    def __attrs_pre_init__(self) -> None:
-        super().__init__()
+    def __init__(self, project_dir: str) -> None:
+        self.project_dir = Path(project_dir)
 
+    @rank_zero_only
     def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
         """Uploads project code as an artifact."""
         logger = get_wandb_logger(trainer)
@@ -58,16 +53,16 @@ class UploadCodeAsArtifact(Callback):
         experiment.use_artifact(artifact)
 
 
-@attr.s
-class UploadCheckpointAsArtifact(Callback):
+class UploadCheckpointsAsArtifact(Callback):
     """Upload checkpoint to wandb as an artifact, at the end of a run."""
 
-    ckpt_dir: Path = attr.ib(converter=Path)
-    upload_best_only: bool = attr.ib()
-
-    def __attrs_pre_init__(self) -> None:
-        super().__init__()
+    def __init__(
+        self, ckpt_dir: str = "checkpoints/", upload_best_only: bool = False
+    ) -> None:
+        self.ckpt_dir = ckpt_dir
+        self.upload_best_only = upload_best_only
 
+    @rank_zero_only
     def on_train_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         """Uploads model checkpoint to W&B."""
         logger = get_wandb_logger(trainer)
@@ -83,15 +78,12 @@ class UploadCheckpointAsArtifact(Callback):
         experiment.use_artifact(ckpts)
 
 
-@attr.s
 class LogTextPredictions(Callback):
     """Logs a validation batch with image to text transcription."""
 
-    num_samples: int = attr.ib(default=8)
-    ready: bool = attr.ib(default=True)
-
-    def __attrs_pre_init__(self) -> None:
-        super().__init__()
+    def __init__(self, num_samples: int = 8) -> None:
+        self.num_samples = num_samples
+        self.ready = False
 
     def _log_predictions(
         self, stage: str, trainer: Trainer, pl_module: LightningModule
@@ -111,20 +103,20 @@ class LogTextPredictions(Callback):
         logits = pl_module(imgs)
 
         mapping = pl_module.mapping
+        columns = ["id", "image", "prediction", "truth"]
+        data = [
+            [id, wandb.Image(img), mapping.get_text(pred), mapping.get_text(label)]
+            for id, (img, pred, label) in enumerate(
+                zip(
+                    imgs[: self.num_samples],
+                    logits[: self.num_samples],
+                    labels[: self.num_samples],
+                )
+            )
+        ]
+
         experiment.log(
-            {
-                f"OCR/{experiment.name}/{stage}": [
-                    wandb.Image(
-                        img,
-                        caption=f"Pred: {mapping.get_text(pred)}, Label: {mapping.get_text(label)}",
-                    )
-                    for img, pred, label in zip(
-                        imgs[: self.num_samples],
-                        logits[: self.num_samples],
-                        labels[: self.num_samples],
-                    )
-                ]
-            }
+            {f"OCR/{experiment.name}/{stage}": wandb.Table(data=data, columns=columns)}
         )
 
     def on_sanity_check_start(
@@ -143,20 +135,17 @@ class LogTextPredictions(Callback):
         """Logs predictions on validation epoch end."""
         self._log_predictions(stage="val", trainer=trainer, pl_module=pl_module)
 
-    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+    def on_test_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         """Logs predictions on train epoch end."""
         self._log_predictions(stage="test", trainer=trainer, pl_module=pl_module)
 
 
-@attr.s
 class LogReconstuctedImages(Callback):
     """Log reconstructions of images."""
 
-    num_samples: int = attr.ib(default=8)
-    ready: bool = attr.ib(default=True)
-
-    def __attrs_pre_init__(self) -> None:
-        super().__init__()
+    def __init__(self, num_samples: int = 8) -> None:
+        self.num_samples = num_samples
+        self.ready = False
 
     def _log_reconstruction(
         self, stage: str, trainer: Trainer, pl_module: LightningModule
@@ -202,6 +191,6 @@ class LogReconstuctedImages(Callback):
         """Logs predictions on validation epoch end."""
         self._log_reconstruction(stage="val", trainer=trainer, pl_module=pl_module)
 
-    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+    def on_test_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         """Logs predictions on train epoch end."""
         self._log_reconstruction(stage="test", trainer=trainer, pl_module=pl_module)
-- 
cgit v1.2.3-70-g09d2