diff options
Diffstat (limited to 'src/training')
-rw-r--r-- | src/training/experiments/sample_experiment.yml | 1 | ||||
-rw-r--r-- | src/training/run_experiment.py | 1 | ||||
-rw-r--r-- | src/training/trainer/callbacks/__init__.py | 3 | ||||
-rw-r--r-- | src/training/trainer/callbacks/wandb_callbacks.py | 97 | ||||
-rw-r--r-- | src/training/trainer/train.py | 13 |
5 files changed, 105 insertions, 10 deletions
diff --git a/src/training/experiments/sample_experiment.yml b/src/training/experiments/sample_experiment.yml index a073a87..8f94475 100644 --- a/src/training/experiments/sample_experiment.yml +++ b/src/training/experiments/sample_experiment.yml @@ -33,7 +33,6 @@ experiments: depths: [2, 2] block_sizes: [64, 64] activation: leaky_relu - stn: true # network: # type: WideResidualNetwork # args: diff --git a/src/training/run_experiment.py b/src/training/run_experiment.py index a883b45..0167725 100644 --- a/src/training/run_experiment.py +++ b/src/training/run_experiment.py @@ -286,6 +286,7 @@ def run_experiment( callbacks=callbacks, transformer_model=experiment_config["train_args"]["transformer_model"], max_norm=experiment_config["train_args"]["max_norm"], + freeze_backbone=experiment_config["train_args"]["freeze_backbone"], ) # Train the model. diff --git a/src/training/trainer/callbacks/__init__.py b/src/training/trainer/callbacks/__init__.py index e1bd858..95ec142 100644 --- a/src/training/trainer/callbacks/__init__.py +++ b/src/training/trainer/callbacks/__init__.py @@ -7,7 +7,7 @@ from .lr_schedulers import ( SWA, ) from .progress_bar import ProgressBar -from .wandb_callbacks import WandbCallback, WandbImageLogger +from .wandb_callbacks import WandbCallback, WandbImageLogger, WandbSegmentationLogger __all__ = [ "Callback", @@ -17,6 +17,7 @@ __all__ = [ "LRScheduler", "WandbCallback", "WandbImageLogger", + "WandbSegmentationLogger", "ProgressBar", "SWA", ] diff --git a/src/training/trainer/callbacks/wandb_callbacks.py b/src/training/trainer/callbacks/wandb_callbacks.py index 1627f17..df1fd8f 100644 --- a/src/training/trainer/callbacks/wandb_callbacks.py +++ b/src/training/trainer/callbacks/wandb_callbacks.py @@ -2,12 +2,10 @@ from typing import Callable, Dict, List, Optional, Type import numpy as np -import torch -from torchvision.transforms import ToTensor from training.trainer.callbacks import Callback import wandb -from text_recognizer.datasets import Transpose +import text_recognizer.datasets.transforms as transforms from text_recognizer.models.base import Model @@ -52,14 +50,14 @@ class WandbImageLogger(Callback): self, example_indices: Optional[List] = None, num_examples: int = 4, - use_transpose: Optional[bool] = False, + transform: Optional[bool] = None, ) -> None: """Initializes the WandbImageLogger with the model to train. Args: example_indices (Optional[List]): Indices for validation images. Defaults to None. num_examples (int): Number of random samples to take if example_indices are not specified. Defaults to 4. - use_transpose (Optional[bool]): Use transpose on image or not. Defaults to False. + transform (Optional[Dict]): Use transform on image or not. Defaults to None. """ @@ -68,7 +66,13 @@ class WandbImageLogger(Callback): self.example_indices = example_indices self.test_sample_indices = None self.num_examples = num_examples - self.transpose = Transpose() if use_transpose else None + self.transform = ( + self._configure_transform(transform) if transform is not None else None + ) + + def _configure_transform(self, transform: Dict) -> Callable: + args = transform["args"] or {} + return getattr(transforms, transform["type"])(**args) def set_model(self, model: Type[Model]) -> None: """Sets the model and extracts validation images from the dataset.""" @@ -101,7 +105,7 @@ class WandbImageLogger(Callback): """Get network predictions on validation images.""" images = [] for i, image in enumerate(self.images): - image = self.transpose(image) if self.transpose is not None else image + image = self.transform(image) if self.transform is not None else image pred, conf = self.model.predict_on_image(image) if isinstance(self.targets[i], list): ground_truth = "".join( @@ -116,3 +120,82 @@ class WandbImageLogger(Callback): images.append(wandb.Image(image, caption=caption)) wandb.log({f"{self.caption}": images}, commit=False) + + +class WandbSegmentationLogger(Callback): + """Custom W&B callback for image logging.""" + + def __init__( + self, + class_labels: Dict, + example_indices: Optional[List] = None, + num_examples: int = 4, + ) -> None: + """Initializes the WandbImageLogger with the model to train. + + Args: + class_labels (Dict): A dict with int as key and class string as value. + example_indices (Optional[List]): Indices for validation images. Defaults to None. + num_examples (int): Number of random samples to take if example_indices are not specified. Defaults to 4. + + """ + + super().__init__() + self.caption = None + self.class_labels = {int(k): v for k, v in class_labels.items()} + self.example_indices = example_indices + self.test_sample_indices = None + self.num_examples = num_examples + + def set_model(self, model: Type[Model]) -> None: + """Sets the model and extracts validation images from the dataset.""" + self.model = model + self.caption = "Validation Segmentation Examples" + if self.example_indices is None: + self.example_indices = np.random.randint( + 0, len(self.model.val_dataset), self.num_examples + ) + self.images = self.model.val_dataset.dataset.data[self.example_indices] + self.targets = self.model.val_dataset.dataset.targets[self.example_indices] + self.targets = self.targets.tolist() + + def on_test_begin(self) -> None: + """Get samples from test dataset.""" + self.caption = "Test Segmentation Examples" + if self.test_sample_indices is None: + self.test_sample_indices = np.random.randint( + 0, len(self.model.test_dataset), self.num_examples + ) + self.images = self.model.test_dataset.data[self.test_sample_indices] + self.targets = self.model.test_dataset.targets[self.test_sample_indices] + self.targets = self.targets.tolist() + + def on_test_end(self) -> None: + """Log test images.""" + self.on_epoch_end(0, {}) + + def on_epoch_end(self, epoch: int, logs: Dict) -> None: + """Get network predictions on validation images.""" + images = [] + for i, image in enumerate(self.images): + pred_mask = ( + self.model.predict_on_image(image).detach().squeeze(0).cpu().numpy() + ) + gt_mask = np.array(self.targets[i]) + images.append( + wandb.Image( + image, + masks={ + "predictions": { + "mask_data": pred_mask, + "class_labels": self.class_labels, + }, + "ground_truth": { + "mask_data": gt_mask, + "class_labels": self.class_labels, + }, + }, + ) + ) + + wandb.log({f"{self.caption}": images}, commit=False) diff --git a/src/training/trainer/train.py b/src/training/trainer/train.py index 8ae994a..40a25da 100644 --- a/src/training/trainer/train.py +++ b/src/training/trainer/train.py @@ -38,6 +38,7 @@ class Trainer: callbacks: List[Type[Callback]], transformer_model: bool = False, max_norm: float = 0.0, + freeze_backbone: Optional[int] = None, ) -> None: """Initialization of the Trainer. @@ -46,12 +47,15 @@ class Trainer: callbacks (CallbackList): List of callbacks to be called. transformer_model (bool): Transformer model flag, modifies the input to the model. Default is False. max_norm (float): Max norm for gradient clipping. Defaults to 0.0. + freeze_backbone (Optional[int]): How many epochs to freeze the backbone for. Used when training + Transformers. Default is None. """ # Training arguments. self.start_epoch = 1 self.max_epochs = max_epochs self.callbacks = callbacks + self.freeze_backbone = freeze_backbone # Flag for setting callbacks. self.callbacks_configured = False @@ -115,7 +119,14 @@ class Trainer: # Forward pass. # Get the network prediction. if self.transformer_model: - output = self.model.network.forward(data, targets[:, :-1]) + if self.freeze_backbone is not None and batch < self.freeze_backbone: + with torch.no_grad(): + image_features = self.model.network.extract_image_features(data) + output = self.model.network.decode_image_features( + image_features, targets[:, :-1] + ) + else: + output = self.model.network.forward(data, targets[:, :-1]) output = rearrange(output, "b t v -> (b t) v") targets = rearrange(targets[:, 1:], "b t -> (b t)").long() else: |