 text_recognizer/data/iam_extended_paragraphs.py    | 10
 text_recognizer/data/iam_paragraphs.py             |  9
 text_recognizer/models/base.py                     | 10
 text_recognizer/models/transformer.py              |  8
 text_recognizer/networks/__init__.py               |  1
 text_recognizer/networks/image_transformer.py      |  6
 text_recognizer/networks/residual_network.py       |  6
 text_recognizer/networks/transducer/transducer.py  |  7
 text_recognizer/networks/vqvae/decoder.py          | 18
 text_recognizer/networks/vqvae/encoder.py          | 12
 training/experiments/image_transformer.yaml        | 13
 training/run_experiment.py                         | 50
 12 files changed, 69 insertions(+), 81 deletions(-)
diff --git a/text_recognizer/data/iam_extended_paragraphs.py b/text_recognizer/data/iam_extended_paragraphs.py
index c144341..d2529b4 100644
--- a/text_recognizer/data/iam_extended_paragraphs.py
+++ b/text_recognizer/data/iam_extended_paragraphs.py
@@ -18,16 +18,10 @@ class IAMExtendedParagraphs(BaseDataModule):
         super().__init__(batch_size, num_workers)

         self.iam_paragraphs = IAMParagraphs(
-            batch_size,
-            num_workers,
-            train_fraction,
-            augment,
+            batch_size, num_workers, train_fraction, augment,
         )
         self.iam_synthetic_paragraphs = IAMSyntheticParagraphs(
-            batch_size,
-            num_workers,
-            train_fraction,
-            augment,
+            batch_size, num_workers, train_fraction, augment,
         )

         self.dims = self.iam_paragraphs.dims
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 314d458..f588587 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -161,10 +161,7 @@ def get_dataset_properties() -> Dict:
             "min": min(_get_property_values("num_lines")),
             "max": max(_get_property_values("num_lines")),
         },
-        "crop_shape": {
-            "min": crop_shapes.min(axis=0),
-            "max": crop_shapes.max(axis=0),
-        },
+        "crop_shape": {"min": crop_shapes.min(axis=0), "max": crop_shapes.max(axis=0),},
         "aspect_ratio": {
             "min": aspect_ratio.min(axis=0),
             "max": aspect_ratio.max(axis=0),
@@ -285,9 +282,7 @@ def get_transform(image_shape: Tuple[int, int], augment: bool) -> transforms.Compose:
             ),
             transforms.ColorJitter(brightness=(0.8, 1.6)),
             transforms.RandomAffine(
-                degrees=1,
-                shear=(-10, 10),
-                interpolation=InterpolationMode.BILINEAR,
+                degrees=1, shear=(-10, 10), interpolation=InterpolationMode.BILINEAR,
             ),
         ]
     else:
diff --git a/text_recognizer/models/base.py b/text_recognizer/models/base.py
index f4df3f3..3c1919e 100644
--- a/text_recognizer/models/base.py
+++ b/text_recognizer/models/base.py
@@ -2,7 +2,7 @@
 from typing import Any, Dict, List, Union, Tuple, Type

 import madgrad
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
 import pytorch_lightning as pl
 import torch
 from torch import nn
@@ -16,9 +16,9 @@ class LitBaseModel(pl.LightningModule):
     def __init__(
         self,
         network: Type[nn.Module],
-        optimizer: Union[OmegaConf, Dict],
-        lr_scheduler: Union[OmegaConf, Dict],
-        criterion: Union[OmegaConf, Dict],
+        optimizer: Union[DictConfig, Dict],
+        lr_scheduler: Union[DictConfig, Dict],
+        criterion: Union[DictConfig, Dict],
         monitor: str = "val_loss",
     ) -> None:
         super().__init__()
@@ -34,7 +34,7 @@ class LitBaseModel(pl.LightningModule):
         self.test_acc = torchmetrics.Accuracy()

     @staticmethod
-    def configure_criterion(criterion: Union[OmegaConf, Dict]) -> Type[nn.Module]:
+    def configure_criterion(criterion: Union[DictConfig, Dict]) -> Type[nn.Module]:
         """Returns a loss function."""
         criterion = OmegaConf.create(criterion)
         args = {} or criterion.args
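Note on the configure_criterion change above: OmegaConf.create accepts both a plain dict and an existing DictConfig, which is what lets the annotation widen to Union[DictConfig, Dict]. A minimal runnable sketch of the pattern, assuming only omegaconf and torch; the getattr(nn, ...) lookup and the example criterion are illustrative, not code from this commit:

    from typing import Dict, Union

    from omegaconf import DictConfig, OmegaConf
    from torch import nn


    def configure_criterion(criterion: Union[DictConfig, Dict]) -> nn.Module:
        # OmegaConf.create wraps a plain dict and copies an existing
        # DictConfig, so both config flavours end up with attribute access.
        criterion = OmegaConf.create(criterion)
        args = criterion.args or {}
        # Assumption: the loss class is resolved by name from torch.nn.
        return getattr(nn, criterion.type)(**args)


    loss_fn = configure_criterion(
        {"type": "CrossEntropyLoss", "args": {"ignore_index": -100}}
    )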
diff --git a/text_recognizer/models/transformer.py b/text_recognizer/models/transformer.py
index 983e274..b23685b 100644
--- a/text_recognizer/models/transformer.py
+++ b/text_recognizer/models/transformer.py
@@ -1,7 +1,7 @@
 """PyTorch Lightning model for base Transformers."""
 from typing import Dict, List, Optional, Union, Tuple

-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
 import pytorch_lightning as pl
 import torch
 from torch import nn
@@ -20,9 +20,9 @@ class LitTransformerModel(LitBaseModel):
     def __init__(
         self,
         network: Type[nn.Module],
-        optimizer: Union[OmegaConf, Dict],
-        lr_scheduler: Union[OmegaConf, Dict],
-        criterion: Union[OmegaConf, Dict],
+        optimizer: Union[DictConfig, Dict],
+        lr_scheduler: Union[DictConfig, Dict],
+        criterion: Union[DictConfig, Dict],
         monitor: str = "val_loss",
         mapping: Optional[List[str]] = None,
     ) -> None:
diff --git a/text_recognizer/networks/__init__.py b/text_recognizer/networks/__init__.py
index 4dcaf2e..979149f 100644
--- a/text_recognizer/networks/__init__.py
+++ b/text_recognizer/networks/__init__.py
@@ -1,3 +1,2 @@
 """Network modules"""
 from .image_transformer import ImageTransformer
-
diff --git a/text_recognizer/networks/image_transformer.py b/text_recognizer/networks/image_transformer.py
index edebca9..9ed67a4 100644
--- a/text_recognizer/networks/image_transformer.py
+++ b/text_recognizer/networks/image_transformer.py
@@ -13,7 +13,7 @@ import math
 from typing import Dict, List, Union, Sequence, Tuple, Type

 from einops import rearrange
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
 import torch
 from torch import nn
 from torch import Tensor
@@ -34,7 +34,7 @@ class ImageTransformer(nn.Module):
         self,
         input_shape: Sequence[int],
         output_shape: Sequence[int],
-        encoder: Union[OmegaConf, Dict],
+        encoder: Union[DictConfig, Dict],
         mapping: str,
         num_decoder_layers: int = 4,
         hidden_dim: int = 256,
@@ -101,7 +101,7 @@ class ImageTransformer(nn.Module):
         nn.init.normal_(self.feature_map_encoding.bias, -bound, bound)

     @staticmethod
-    def _configure_encoder(encoder: Union[OmegaConf, NamedTuple]) -> Type[nn.Module]:
+    def _configure_encoder(encoder: Union[DictConfig, Dict]) -> Type[nn.Module]:
         encoder = OmegaConf.create(encoder)
         network_module = importlib.import_module("text_recognizer.networks")
         encoder_class = getattr(network_module, encoder.type)
diff --git a/text_recognizer/networks/residual_network.py b/text_recognizer/networks/residual_network.py
index da7553d..c33f419 100644
--- a/text_recognizer/networks/residual_network.py
+++ b/text_recognizer/networks/residual_network.py
@@ -20,11 +20,7 @@ class Conv2dAuto(nn.Conv2d):

 def conv_bn(in_channels: int, out_channels: int, *args, **kwargs) -> nn.Sequential:
     """3x3 convolution with batch norm."""
-    conv3x3 = partial(
-        Conv2dAuto,
-        kernel_size=3,
-        bias=False,
-    )
+    conv3x3 = partial(Conv2dAuto, kernel_size=3, bias=False,)
     return nn.Sequential(
         conv3x3(in_channels, out_channels, *args, **kwargs),
         nn.BatchNorm2d(out_channels),
diff --git a/text_recognizer/networks/transducer/transducer.py b/text_recognizer/networks/transducer/transducer.py
index b10f93a..d7e3d08 100644
--- a/text_recognizer/networks/transducer/transducer.py
+++ b/text_recognizer/networks/transducer/transducer.py
@@ -392,12 +392,7 @@ def load_transducer_loss(
         transitions = gtn.load(str(processed_path / transitions))

     preprocessor = Preprocessor(
-        data_dir,
-        num_features,
-        tokens_path,
-        lexicon_path,
-        use_words,
-        prepend_wordsep,
+        data_dir, num_features, tokens_path, lexicon_path, use_words, prepend_wordsep,
     )

     num_tokens = preprocessor.num_tokens
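The _configure_encoder hunk above keeps the import-by-name pattern: the config carries a class name, importlib resolves the networks package, and getattr pulls the class out. A standalone sketch of that pattern, runnable inside this repo; instantiating with **encoder.args is an assumption here, since the hunk cuts off right after the getattr:

    import importlib
    from typing import Dict, Union

    from omegaconf import DictConfig, OmegaConf
    from torch import nn


    def configure_encoder(encoder: Union[DictConfig, Dict]) -> nn.Module:
        encoder = OmegaConf.create(encoder)
        # Resolve the encoder class by name from the networks package...
        network_module = importlib.import_module("text_recognizer.networks")
        encoder_class = getattr(network_module, encoder.type)
        # ...then build it from the args mapping in the config (assumed).
        return encoder_class(**(encoder.args or {}))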
diff --git a/text_recognizer/networks/vqvae/decoder.py b/text_recognizer/networks/vqvae/decoder.py
index 67ed0d9..8847aba 100644
--- a/text_recognizer/networks/vqvae/decoder.py
+++ b/text_recognizer/networks/vqvae/decoder.py
@@ -44,12 +44,7 @@ class Decoder(nn.Module):

         # Configure decoder.
         self.decoder = self._build_decoder(
-            channels,
-            kernel_sizes,
-            strides,
-            num_residual_layers,
-            activation,
-            dropout,
+            channels, kernel_sizes, strides, num_residual_layers, activation, dropout,
         )

     def _build_decompression_block(
@@ -78,9 +73,7 @@ class Decoder(nn.Module):
             )

             if i < len(self.upsampling):
-                modules.append(
-                    nn.Upsample(size=self.upsampling[i]),
-                )
+                modules.append(nn.Upsample(size=self.upsampling[i]),)

             if dropout is not None:
                 modules.append(dropout)
@@ -109,12 +102,7 @@ class Decoder(nn.Module):
     ) -> nn.Sequential:

         self.res_block.append(
-            nn.Conv2d(
-                self.embedding_dim,
-                channels[0],
-                kernel_size=1,
-                stride=1,
-            )
+            nn.Conv2d(self.embedding_dim, channels[0], kernel_size=1, stride=1,)
         )

         # Bottleneck module.
diff --git a/text_recognizer/networks/vqvae/encoder.py b/text_recognizer/networks/vqvae/encoder.py
index ede5c31..d3adac5 100644
--- a/text_recognizer/networks/vqvae/encoder.py
+++ b/text_recognizer/networks/vqvae/encoder.py
@@ -11,10 +11,7 @@ from text_recognizer.networks.vqvae.vector_quantizer import VectorQuantizer

 class _ResidualBlock(nn.Module):
     def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        dropout: Optional[Type[nn.Module]],
+        self, in_channels: int, out_channels: int, dropout: Optional[Type[nn.Module]],
     ) -> None:
         super().__init__()
         self.block = [
@@ -138,12 +135,7 @@ class Encoder(nn.Module):
         )

         encoder.append(
-            nn.Conv2d(
-                channels[-1],
-                self.embedding_dim,
-                kernel_size=1,
-                stride=1,
-            )
+            nn.Conv2d(channels[-1], self.embedding_dim, kernel_size=1, stride=1,)
         )

         return nn.Sequential(*encoder)
diff --git a/training/experiments/image_transformer.yaml b/training/experiments/image_transformer.yaml
index 9e8f9fc..bedcbb5 100644
--- a/training/experiments/image_transformer.yaml
+++ b/training/experiments/image_transformer.yaml
@@ -1,9 +1,12 @@
+seed: 4711
+
 network:
+  desc: null
   type: ImageTransformer
   args:
     encoder:
-      type: None
-      args: None
+      type: null
+      args: null
     num_decoder_layers: 4
     hidden_dim: 256
     num_heads: 4
@@ -12,6 +15,7 @@ network:
   transformer_activation: glu

 model:
+  desc: null
   type: LitTransformerModel
   args:
     optimizer:
@@ -31,11 +35,11 @@ model:
         weight: None
         ignore_index: -100
         reduction: mean

-    monitor: val_loss
     mapping: sentence_piece

 data:
+  desc: null
   type: IAMExtendedParagraphs
   args:
     batch_size: 16
@@ -55,6 +59,7 @@ callbacks:
     patience: 10

 trainer:
+  desc: null
   args:
     stochastic_weight_avg: true
     auto_scale_batch_size: binsearch
@@ -62,6 +67,6 @@ trainer:
     fast_dev_run: false
     gpus: 1
     precision: 16
-    max_epocs: 512
+    max_epochs: 512
     terminate_on_nan: true
     weights_summary: true
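A sketch of how an experiment YAML like the one above turns into objects, following the _import_class pattern from run_experiment.py (next diff). Only the data section is reproduced; the return line of _import_class is reconstructed, since the diff truncates its body, and the data module's remaining constructor arguments are assumed to have defaults:

    import importlib

    from omegaconf import OmegaConf

    # Trimmed copy of the data section from the YAML above.
    config = OmegaConf.create(
        """
        data:
          type: IAMExtendedParagraphs
          args:
            batch_size: 16
        """
    )


    def _import_class(module_and_class_name: str) -> type:
        """Same split-and-getattr helper as in run_experiment.py."""
        module_name, class_name = module_and_class_name.rsplit(".", 1)
        return getattr(importlib.import_module(module_name), class_name)


    data_module_class = _import_class(f"text_recognizer.data.{config.data.type}")
    data_module = data_module_class(**config.data.args)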
diff --git a/training/run_experiment.py b/training/run_experiment.py
index 289866e..1fb4bc2 100644
--- a/training/run_experiment.py
+++ b/training/run_experiment.py
@@ -6,7 +6,7 @@ from typing import Dict, List, NamedTuple, Optional, Union, Type

 import click
 from loguru import logger
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
 import pytorch_lightning as pl
 import torch
 from torch import nn
@@ -42,6 +42,14 @@ def _configure_logging(log_dir: Optional[Path], verbose: int = 0) -> None:
     )


+def _load_config(file_path: Path) -> DictConfig:
+    """Return experiment config."""
+    logger.info(f"Loading config from: {file_path}")
+    if not file_path.exists():
+        raise FileNotFoundError(f"Experiment config not found at: {file_path}")
+    return OmegaConf.load(file_path)
+
+
 def _import_class(module_and_class_name: str) -> type:
     """Import class from module."""
     module_name, class_name = module_and_class_name.rsplit(".", 1)
@@ -50,26 +58,25 @@ def _import_class(module_and_class_name: str) -> type:


 def _configure_callbacks(
-    args: List[Union[OmegaConf, NamedTuple]]
+    callbacks: List[DictConfig],
 ) -> List[Type[pl.callbacks.Callback]]:
     """Configures lightning callbacks."""
     pl_callbacks = [
-        getattr(pl.callbacks, callback.type)(**callback.args) for callback in args
+        getattr(pl.callbacks, callback.type)(**callback.args) for callback in callbacks
     ]
     return pl_callbacks


 def _configure_logger(
-    network: Type[nn.Module], args: Dict, use_wandb: bool
+    network: Type[nn.Module], args: Dict, use_wandb: bool
 ) -> pl.loggers.WandbLogger:
     """Configures lightning logger."""
     if use_wandb:
         pl_logger = pl.loggers.WandbLogger()
         pl_logger.watch(network)
         pl_logger.log_hyperparams(vars(args))
-    else:
-        pl_logger = pl.logger.TensorBoardLogger("training/logs")
-    return pl_logger
+        return pl_logger
+    return pl.loggers.TensorBoardLogger("training/logs")


 def _save_best_weights(
@@ -89,7 +96,9 @@ def _save_best_weights(
     wandb.save(best_model_path)


-def _load_lit_model(lit_model_class: type, network: Type[nn.Module], config: OmegaConf) -> Type[pl.LightningModule]:
+def _load_lit_model(
+    lit_model_class: type, network: Type[nn.Module], config: DictConfig
+) -> Type[pl.LightningModule]:
     """Load lightning model."""
     if config.load_checkpoint is not None:
         logger.info(
@@ -101,8 +110,17 @@ def _load_lit_model(lit_model_class: type, network: Type[nn.Module], config: OmegaConf) -> Type[pl.LightningModule]:
     return lit_model_class(network=network, **config.model.args)


-def run(path: str, train: bool, test: bool, tune: bool, use_wandb: bool) -> None:
+def run(
+    filename: str,
+    train: bool,
+    test: bool,
+    tune: bool,
+    use_wandb: bool,
+    verbose: int = 0,
+) -> None:
     """Runs experiment."""
+
+    _configure_logging(None, verbose=verbose)
     logger.info("Starting experiment...")

     # Seed everything in the experiment.
@@ -110,8 +128,8 @@ def run(path: str, train: bool, test: bool, tune: bool, use_wandb: bool) -> None:
     pl.utilities.seed.seed_everything(SEED)

     # Load config.
-    logger.info(f"Loading config from: {path}")
-    config = OmegaConf.load(path)
+    file_path = EXPERIMENTS_DIRNAME / filename
+    config = _load_config(file_path)

     # Load classes.
     data_module_class = _import_class(f"text_recognizer.data.{config.data.type}")
@@ -169,8 +187,14 @@ def cli(
     verbose: int,
 ) -> None:
     """Run experiment."""
-    _configure_logging(None, verbose=verbose)
-    run(path=experiment_config, train=train, test=test, tune=tune, use_wandb=use_wandb)
+    run(
+        filename=experiment_config,
+        train=train,
+        test=test,
+        tune=tune,
+        use_wandb=use_wandb,
+        verbose=verbose,
+    )


 if __name__ == "__main__":
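End to end, the callbacks section of the experiment YAML flows through the reworked _configure_callbacks unchanged. A usage sketch with the function body from the diff above; the two callback entries are illustrative, not taken from the experiment file:

    from typing import List, Type

    from omegaconf import DictConfig, OmegaConf
    import pytorch_lightning as pl


    def _configure_callbacks(
        callbacks: List[DictConfig],
    ) -> List[Type[pl.callbacks.Callback]]:
        """Configures lightning callbacks."""
        pl_callbacks = [
            getattr(pl.callbacks, callback.type)(**callback.args) for callback in callbacks
        ]
        return pl_callbacks


    # OmegaConf.create on a list yields a ListConfig of DictConfig items,
    # matching the new List[DictConfig] annotation.
    callbacks_config = OmegaConf.create(
        [
            {"type": "EarlyStopping", "args": {"monitor": "val_loss", "patience": 10}},
            {"type": "LearningRateMonitor", "args": {"logging_interval": "step"}},
        ]
    )
    pl_callbacks = _configure_callbacks(callbacks_config)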