# @package _global_

# Experiment config: VQ-VAE network trained with a VQGAN loss
# (MSE reconstruction term plus an N-layer discriminator).
# The package directive above must stay in the leading comment block of the
# file, so no `---` document marker is placed before it.

# Config-group selections. Groups set to null here (criterion, optimizers,
# lr_schedulers) are defined inline further down in this file.
defaults:
  - override /network: vqvae
  - override /criterion: null
  - override /model: lit_vqgan
  - override /callbacks: wandb_vae
  - override /optimizers: null
  - override /lr_schedulers: null

# Loss: reconstruction loss and discriminator are nested instantiation targets.
criterion:
  _target_: text_recognizer.criterions.vqgan_loss.VQGANLoss
  reconstruction_loss:
    _target_: torch.nn.MSELoss
    reduction: mean
  discriminator:
    _target_: text_recognizer.criterions.n_layer_discriminator.NLayerDiscriminator
    in_channels: 1
    num_channels: 64
    num_layers: 3
  vq_loss_weight: 0.25
  discriminator_weight: 1.0
  discriminator_factor: 1.0
  # Written as a plain integer. The previous spelling `5e2` has no decimal
  # point or signed exponent, so YAML 1.1 loaders such as PyYAML read it as
  # the string "5e2", while loaders with a broader float pattern read 500.0.
  # NOTE(review): presumably the global step at which the discriminator term
  # is switched on - confirm against VQGANLoss.
  discriminator_iter_start: 500

datamodule:
  batch_size: 8
  resize: [288, 320]

lr_schedulers:
  generator:
    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
    # Same value as trainer.max_epochs below; with `interval: epoch` this is
    # presumably one cosine decay over the whole run - keep the two in sync.
    T_max: 128
    eta_min: 4.5e-6
    last_epoch: -1
    # NOTE(review): interval/monitor look like scheduler-wrapper settings
    # rather than CosineAnnealingLR constructor arguments - verify in the
    # code that consumes lr_schedulers.
    interval: epoch
    monitor: val/loss

    # Disabled alternative for the generator: one-cycle schedule.
    # _target_: torch.optim.lr_scheduler.OneCycleLR
    # max_lr: 3.0e-4
    # total_steps: null
    # epochs: 100
    # steps_per_epoch: 2496
    # pct_start: 0.1
    # anneal_strategy: cos
    # cycle_momentum: true
    # base_momentum: 0.85
    # max_momentum: 0.95
    # div_factor: 25
    # final_div_factor: 1.0e4
    # three_phase: true
    # last_epoch: -1
    # verbose: false
    # # Non-class arguments
    # interval: step
    # monitor: val/loss

  # Disabled: no learning-rate scheduler is configured for the discriminator.
  # discriminator:
  #   _target_: torch.optim.lr_scheduler.CosineAnnealingLR
  #   T_max: 64
  #   eta_min: 0.0
  #   last_epoch: -1
  #
  #   interval: epoch
  #   monitor: val/loss

# One optimizer per sub-model.
# NOTE(review): `parameters` presumably names the attribute whose parameters
# the optimizer updates - verify in the code that consumes optimizers.
optimizers:
  generator:
    _target_: madgrad.MADGRAD
    lr: 1.0e-4
    momentum: 0.5
    weight_decay: 0
    eps: 1.0e-7
    parameters: network

  discriminator:
    _target_: madgrad.MADGRAD
    lr: 4.5e-6
    momentum: 0.5
    weight_decay: 0
    eps: 1.0e-6
    parameters: loss_fn.discriminator

trainer:
  max_epochs: 128
  # NOTE(review): 0.1 presumably means 10% of the batches per epoch for both
  # training and validation - confirm against the trainer being configured.
  limit_train_batches: 0.1
  limit_val_batches: 0.1
  # gradient_clip_val: 100