diff options
Diffstat (limited to 'training/conf/experiment/vqgan_iam_lines.yaml')
-rw-r--r-- | training/conf/experiment/vqgan_iam_lines.yaml | 105 |
1 files changed, 105 insertions, 0 deletions
# @package _global_

# Hydra experiment config for a VQGAN on IAM lines
# (training/conf/experiment/vqgan_iam_lines.yaml).
#
# Every group overridden to `null` in the defaults list below is defined
# inline in this file instead of being pulled from a config group.

defaults:
  - override /network: null
  - override /criterion: null
  - override /datamodule: null
  - override /model: lit_vqgan
  - override /callbacks: wandb_vae
  - override /optimizers: null
  - override /lr_schedulers: null

# Loss: reconstruction term plus a patch discriminator.
criterion:
  _target_: text_recognizer.criterions.vqgan_loss.VQGANLoss
  reconstruction_loss:
    _target_: torch.nn.BCEWithLogitsLoss
    reduction: mean
  discriminator:
    _target_: text_recognizer.criterions.n_layer_discriminator.NLayerDiscriminator
    in_channels: 1
    num_channels: 64
    num_layers: 3
  commitment_weight: 0.25
  discriminator_weight: 0.8
  discriminator_factor: 1.0
  # Written as a plain integer: `1.5e4` has no exponent sign, which YAML 1.1
  # loaders resolve as a string, and other loaders as a float.
  # NOTE(review): presumably a training-step count - confirm against VQGANLoss.
  discriminator_iter_start: 15000

datamodule:
  _target_: text_recognizer.data.iam_lines.IAMLines
  batch_size: 24
  num_workers: 12
  train_fraction: 0.8
  augment: true
  pin_memory: false

lr_schedulers:
  generator:
    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
    # Same value as trainer.max_epochs below.
    T_max: 64
    eta_min: 4.5e-6
    last_epoch: -1
    # NOTE(review): interval/monitor are not CosineAnnealingLR arguments;
    # presumably consumed by the Lightning module before instantiation -
    # confirm the nesting against the model code.
    interval: epoch
    monitor: val/loss
  # discriminator:
  #   _target_: torch.optim.lr_scheduler.CosineAnnealingLR
  #   T_max: 64
  #   eta_min: 0.0
  #   last_epoch: -1
  #
  #   interval: epoch
  #   monitor: val/loss

# One optimizer per sub-model.
# NOTE(review): `parameters` presumably names the module attribute whose
# parameters are optimized - confirm against the model code.
optimizers:
  generator:
    _target_: madgrad.MADGRAD
    lr: 1.0e-4
    momentum: 0.5
    weight_decay: 0
    eps: 1.0e-7
    parameters: network

  discriminator:
    _target_: madgrad.MADGRAD
    lr: 4.5e-6
    momentum: 0.5
    weight_decay: 0
    eps: 1.0e-6
    parameters: loss_fn.discriminator

network:
  _target_: text_recognizer.networks.vqvae.vqvae.VQVAE
  hidden_dim: 256
  embedding_dim: 32
  num_embeddings: 512
  decay: 0.99
  encoder:
    _target_: text_recognizer.networks.vqvae.encoder.Encoder
    in_channels: 1
    hidden_dim: 32
    channels_multipliers: [1, 4, 8]
    dropout_rate: 0.0
    activation: mish
    use_norm: true
    num_residuals: 2
    residual_channels: 32
  decoder:
    _target_: text_recognizer.networks.vqvae.decoder.Decoder
    out_channels: 1
    hidden_dim: 32
    # Reverse order of the encoder's channels_multipliers.
    channels_multipliers: [8, 4, 1]
    dropout_rate: 0.0
    activation: mish
    use_norm: true
    num_residuals: 2
    residual_channels: 32

trainer:
  max_epochs: 64
  # limit_train_batches: 0.1
  # limit_val_batches: 0.1
  # gradient_clip_val: 100

# tune: false
# train: true
# test: false
# NOTE(review): presumably the input shape used for a model summary - confirm.
summary: [2, 1, 56, 1024]