# @package _group_
# Model configuration: selects the LitVQVAEModel wrapper and the optimizer,
# learning-rate scheduler and loss it is built with. Each component is given
# as a `type` (class name) plus the `args` passed to it.
type: LitVQVAEModel
args:
  optimizer:
    type: MADGRAD
    args:
      lr: 1.0e-3
      momentum: 0.9
      weight_decay: 0
      eps: 1.0e-6

  lr_scheduler:
    type: OneCycleLR
    args:
      # NOTE(review): `interval` sits next to the scheduler arguments;
      # presumably it tells the training loop to step the scheduler per
      # batch rather than per epoch - confirm against the model code.
      interval: step
      max_lr: 1.0e-3
      three_phase: true
      epochs: 64
      # Hard-coded; must be updated whenever dataset size or batch size changes.
      steps_per_epoch: 633  # num_samples / batch_size

  criterion:
    type: MSELoss
    args:
      reduction: mean

  # NOTE(review): `monitor` and `mapping` are assumed to be arguments of the
  # model itself (siblings of optimizer/lr_scheduler/criterion) - confirm
  # against the LitVQVAEModel constructor.
  monitor: val_loss
  mapping: sentence_piece