# @package _global_

# Hydra defaults list for this experiment. Each `override` entry swaps the
# option selected for an existing config group (criterion, callbacks, ...)
# with the one named here. The stanzas further down then tweak individual
# fields of the selected options.
defaults:
  - override /criterion: cross_entropy
  - override /callbacks: htr
  - override /datamodule: iam_lines
  - override /network: mammut_lines
  - override /model: lit_mammut
  - override /lr_scheduler: cosine_annealing
  - override /optimizer: adan

# Run tags; consumed by logger.wandb.tags through the ${tags} interpolation.
tags: [lines, vit]
# Total number of epochs. Anchored so that lr_scheduler.T_max and
# trainer.max_epochs (both aliases of this value) cannot drift apart.
epochs: &epochs 320
# Class index handed to criterion.ignore_index via the alias below.
# NOTE(review): presumably the padding-token index of the target vocabulary;
# confirm against the tokenizer/mapping before changing it.
ignore_index: &ignore_index 3
# Disabled setting, kept for reference.
# NOTE(review): looks like example input shapes for a model summary; confirm.
# summary: [[1, 1, 56, 1024], [1, 89]]

# Logger settings: forward the top-level `tags` list to the wandb logger
# entry.
logger:
  wandb:
    tags: ${tags}

# Settings for the cross_entropy criterion selected in the defaults list.
criterion:
  # Alias of the top-level `ignore_index` anchor (currently 3).
  ignore_index: *ignore_index
  # Label smoothing is disabled; uncomment to enable it.
  # label_smoothing: 0.05


# Decoder settings.
decoder:
  # Maximum output sequence length. Anchored here so model.max_output_len
  # (an alias of this value) always matches the decoder limit.
  max_output_len: &max_output_len 89

# Disabled: stochastic weight averaging callback. Uncomment the whole stanza
# to enable it.
# callbacks:
#   stochastic_weight_averaging:
#     _target_: pytorch_lightning.callbacks.StochasticWeightAveraging
#     swa_epoch_start: 0.75
#     swa_lrs: 1.0e-5
#     annealing_epochs: 10
#     annealing_strategy: cos
#     device: null

# Settings for the cosine_annealing scheduler selected in the defaults list.
lr_scheduler:
  # Alias of the top-level `epochs` anchor, i.e. the same value as
  # trainer.max_epochs.
  # NOTE(review): this assumes the scheduler is stepped once per epoch; confirm.
  T_max: *epochs

# Settings for the iam_lines datamodule selected in the defaults list.
datamodule:
  batch_size: 16
  # NOTE(review): presumably the share of the training split kept for
  # training, with the remainder used for validation; confirm in the
  # datamodule.
  train_fraction: 0.95

# Settings for the lit_mammut model selected in the defaults list.
model:
  # Alias of decoder.max_output_len; change the value there, not here.
  max_output_len: *max_output_len

# Settings for the adan optimizer selected in the defaults list.
optimizer:
  # Learning rate passed to the optimizer.
  lr: 1.0e-3

# Trainer settings.
# NOTE(review): the keys match PyTorch Lightning `Trainer` arguments;
# confirm that this stanza is forwarded to it unchanged.
trainer:
  fast_dev_run: false
  gradient_clip_val: 1.0
  # Alias of the top-level `epochs` anchor; same value as lr_scheduler.T_max.
  max_epochs: *epochs
  accumulate_grad_batches: 1
  # NOTE(review): assuming Lightning semantics, a float of 1.0 means "use
  # every batch" of the respective split; lower it for quick experiments.
  limit_train_batches: 1.0
  limit_val_batches: 1.0
  limit_test_batches: 1.0