# src/training/experiments/sample_experiment.yml

# Label for the group that the experiment entries below belong to.
experiment_group: Sample Experiments
# Sequence of experiment definitions; each `-` item is one experiment
# configuration.
experiments:
    # Training settings for this experiment: batch size and the maximum
    # number of epochs.
    - train_args:
        batch_size: 256
        max_epochs: 32
      # Dataset section: `type` names the dataset, `args` holds its settings,
      # and `train_args` holds the data-loading settings used for training.
      dataset:
        type: EmnistDataset
        args:
          sample_to_balance: true
          # The three null entries leave these options unset here; how the
          # dataset treats null is not visible in this file.
          subsample_fraction: null
          transform: null
          target_transform: null
          # Fixed seed. NOTE(review): presumably makes sampling/splitting
          # reproducible - confirm against the dataset implementation.
          seed: 4711
        train_args:
          num_workers: 6
          # NOTE(review): presumably the share of the data used for training,
          # with the remainder held out for validation - confirm.
          train_fraction: 0.8

      # Model identifier; how the name is resolved is up to the experiment
      # runner, which is not part of this file.
      model: CharacterModel
      # Metrics to track, listed by name (a single entry at present).
      metrics:
        - accuracy
      # network: MLP
      # network_args:
      #   input_size: 784
      #   hidden_size: 512
      #   output_size: 80
      #   num_layers: 5
      #   dropout_rate: 0.2
      #   activation_fn: SELU
      # Active network definition: `type` names the architecture and `args`
      # holds its settings. The commented-out `network` entries around this
      # block are alternative configurations that are currently disabled.
      network:
        type: ResidualNetwork
        args:
          in_channels: 1
          num_classes: 80
          # `depths` and `block_sizes` both have two entries.
          # NOTE(review): presumably one entry per residual stage - confirm
          # against ResidualNetwork.
          depths:
            - 2
            - 2
          block_sizes:
            - 64
            - 64
          activation: leaky_relu
          # NOTE(review): presumably enables a spatial transformer network
          # stage - confirm against ResidualNetwork.
          stn: true
      # network:
      #   type: WideResidualNetwork
      #   args:
      #     in_channels: 1
      #     num_classes: 80
      #     depth: 10
      #     num_layers: 3
      #     width_factor: 4
      #     dropout_rate: 0.2
      #     activation: SELU
      # network: LeNet
      # network_args:
      #   output_size: 62
      #   activation_fn: GELU
      # Loss function: `type` names the criterion and `args` holds its
      # settings.
      # NOTE(review): if `type` resolves to torch.nn.CrossEntropyLoss, the
      # three values below equal that class's documented defaults - confirm.
      criterion:
        type: CrossEntropyLoss
        args:
          weight: null
          ignore_index: -100
          reduction: mean
      # Optimizer definition: `type` names the optimizer and `args` holds its
      # settings.
      optimizer:
        type: AdamW
        args:
          # Keep the `1.e-02` spelling (digit, dot, signed exponent): YAML 1.1
          # loaders such as PyYAML only resolve exponent notation as a float
          # in that form, and would load `1e-2` as a string.
          lr: 1.e-02
          betas:
            - 0.9
            - 0.999
          eps: 1.e-08
          # weight_decay is disabled here, so the optimizer's own default
          # applies. NOTE(review): torch.optim.AdamW documents a default of
          # 1e-2, assuming `type` resolves to that class - confirm intended.
          # weight_decay: 5.e-4
          amsgrad: false
      # lr_scheduler:
      #   type: OneCycleLR
      #   args:
      #     max_lr: 1.e-03
      #     epochs: null
      #     anneal_strategy: linear
      # Active learning-rate scheduler: `type` names the scheduler and `args`
      # holds its settings.
      lr_scheduler:
        type: CosineAnnealingLR
        args:
          # Left as null in this file.
          # NOTE(review): presumably filled in by the experiment runner before
          # the scheduler is built (torch's CosineAnnealingLR requires an
          # integer T_max) - confirm.
          T_max: null
      # Settings for weight averaging.
      # NOTE(review): presumably read by the `SWA` callback listed under
      # `callbacks` - confirm.
      swa_args:
        # NOTE(review): presumably the epoch at which averaging begins -
        # confirm.
        start: 2
        # 0.05, which is larger than the optimizer lr of 0.01 set in this
        # experiment. NOTE(review): confirm that is intended.
        lr: 5.e-2
      # Callbacks for this experiment, listed by name. Each name also appears
      # as a key in `callback_args` below. The commented-out entry is a
      # disabled option kept for reference.
      callbacks:
        - Checkpoint
        - ProgressBar
        - WandbCallback
        - WandbImageLogger
        - EarlyStopping
        - SWA
        # - OneCycleLR
      # Per-callback settings, keyed by callback name. A `null` value means
      # no settings are given for that callback in this file.
      callback_args:
        # Checkpoint monitors val_accuracy while EarlyStopping (below)
        # monitors val_loss, so the two react to different signals.
        Checkpoint:
          monitor: val_accuracy
        ProgressBar:
          # NOTE(review): null here; presumably filled in from
          # `train_args.max_epochs` by the experiment runner - confirm.
          epochs: null
          log_batch_frequency: 100
        EarlyStopping:
          monitor: val_loss
          min_delta: 0.0
          patience: 5
          mode: min
        WandbCallback:
          log_batch_frequency: 10
        WandbImageLogger:
          num_examples: 4
          use_transpose: true
        # OneCycleLR: null
        SWA: null
      # Verbosity level; the inline note lists the values the author
      # considers valid.
      verbosity: 0 # 0, 1, 2
      # null: no earlier experiment is named to resume from.
      resume_experiment: null
      # NOTE(review): presumably runs a test pass after training and reports
      # the metric named by `test_metric` - confirm against the runner.
      test: true
      test_metric: test_accuracy