diff options
Diffstat (limited to 'src/training/experiments/sample_experiment.yml')
-rw-r--r-- | src/training/experiments/sample_experiment.yml | 127 |
1 file changed, 73 insertions, 54 deletions
diff --git a/src/training/experiments/sample_experiment.yml b/src/training/experiments/sample_experiment.yml index b00bd5a..17e220e 100644 --- a/src/training/experiments/sample_experiment.yml +++ b/src/training/experiments/sample_experiment.yml @@ -1,17 +1,20 @@ experiment_group: Sample Experiments experiments: - - dataset: EmnistDataset - dataset_args: - sample_to_balance: true - subsample_fraction: null - transform: null - target_transform: null - seed: 4711 - data_loader_args: - splits: [train, val] - shuffle: true - num_workers: 8 - cuda: true + - train_args: + batch_size: 256 + max_epochs: 32 + dataset: + type: EmnistDataset + args: + sample_to_balance: true + subsample_fraction: null + transform: null + target_transform: null + seed: 4711 + train_args: + num_workers: 6 + train_fraction: 0.8 + model: CharacterModel metrics: [accuracy] # network: MLP @@ -19,65 +22,81 @@ experiments: # input_size: 784 # hidden_size: 512 # output_size: 80 - # num_layers: 3 - # dropout_rate: 0 + # num_layers: 5 + # dropout_rate: 0.2 # activation_fn: SELU - network: ResidualNetwork - network_args: - in_channels: 1 - num_classes: 80 - depths: [2, 1] - block_sizes: [96, 32] + network: + type: ResidualNetwork + args: + in_channels: 1 + num_classes: 80 + depths: [2, 2] + block_sizes: [64, 64] + activation: leaky_relu + stn: true + # network: + # type: WideResidualNetwork + # args: + # in_channels: 1 + # num_classes: 80 + # depth: 10 + # num_layers: 3 + # width_factor: 4 + # dropout_rate: 0.2 + # activation: SELU # network: LeNet # network_args: # output_size: 62 # activation_fn: GELU - train_args: - batch_size: 256 - epochs: 32 - criterion: CrossEntropyLoss - criterion_args: - weight: null - ignore_index: -100 - reduction: mean - # optimizer: RMSprop - # optimizer_args: - # lr: 1.e-3 - # alpha: 0.9 - # eps: 1.e-7 - # momentum: 0 - # weight_decay: 0 - # centered: false - optimizer: AdamW - optimizer_args: - lr: 1.e-03 - betas: [0.9, 0.999] - eps: 1.e-08 - # weight_decay: 5.e-4 - 
amsgrad: false - # lr_scheduler: null - lr_scheduler: OneCycleLR - lr_scheduler_args: - max_lr: 1.e-03 - epochs: 32 - anneal_strategy: linear - callbacks: [Checkpoint, ProgressBar, EarlyStopping, WandbCallback, WandbImageLogger, OneCycleLR] + criterion: + type: CrossEntropyLoss + args: + weight: null + ignore_index: -100 + reduction: mean + optimizer: + type: AdamW + args: + lr: 1.e-02 + betas: [0.9, 0.999] + eps: 1.e-08 + # weight_decay: 5.e-4 + amsgrad: false + # lr_scheduler: + # type: OneCycleLR + # args: + # max_lr: 1.e-03 + # epochs: null + # anneal_strategy: linear + lr_scheduler: + type: CosineAnnealingLR + args: + T_max: null + swa_args: + start: 2 + lr: 5.e-2 + callbacks: [Checkpoint, ProgressBar, WandbCallback, WandbImageLogger, EarlyStopping, SWA] # OneCycleLR] callback_args: Checkpoint: monitor: val_accuracy ProgressBar: - epochs: 32 + epochs: null log_batch_frequency: 100 EarlyStopping: monitor: val_loss min_delta: 0.0 - patience: 3 + patience: 5 mode: min WandbCallback: log_batch_frequency: 10 WandbImageLogger: num_examples: 4 - OneCycleLR: + use_transpose: true + # OneCycleLR: + # null + SWA: null - verbosity: 1 # 0, 1, 2 + verbosity: 0 # 0, 1, 2 resume_experiment: null + test: true + test_metric: test_accuracy |