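# Sample experiment configuration. Each entry under `experiments` describes one
# training run; names such as EmnistDataset, CharacterModel, MLP, and AdamW are
# presumably resolved to their Python classes by the experiment runner.
# Commented-out blocks throughout are ready-made alternatives to swap in.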
experiment_group: Sample Experiments
experiments:
- dataset: EmnistDataset
dataset_args:
sample_to_balance: true
subsample_fraction: null
transform: null
target_transform: null
seed: 4711
data_loader_args:
splits: [train, val]
batch_size: 256
shuffle: true
num_workers: 8
cuda: true
model: CharacterModel
metrics: [accuracy]
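    # An MLP over flattened 28x28 EMNIST images: input_size 784 = 28 * 28,
    # output_size 62 = 10 digits + 26 uppercase + 26 lowercase letters
    # (the EMNIST ByClass split).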
network: MLP
network_args:
input_size: 784
output_size: 62
num_layers: 3
activation_fn: GELU
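    # Alternative network: comment out the MLP block above and uncomment the
    # LeNet block below to train a small CNN instead.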
# network: LeNet
# network_args:
# output_size: 62
# activation_fn: GELU
train_args:
batch_size: 256
epochs: 16
criterion: CrossEntropyLoss
criterion_args:
weight: null
ignore_index: -100
reduction: mean
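      # Alternative optimizer: uncomment the RMSprop block below and comment
      # out the AdamW block that follows it to swap optimizers.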
# optimizer: RMSprop
# optimizer_args:
# lr: 1.e-3
# alpha: 0.9
# eps: 1.e-7
# momentum: 0
# weight_decay: 0
# centered: false
optimizer: AdamW
optimizer_args:
lr: 1.e-2
betas: [0.9, 0.999]
eps: 1.e-08
weight_decay: 0
amsgrad: false
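      # Note: with OneCycleLR the schedule drives the learning rate, so the
      # AdamW lr above is effectively a placeholder (PyTorch derives the
      # initial lr from max_lr). OneCycleLR also needs total_steps, or
      # steps_per_epoch together with epochs; the runner presumably fills
      # that in from the data loader.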
# lr_scheduler: null
lr_scheduler: OneCycleLR
lr_scheduler_args:
max_lr: 1.e-3
epochs: 16
callbacks: [Checkpoint, EarlyStopping, WandbCallback, WandbImageLogger, OneCycleLR]
callback_args:
Checkpoint:
monitor: val_accuracy
EarlyStopping:
monitor: val_loss
min_delta: 0.0
patience: 3
mode: min
WandbCallback:
log_batch_frequency: 10
WandbImageLogger:
num_examples: 4
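        # OneCycleLR is also listed as a callback, presumably so the scheduler
        # is stepped once per batch, as PyTorch expects for one-cycle
        # schedules; it takes no arguments here.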
OneCycleLR:
null
verbosity: 2 # 0, 1, 2
resume_experiment: null
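  # Second experiment: the same setup with a LeNet CNN in place of the MLP.
  # Uncomment the block below to run it as part of this experiment group.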
# - dataset: EmnistDataset
# dataset_args:
# sample_to_balance: true
# subsample_fraction: null
# transform: null
# target_transform: null
# seed: 4711
# data_loader_args:
# splits: [train, val]
# batch_size: 256
# shuffle: true
# num_workers: 8
# cuda: true
# model: CharacterModel
# metrics: [accuracy]
# # network: MLP
# # network_args:
# # input_size: 784
# # output_size: 62
# # num_layers: 3
# # activation_fn: GELU
# network: LeNet
# network_args:
# output_size: 62
# activation_fn: GELU
# train_args:
# batch_size: 256
# epochs: 16
# criterion: CrossEntropyLoss
# criterion_args:
# weight: null
# ignore_index: -100
# reduction: mean
# # optimizer: RMSprop
# # optimizer_args:
# # lr: 1.e-3
# # alpha: 0.9
# # eps: 1.e-7
# # momentum: 0
# # weight_decay: 0
# # centered: false
# optimizer: AdamW
# optimizer_args:
# lr: 1.e-2
# betas: [0.9, 0.999]
# eps: 1.e-08
# weight_decay: 0
# amsgrad: false
# # lr_scheduler: null
# lr_scheduler: OneCycleLR
# lr_scheduler_args:
# max_lr: 1.e-3
# epochs: 16
# callbacks: [Checkpoint, EarlyStopping, WandbCallback, WandbImageLogger, OneCycleLR]
# callback_args:
# Checkpoint:
# monitor: val_accuracy
# EarlyStopping:
# monitor: val_loss
# min_delta: 0.0
# patience: 3
# mode: min
# WandbCallback:
# log_batch_frequency: 10
# WandbImageLogger:
# num_examples: 4
# OneCycleLR:
# null
# verbosity: 2 # 0, 1, 2
# resume_experiment: null