summary | refs | log | tree | commit | diff
path: root/training/conf/experiment
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/experiment')
-rw-r--r-- training/conf/experiment/conv_transformer_lines.yaml 39
1 file changed, 24 insertions, 15 deletions
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 8404cd1..38b13a5 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -18,7 +18,7 @@ summary: [[1, 1, 56, 1024], [1, 89]]
criterion:
ignore_index: *ignore_index
- # label_smoothing: 0.1
+ label_smoothing: 0.05
callbacks:
stochastic_weight_averaging:
@@ -40,30 +40,38 @@ optimizers:
lr_schedulers:
network:
- _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
- mode: min
- factor: 0.5
- patience: 10
- threshold: 1.0e-4
- threshold_mode: rel
- cooldown: 0
- min_lr: 1.0e-5
- eps: 1.0e-8
+ _target_: torch.optim.lr_scheduler.OneCycleLR
+ max_lr: 3.0e-4
+ total_steps: null
+ epochs: *epochs
+ steps_per_epoch: 1284
+ pct_start: 0.3
+ anneal_strategy: cos
+ cycle_momentum: true
+ base_momentum: 0.85
+ max_momentum: 0.95
+ div_factor: 25.0
+ final_div_factor: 10000.0
+ three_phase: true
+ last_epoch: -1
verbose: false
- interval: epoch
- monitor: val/loss
+ interval: step
+ monitor: val/cer
datamodule:
- batch_size: 16
+ batch_size: 8
+ train_fraction: 0.9
network:
input_dims: [1, 1, 56, 1024]
num_classes: *num_classes
pad_index: *ignore_index
+ encoder:
+ depth: 5
decoder:
- depth: 10
+ depth: 6
pixel_embedding:
- shape: [7, 128]
+ shape: [3, 64]
model:
max_output_len: *max_output_len
@@ -71,3 +79,4 @@ model:
trainer:
gradient_clip_val: 0.5
max_epochs: *epochs
+ accumulate_grad_batches: 1