1 files changed, 24 insertions, 15 deletions
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 8404cd1..38b13a5 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -18,7 +18,7 @@ summary: [[1, 1, 56, 1024], [1, 89]]
 
 criterion:
   ignore_index: *ignore_index
-  # label_smoothing: 0.1
+  label_smoothing: 0.05  # NOTE(review): presumably forwarded to the loss as its smoothing factor; confirm
 
 callbacks:
   stochastic_weight_averaging:
@@ -40,30 +40,38 @@ optimizers:
 
 lr_schedulers:
   network:
-    _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-    mode: min
-    factor: 0.5
-    patience: 10
-    threshold: 1.0e-4
-    threshold_mode: rel
-    cooldown: 0
-    min_lr: 1.0e-5
-    eps: 1.0e-8
+    _target_: torch.optim.lr_scheduler.OneCycleLR
+    max_lr: 3.0e-4  # peak learning rate of the cycle
+    total_steps: null  # unset, so the length comes from epochs * steps_per_epoch
+    epochs: *epochs
+    steps_per_epoch: 1284  # NOTE(review): hard-coded; presumably tied to dataset size and batch_size; confirm
+    pct_start: 0.3  # fraction of the cycle spent increasing the learning rate
+    anneal_strategy: cos
+    cycle_momentum: true  # momentum moves inversely to the learning rate
+    base_momentum: 0.85
+    max_momentum: 0.95
+    div_factor: 25.0  # initial_lr = max_lr / div_factor
+    final_div_factor: 10000.0  # min_lr = initial_lr / final_div_factor
+    three_phase: true  # adds a third, final annihilation phase to the schedule
+    last_epoch: -1  # -1 starts the schedule from the beginning
     verbose: false
-    interval: epoch
-    monitor: val/loss
+    interval: step  # NOTE(review): presumably makes the training loop step the scheduler per batch; confirm
+    monitor: val/cer  # NOTE(review): OneCycleLR takes no metric; presumably read by the training wrapper; confirm
 
 datamodule:
-  batch_size: 16
+  batch_size: 8
+  train_fraction: 0.9  # NOTE(review): presumably the share of training data kept for training; confirm
 
 network:
   input_dims: [1, 1, 56, 1024]
   num_classes: *num_classes
   pad_index: *ignore_index
+  encoder:
+    depth: 5  # NOTE(review): presumably the number of encoder stages/layers; confirm
   decoder:
-    depth: 10
+    depth: 6  # NOTE(review): presumably the number of decoder layers; confirm
   pixel_embedding:
-    shape: [7, 128]
+    shape: [3, 64]  # NOTE(review): presumably must match the encoder output size for input_dims; confirm
 
 model:
   max_output_len: *max_output_len
@@ -71,3 +79,4 @@ model:
 trainer:
   gradient_clip_val: 0.5
   max_epochs: *epochs
+  accumulate_grad_batches: 1  # NOTE(review): presumably 1 means an optimizer step every batch; confirm