summary refs log tree commit diff
path: root/training/conf/experiment/cnn_transformer_paragraphs.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/experiment/cnn_transformer_paragraphs.yaml')
-rw-r--r-- training/conf/experiment/cnn_transformer_paragraphs.yaml | 18
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/training/conf/experiment/cnn_transformer_paragraphs.yaml b/training/conf/experiment/cnn_transformer_paragraphs.yaml
index b415c29..e9cd254 100644
--- a/training/conf/experiment/cnn_transformer_paragraphs.yaml
+++ b/training/conf/experiment/cnn_transformer_paragraphs.yaml
@@ -10,7 +10,7 @@ defaults:
- override /optimizers: null
-epochs: &epochs 512
+epochs: &epochs 1000
ignore_index: &ignore_index 3
num_classes: &num_classes 58
max_output_len: &max_output_len 682
@@ -36,7 +36,7 @@ callbacks:
optimizers:
madgrad:
_target_: madgrad.MADGRAD
- lr: 3.0e-4
+ lr: 2.0e-4
momentum: 0.9
weight_decay: 0
eps: 1.0e-6
@@ -46,11 +46,11 @@ optimizers:
lr_schedulers:
network:
_target_: torch.optim.lr_scheduler.OneCycleLR
- max_lr: 3.0e-4
+ max_lr: 2.0e-4
total_steps: null
epochs: *epochs
- steps_per_epoch: 52
- pct_start: 0.1
+ steps_per_epoch: 79
+ pct_start: 0.3
anneal_strategy: cos
cycle_momentum: true
base_momentum: 0.85
@@ -70,13 +70,13 @@ datamodule:
num_workers: 12
train_fraction: 0.8
augment: true
- pin_memory: false
+ pin_memory: true
word_pieces: false
resize: null
network:
_target_: text_recognizer.networks.conv_transformer.ConvTransformer
- input_dims: [1, 56, 1024]
+ input_dims: [1, 576, 640]
hidden_dim: &hidden_dim 128
encoder_dim: 1280
dropout_rate: 0.2
@@ -133,7 +133,7 @@ trainer:
stochastic_weight_avg: true
auto_scale_batch_size: binsearch
auto_lr_find: false
- gradient_clip_val: 0.5
+ gradient_clip_val: 0.0
fast_dev_run: false
gpus: 1
precision: 16
@@ -144,5 +144,5 @@ trainer:
limit_val_batches: 1.0
limit_test_batches: 1.0
resume_from_checkpoint: null
- accumulate_grad_batches: 32
+ accumulate_grad_batches: 16
overfit_batches: 0