diff options
Diffstat (limited to 'training/conf/experiment/cnn_transformer_paragraphs.yaml')
-rw-r--r-- | training/conf/experiment/cnn_transformer_paragraphs.yaml | 18 |
1 file changed, 9 insertions, 9 deletions
diff --git a/training/conf/experiment/cnn_transformer_paragraphs.yaml b/training/conf/experiment/cnn_transformer_paragraphs.yaml index b415c29..e9cd254 100644 --- a/training/conf/experiment/cnn_transformer_paragraphs.yaml +++ b/training/conf/experiment/cnn_transformer_paragraphs.yaml @@ -10,7 +10,7 @@ defaults: - override /optimizers: null -epochs: &epochs 512 +epochs: &epochs 1000 ignore_index: &ignore_index 3 num_classes: &num_classes 58 max_output_len: &max_output_len 682 @@ -36,7 +36,7 @@ callbacks: optimizers: madgrad: _target_: madgrad.MADGRAD - lr: 3.0e-4 + lr: 2.0e-4 momentum: 0.9 weight_decay: 0 eps: 1.0e-6 @@ -46,11 +46,11 @@ optimizers: lr_schedulers: network: _target_: torch.optim.lr_scheduler.OneCycleLR - max_lr: 3.0e-4 + max_lr: 2.0e-4 total_steps: null epochs: *epochs - steps_per_epoch: 52 - pct_start: 0.1 + steps_per_epoch: 79 + pct_start: 0.3 anneal_strategy: cos cycle_momentum: true base_momentum: 0.85 @@ -70,13 +70,13 @@ datamodule: num_workers: 12 train_fraction: 0.8 augment: true - pin_memory: false + pin_memory: true word_pieces: false resize: null network: _target_: text_recognizer.networks.conv_transformer.ConvTransformer - input_dims: [1, 56, 1024] + input_dims: [1, 576, 640] hidden_dim: &hidden_dim 128 encoder_dim: 1280 dropout_rate: 0.2 @@ -133,7 +133,7 @@ trainer: stochastic_weight_avg: true auto_scale_batch_size: binsearch auto_lr_find: false - gradient_clip_val: 0.5 + gradient_clip_val: 0.0 fast_dev_run: false gpus: 1 precision: 16 @@ -144,5 +144,5 @@ trainer: limit_val_batches: 1.0 limit_test_batches: 1.0 resume_from_checkpoint: null - accumulate_grad_batches: 32 + accumulate_grad_batches: 16 overfit_batches: 0 |