summary | refs | log | tree | commit | diff
path: root/training/conf/experiment/cnn_transformer_paragraphs.yaml
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-10-07 08:57:01 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-10-07 08:57:01 +0200
commit 4c0a730003ac075ad927708e1ac0e6300a360373 (patch)
tree 36a573b6f4d1f09f5a2675392a7068718acab9da /training/conf/experiment/cnn_transformer_paragraphs.yaml
parent 2150fd365f9512d55c78cdbef096b7240f619b21 (diff)
Updates to cfgs
Diffstat (limited to 'training/conf/experiment/cnn_transformer_paragraphs.yaml')
-rw-r--r-- training/conf/experiment/cnn_transformer_paragraphs.yaml | 18
1 files changed, 9 insertions, 9 deletions
diff --git a/training/conf/experiment/cnn_transformer_paragraphs.yaml b/training/conf/experiment/cnn_transformer_paragraphs.yaml
index b415c29..e9cd254 100644
--- a/training/conf/experiment/cnn_transformer_paragraphs.yaml
+++ b/training/conf/experiment/cnn_transformer_paragraphs.yaml
@@ -10,7 +10,7 @@ defaults:
- override /optimizers: null
-epochs: &epochs 512
+epochs: &epochs 1000
ignore_index: &ignore_index 3
num_classes: &num_classes 58
max_output_len: &max_output_len 682
@@ -36,7 +36,7 @@ callbacks:
optimizers:
madgrad:
_target_: madgrad.MADGRAD
- lr: 3.0e-4
+ lr: 2.0e-4
momentum: 0.9
weight_decay: 0
eps: 1.0e-6
@@ -46,11 +46,11 @@ optimizers:
lr_schedulers:
network:
_target_: torch.optim.lr_scheduler.OneCycleLR
- max_lr: 3.0e-4
+ max_lr: 2.0e-4
total_steps: null
epochs: *epochs
- steps_per_epoch: 52
- pct_start: 0.1
+ steps_per_epoch: 79
+ pct_start: 0.3
anneal_strategy: cos
cycle_momentum: true
base_momentum: 0.85
@@ -70,13 +70,13 @@ datamodule:
num_workers: 12
train_fraction: 0.8
augment: true
- pin_memory: false
+ pin_memory: true
word_pieces: false
resize: null
network:
_target_: text_recognizer.networks.conv_transformer.ConvTransformer
- input_dims: [1, 56, 1024]
+ input_dims: [1, 576, 640]
hidden_dim: &hidden_dim 128
encoder_dim: 1280
dropout_rate: 0.2
@@ -133,7 +133,7 @@ trainer:
stochastic_weight_avg: true
auto_scale_batch_size: binsearch
auto_lr_find: false
- gradient_clip_val: 0.5
+ gradient_clip_val: 0.0
fast_dev_run: false
gpus: 1
precision: 16
@@ -144,5 +144,5 @@ trainer:
limit_val_batches: 1.0
limit_test_batches: 1.0
resume_from_checkpoint: null
- accumulate_grad_batches: 32
+ accumulate_grad_batches: 16
overfit_batches: 0