summary refs log tree commit diff
path: root/training/conf/experiment/conv_transformer_paragraphs.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/experiment/conv_transformer_paragraphs.yaml')
-rw-r--r-- training/conf/experiment/conv_transformer_paragraphs.yaml 68
1 files changed, 45 insertions, 23 deletions
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 1465e62..7f0273f 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -10,13 +10,15 @@ defaults:
- override /lr_schedulers: null
- override /optimizers: null
-epochs: &epochs 629
+epochs: &epochs 600
+num_classes: &num_classes 58
ignore_index: &ignore_index 3
+max_output_len: &max_output_len 682
summary: [[1, 1, 576, 640], [1, 682]]
criterion:
ignore_index: *ignore_index
- # label_smoothing: 0.1
+ label_smoothing: 0.05
callbacks:
stochastic_weight_averaging:
@@ -28,32 +30,52 @@ callbacks:
device: null
optimizers:
- radam:
- _target_: torch.optim.RAdam
- lr: 1.5e-4
- betas: [0.9, 0.999]
- weight_decay: 0
- eps: 1.0e-8
- parameters: network
+ _target_: torch.optim.RAdam
+ lr: 3.0e-4
+ betas: [0.9, 0.999]
+ weight_decay: 0
+ eps: 1.0e-8
+ parameters: network
lr_schedulers:
- network:
- _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
- mode: min
- factor: 0.5
- patience: 10
- threshold: 1.0e-4
- threshold_mode: rel
- cooldown: 0
- min_lr: 1.0e-6
- eps: 1.0e-8
- verbose: false
- interval: epoch
- monitor: val/loss
+ _target_: torch.optim.lr_scheduler.OneCycleLR
+ max_lr: 2.0e-4
+ total_steps: null
+ epochs: *epochs
+ steps_per_epoch: 3201
+ pct_start: 0.1
+ anneal_strategy: cos
+ cycle_momentum: true
+ base_momentum: 0.85
+ max_momentum: 0.95
+ div_factor: 25.0
+ final_div_factor: 10000.0
+ three_phase: true
+ last_epoch: -1
+ verbose: false
+ interval: step
+ monitor: val/cer
datamodule:
batch_size: 6
- train_fraction: 0.9
+ train_fraction: 0.95
+
+network:
+ input_dims: [1, 1, 576, 640]
+ num_classes: *num_classes
+ pad_index: *ignore_index
+ encoder:
+ depth: 5
+ decoder:
+ depth: 6
+ pixel_embedding:
+ shape: [36, 40]
+
+model:
+ max_output_len: *max_output_len
trainer:
+ gradient_clip_val: 0.5
max_epochs: *epochs
+ accumulate_grad_batches: 1
+ resume_from_checkpoint: /home/aktersnurra/projects/text-recognizer/training/logs/runs/2022-06-16/21-00-39/checkpoints/last.ckpt