author     Gustaf Rydholm <gustaf.rydholm@gmail.com>  2022-06-18 00:59:17 +0200
committer  Gustaf Rydholm <gustaf.rydholm@gmail.com>  2022-06-18 00:59:17 +0200
commit     062eebf0d690365cf7d9f6019d147ea195cc3a63 (patch)
tree       0e6c6ba1a4c23ed9b51787101d4f7b588830256f /training
parent     7dfa030557ad500a54126b8af40f6f0c42c5d620 (diff)
Update configs
Diffstat (limited to 'training')
-rw-r--r--  training/conf/config.yaml                                 |  6
-rw-r--r--  training/conf/experiment/conv_transformer_lines.yaml      | 50
-rw-r--r--  training/conf/experiment/conv_transformer_paragraphs.yaml | 68
-rw-r--r--  training/conf/lr_schedulers/cosine_annealing.yaml         | 13
-rw-r--r--  training/conf/optimizers/radam.yaml                       | 13
5 files changed, 83 insertions, 67 deletions
diff --git a/training/conf/config.yaml b/training/conf/config.yaml
index f6118a9..fc06f7f 100644
--- a/training/conf/config.yaml
+++ b/training/conf/config.yaml
@@ -7,13 +7,11 @@ defaults:
   - datamodule: iam_extended_paragraphs
   - hydra: default
   - logger: wandb
-  - lr_schedulers:
-      - cosine_annealing
+  - lr_schedulers: cosine_annealing
   - mapping: characters
   - model: lit_transformer
   - network: conv_transformer
-  - optimizers:
-      - radam
+  - optimizers: radam
   - trainer: default
 
 seed: 4711
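
The flattened group selections above compose each file's keys directly under its group node: with "- optimizers: radam", the contents of optimizers/radam.yaml land at cfg.optimizers rather than under an extra cfg.optimizers.radam level. A minimal sketch of consuming the composed config; the entrypoint below is illustrative, not this repo's actual training script:

import hydra
from omegaconf import DictConfig

@hydra.main(config_path="training/conf", config_name="config")
def main(cfg: DictConfig) -> None:
    # After the flattening, each group is a single instantiable node.
    print(cfg.optimizers["_target_"])     # torch.optim.RAdam
    print(cfg.lr_schedulers["_target_"])  # torch.optim.lr_scheduler.CosineAnnealingLR

if __name__ == "__main__":
    main()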
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 38b13a5..260014c 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -30,37 +30,35 @@ callbacks:
     device: null
 
 optimizers:
-  radam:
-    _target_: torch.optim.RAdam
-    lr: 3.0e-4
-    betas: [0.9, 0.999]
-    weight_decay: 0
-    eps: 1.0e-8
-    parameters: network
+  _target_: torch.optim.RAdam
+  lr: 3.0e-4
+  betas: [0.9, 0.999]
+  weight_decay: 0
+  eps: 1.0e-8
+  parameters: network
 
 lr_schedulers:
-  network:
-    _target_: torch.optim.lr_scheduler.OneCycleLR
-    max_lr: 3.0e-4
-    total_steps: null
-    epochs: *epochs
-    steps_per_epoch: 1284
-    pct_start: 0.3
-    anneal_strategy: cos
-    cycle_momentum: true
-    base_momentum: 0.85
-    max_momentum: 0.95
-    div_factor: 25.0
-    final_div_factor: 10000.0
-    three_phase: true
-    last_epoch: -1
-    verbose: false
-    interval: step
-    monitor: val/cer
+  _target_: torch.optim.lr_scheduler.OneCycleLR
+  max_lr: 3.0e-4
+  total_steps: null
+  epochs: *epochs
+  steps_per_epoch: 1354
+  pct_start: 0.3
+  anneal_strategy: cos
+  cycle_momentum: true
+  base_momentum: 0.85
+  max_momentum: 0.95
+  div_factor: 25.0
+  final_div_factor: 10000.0
+  three_phase: true
+  last_epoch: -1
+  verbose: false
+  interval: step
+  monitor: val/cer
 
 datamodule:
   batch_size: 8
-  train_fraction: 0.9
+  train_fraction: 0.95
 
 network:
   input_dims: [1, 1, 56, 1024]
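
For reference, here is roughly what the flattened optimizer and scheduler blocks resolve to at runtime. The network and the epoch count are placeholders (the real value comes from the *epochs anchor earlier in the experiment file, outside this hunk); the rest is copied from the new config. Note that steps_per_epoch moves from 1284 to 1354 in step with train_fraction going from 0.9 to 0.95: OneCycleLR needs the exact number of optimizer steps per epoch, so it changes whenever the training split does.

import torch

network = torch.nn.Linear(8, 8)  # stand-in for the conv transformer
optimizer = torch.optim.RAdam(
    network.parameters(), lr=3.0e-4, betas=(0.9, 0.999), weight_decay=0, eps=1.0e-8
)
epochs = 512  # placeholder for the *epochs anchor
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=3.0e-4,
    epochs=epochs,             # used with steps_per_epoch since total_steps is null
    steps_per_epoch=1354,      # optimizer steps per epoch at train_fraction 0.95
    pct_start=0.3,
    anneal_strategy="cos",
    cycle_momentum=True,       # cycles RAdam's beta1 between base/max momentum
    base_momentum=0.85,
    max_momentum=0.95,
    div_factor=25.0,           # initial lr = max_lr / 25
    final_div_factor=10000.0,  # min lr = initial lr / 10000
    three_phase=True,
)
# "interval: step" means the scheduler advances after every optimizer step:
optimizer.step()
scheduler.step()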
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 1465e62..7f0273f 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -10,13 +10,15 @@ defaults:
   - override /lr_schedulers: null
   - override /optimizers: null
 
-epochs: &epochs 629
+epochs: &epochs 600
+num_classes: &num_classes 58
 ignore_index: &ignore_index 3
+max_output_len: &max_output_len 682
 summary: [[1, 1, 576, 640], [1, 682]]
 
 criterion:
   ignore_index: *ignore_index
-  # label_smoothing: 0.1
+  label_smoothing: 0.05
 
 callbacks:
   stochastic_weight_averaging:
@@ -28,32 +30,52 @@ callbacks:
     device: null
 
 optimizers:
-  radam:
-    _target_: torch.optim.RAdam
-    lr: 1.5e-4
-    betas: [0.9, 0.999]
-    weight_decay: 0
-    eps: 1.0e-8
-    parameters: network
+  _target_: torch.optim.RAdam
+  lr: 3.0e-4
+  betas: [0.9, 0.999]
+  weight_decay: 0
+  eps: 1.0e-8
+  parameters: network
 
 lr_schedulers:
-  network:
-    _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-    mode: min
-    factor: 0.5
-    patience: 10
-    threshold: 1.0e-4
-    threshold_mode: rel
-    cooldown: 0
-    min_lr: 1.0e-6
-    eps: 1.0e-8
-    verbose: false
-    interval: epoch
-    monitor: val/loss
+  _target_: torch.optim.lr_scheduler.OneCycleLR
+  max_lr: 2.0e-4
+  total_steps: null
+  epochs: *epochs
+  steps_per_epoch: 3201
+  pct_start: 0.1
+  anneal_strategy: cos
+  cycle_momentum: true
+  base_momentum: 0.85
+  max_momentum: 0.95
+  div_factor: 25.0
+  final_div_factor: 10000.0
+  three_phase: true
+  last_epoch: -1
+  verbose: false
+  interval: step
+  monitor: val/cer
 
 datamodule:
   batch_size: 6
-  train_fraction: 0.9
+  train_fraction: 0.95
+
+network:
+  input_dims: [1, 1, 576, 640]
+  num_classes: *num_classes
+  pad_index: *ignore_index
+  encoder:
+    depth: 5
+  decoder:
+    depth: 6
+  pixel_embedding:
+    shape: [36, 40]
+
+model:
+  max_output_len: *max_output_len
 
 trainer:
+  gradient_clip_val: 0.5
   max_epochs: *epochs
+  accumulate_grad_batches: 1
+  resume_from_checkpoint: /home/aktersnurra/projects/text-recognizer/training/logs/runs/2022-06-16/21-00-39/checkpoints/last.ckpt
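
Besides swapping the per-epoch ReduceLROnPlateau for the same per-step OneCycleLR recipe as the lines experiment, this hunk enables label smoothing. The criterion it implies maps directly onto torch.nn.CrossEntropyLoss (PyTorch 1.10+); a small sketch with shapes taken from this config (num_classes: 58, max_output_len: 682, batch_size: 6), where treating index 3 as the pad token follows from pad_index: *ignore_index:

import torch

criterion = torch.nn.CrossEntropyLoss(ignore_index=3, label_smoothing=0.05)
logits = torch.randn(6, 58, 682)          # (batch, num_classes, max_output_len)
targets = torch.randint(0, 58, (6, 682))  # padded positions hold index 3
loss = criterion(logits, targets)         # pad positions contribute no loss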
diff --git a/training/conf/lr_schedulers/cosine_annealing.yaml b/training/conf/lr_schedulers/cosine_annealing.yaml
index c53ee3a..e8364f0 100644
--- a/training/conf/lr_schedulers/cosine_annealing.yaml
+++ b/training/conf/lr_schedulers/cosine_annealing.yaml
@@ -1,8 +1,7 @@
-cosine_annealing:
-  _target_: torch.optim.lr_scheduler.CosineAnnealingLR
-  T_max: 256
-  eta_min: 0.0
-  last_epoch: -1
+_target_: torch.optim.lr_scheduler.CosineAnnealingLR
+T_max: 256
+eta_min: 0.0
+last_epoch: -1
 
-  interval: epoch
-  monitor: val/loss
+interval: epoch
+monitor: val/loss
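
interval and monitor are not CosineAnnealingLR arguments, so the model code presumably strips them from the config and returns them in the lr-scheduler dict that PyTorch Lightning expects from configure_optimizers. A sketch of that split; the hand-off itself is an assumption about this repo's LightningModule:

import torch

optimizer = torch.optim.RAdam(torch.nn.Linear(4, 4).parameters(), lr=1.5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=256, eta_min=0.0, last_epoch=-1
)
# Returned from configure_optimizers alongside the optimizer:
lr_scheduler_config = {
    "scheduler": scheduler,
    "interval": "epoch",    # step the scheduler once per epoch
    "monitor": "val/loss",  # consulted by plateau-style schedulers and callbacks
}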
diff --git a/training/conf/optimizers/radam.yaml b/training/conf/optimizers/radam.yaml
index 7ee1234..d11fcb5 100644
--- a/training/conf/optimizers/radam.yaml
+++ b/training/conf/optimizers/radam.yaml
@@ -1,7 +1,6 @@
-radam:
-  _target_: torch.optim.RAdam
-  lr: 1.5e-4
-  betas: [0.9, 0.999]
-  weight_decay: 1.0e-4
-  eps: 1.0e-8
-  parameters: network
+_target_: torch.optim.RAdam
+lr: 1.5e-4
+betas: [0.9, 0.999]
+weight_decay: 1.0e-4
+eps: 1.0e-8
+parameters: network
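
With the radam: level gone, the whole file is one instantiable node. A sketch of building the optimizer from it with hydra.utils.instantiate; popping the non-torch key "parameters: network" and resolving it to the submodule whose parameters the optimizer receives is an assumption about how the training code interprets that key:

import hydra
import torch
from omegaconf import OmegaConf

cfg = OmegaConf.load("training/conf/optimizers/radam.yaml")
module = torch.nn.ModuleDict({"network": torch.nn.Linear(4, 4)})  # stand-in

attr = cfg.pop("parameters")  # "network" is not a torch.optim.RAdam argument
optimizer = hydra.utils.instantiate(
    cfg, params=module[attr].parameters(), _convert_="all"
)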