summary refs log tree commit diff
path: root/training
diff options
context:
space:
mode:
Diffstat (limited to 'training')
-rw-r--r-- training/conf/experiment/conv_transformer_paragraphs.yaml 35
-rw-r--r-- training/conf/network/conv_transformer.yaml 4
-rw-r--r-- training/conf/network/efficientnet.yaml 1
3 files changed, 17 insertions, 23 deletions
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 80f6ce6..41c236d 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -41,26 +41,21 @@ optimizer:
eps: 1.0e-8
lr_scheduler:
- _target_: torch.optim.lr_scheduler.OneCycleLR
- max_lr: 3.0e-4
- total_steps: null
- epochs: *epochs
- steps_per_epoch: 3358
- pct_start: 0.1
- anneal_strategy: cos
- cycle_momentum: true
- base_momentum: 0.85
- max_momentum: 0.95
- div_factor: 25.0
- final_div_factor: 10000.0
- three_phase: true
- last_epoch: -1
+ _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
+ mode: min
+ factor: 0.8
+ patience: 10
+ threshold: 1.0e-4
+ threshold_mode: rel
+ cooldown: 0
+ min_lr: 1.0e-5
+ eps: 1.0e-8
verbose: false
- interval: step
+ interval: epoch
monitor: val/cer
datamodule:
- batch_size: 6
+ batch_size: 8
train_fraction: 0.95
network:
@@ -68,16 +63,16 @@ network:
num_classes: *num_classes
pad_index: *ignore_index
encoder:
- depth: 6
- decoder:
depth: 4
+ decoder:
+ depth: 6
pixel_embedding:
- shape: [18, 80]
+ shape: [18, 79]
model:
max_output_len: *max_output_len
trainer:
- gradient_clip_val: 0.5
+ gradient_clip_val: 1.0
max_epochs: *epochs
accumulate_grad_batches: 1
diff --git a/training/conf/network/conv_transformer.yaml b/training/conf/network/conv_transformer.yaml
index 016416c..24857c8 100644
--- a/training/conf/network/conv_transformer.yaml
+++ b/training/conf/network/conv_transformer.yaml
@@ -1,6 +1,6 @@
_target_: text_recognizer.networks.ConvTransformer
input_dims: [1, 1, 576, 640]
-hidden_dim: &hidden_dim 128
+hidden_dim: &hidden_dim 144
num_classes: 58
pad_index: 3
encoder:
@@ -46,4 +46,4 @@ decoder:
pixel_embedding:
_target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
dim: *hidden_dim
- shape: [18, 80]
+ shape: [18, 79]
diff --git a/training/conf/network/efficientnet.yaml b/training/conf/network/efficientnet.yaml
index bbe68dd..4dd148f 100644
--- a/training/conf/network/efficientnet.yaml
+++ b/training/conf/network/efficientnet.yaml
@@ -3,5 +3,4 @@ arch: b0
stochastic_dropout_rate: 0.2
bn_momentum: 0.99
bn_eps: 1.0e-3
-stride: [2, 1]
out_channels: 144