summary | refs | log | tree | commit | diff
path: root/training/conf/experiment/cnn_htr_wp_lines.yaml
diff options
context:
space:
mode:
author Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-10-25 22:32:10 +0200
committer Gustaf Rydholm <gustaf.rydholm@gmail.com> 2021-10-25 22:32:10 +0200
commit 41c3e99fe57874ba1855c893bf47087d474ec6b8 (patch)
tree 2ba286209251f7ec8d7ab4cf061cffe5de86f398 /training/conf/experiment/cnn_htr_wp_lines.yaml
parent da29c1cf4d062087f1b29dc9402ee6384203b690 (diff)
Updates to configs
Diffstat (limited to 'training/conf/experiment/cnn_htr_wp_lines.yaml')
-rw-r--r-- training/conf/experiment/cnn_htr_wp_lines.yaml 52
1 file changed, 28 insertions, 24 deletions
diff --git a/training/conf/experiment/cnn_htr_wp_lines.yaml b/training/conf/experiment/cnn_htr_wp_lines.yaml
index f467b74..6cdd023 100644
--- a/training/conf/experiment/cnn_htr_wp_lines.yaml
+++ b/training/conf/experiment/cnn_htr_wp_lines.yaml
@@ -1,5 +1,8 @@
+# @package _global_
+
defaults:
- override /mapping: null
+ - override /callbacks: htr
- override /criterion: null
- override /datamodule: null
- override /network: null
@@ -7,28 +10,27 @@ defaults:
- override /lr_schedulers: null
- override /optimizers: null
-epochs: &epochs 256
+epochs: &epochs 512
ignore_index: &ignore_index 1000
num_classes: &num_classes 1006
max_output_len: &max_output_len 72
criterion:
- _target_: torch.nn.CrossEntropyLoss
- ignore_index: *ignore_index
- # _target_: text_recognizer.criterions.label_smoothing.LabelSmoothingLoss
- # smoothing: 0.1
+ # _target_: torch.nn.CrossEntropyLoss
# ignore_index: *ignore_index
+ _target_: text_recognizer.criterions.label_smoothing.LabelSmoothingLoss
+ smoothing: 0.1
+ ignore_index: *ignore_index
-mapping:
- _target_: text_recognizer.data.word_piece_mapping.WordPieceMapping
- num_features: 1000
- tokens: iamdb_1kwp_tokens_1000.txt
- lexicon: iamdb_1kwp_lex_1000.txt
- data_dir: null
- use_words: false
- prepend_wordsep: false
- special_tokens: [ <s>, <e>, <p> ]
- # extra_symbols: [ "\n" ]
+mapping: &mapping
+ mapping:
+ _target_: text_recognizer.data.mappings.word_piece_mapping.WordPieceMapping
+ num_features: 1000
+ tokens: iamdb_1kwp_tokens_1000.txt
+ lexicon: iamdb_1kwp_lex_1000.txt
+ use_words: false
+ prepend_wordsep: false
+ special_tokens: [ <s>, <e>, <p> ]
callbacks:
stochastic_weight_averaging:
@@ -42,7 +44,7 @@ callbacks:
optimizers:
madgrad:
_target_: madgrad.MADGRAD
- lr: 3.0e-4
+ lr: 1.0e-4
momentum: 0.9
weight_decay: 0
eps: 1.0e-6
@@ -52,11 +54,11 @@ optimizers:
lr_schedulers:
network:
_target_: torch.optim.lr_scheduler.OneCycleLR
- max_lr: 3.0e-4
+ max_lr: 1.0e-4
total_steps: null
epochs: *epochs
- steps_per_epoch: 90
- pct_start: 0.1
+ steps_per_epoch: 179
+ pct_start: 0.03
anneal_strategy: cos
cycle_momentum: true
base_momentum: 0.85
@@ -72,17 +74,19 @@ lr_schedulers:
datamodule:
_target_: text_recognizer.data.iam_lines.IAMLines
- batch_size: 32
num_workers: 12
- train_fraction: 0.8
- augment: true
+ batch_size: 16
+ train_fraction: 0.9
pin_memory: true
- word_pieces: true
+ transform: transform/iam_lines.yaml
+ test_transform: transform/iam_lines.yaml
+ target_transform: target_transform/word_piece_iam.yaml
+ <<: *mapping
network:
_target_: text_recognizer.networks.conv_transformer.ConvTransformer
input_dims: [1, 56, 1024]
- hidden_dim: &hidden_dim 128
+ hidden_dim: &hidden_dim 256
encoder_dim: 1280
dropout_rate: 0.2
num_classes: *num_classes