-rw-r--r--  training/conf/config.yaml                                 | 1 -
-rw-r--r--  training/conf/experiment/conv_transformer_lines.yaml      | 1 -
-rw-r--r--  training/conf/experiment/conv_transformer_paragraphs.yaml | 8 +++-----
-rw-r--r--  training/conf/model/lit_transformer.yaml                  | 3 ---
-rw-r--r--  training/conf/network/conv_transformer.yaml               | 3 ++-
-rw-r--r--  training/conf/network/efficientnet.yaml                   | 7 +++++++
6 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/training/conf/config.yaml b/training/conf/config.yaml
index fc06f7f..fdeb5d2 100644
--- a/training/conf/config.yaml
+++ b/training/conf/config.yaml
@@ -8,7 +8,6 @@ defaults:
   - hydra: default
   - logger: wandb
   - lr_schedulers: cosine_annealing
-  - mapping: characters
   - model: lit_transformer
   - network: conv_transformer
   - optimizers: radam
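Dropping "- mapping: characters" from the defaults list means Hydra no longer composes a top-level mapping node into the run config; the matching mapping block is also removed from lit_transformer.yaml below. A minimal sketch of how a composed config like this one is consumed, assuming the usual @hydra.main entry point over the training/conf directory (the function name and print are illustrative only):

    # Minimal sketch, not the repo's actual training entry point.
    import hydra
    from omegaconf import DictConfig, OmegaConf

    @hydra.main(config_path="conf", config_name="config")
    def run(cfg: DictConfig) -> None:
        # Each entry in the defaults list is merged into cfg under its group
        # name; after this change there is no cfg.mapping node anymore.
        print(OmegaConf.to_yaml(cfg))

    if __name__ == "__main__":
        run()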
diff --git a/training/conf/experiment/conv_transformer_lines.yaml b/training/conf/experiment/conv_transformer_lines.yaml
index 260014c..48df78d 100644
--- a/training/conf/experiment/conv_transformer_lines.yaml
+++ b/training/conf/experiment/conv_transformer_lines.yaml
@@ -1,7 +1,6 @@
 # @package _global_
 defaults:
-  - override /mapping: characters
   - override /criterion: cross_entropy
   - override /callbacks: htr
   - override /datamodule: iam_lines
diff --git a/training/conf/experiment/conv_transformer_paragraphs.yaml b/training/conf/experiment/conv_transformer_paragraphs.yaml
index 7f0273f..d0d0314 100644
--- a/training/conf/experiment/conv_transformer_paragraphs.yaml
+++ b/training/conf/experiment/conv_transformer_paragraphs.yaml
@@ -1,7 +1,6 @@
 # @package _global_
 defaults:
-  - override /mapping: characters
   - override /criterion: cross_entropy
   - override /callbacks: htr
   - override /datamodule: iam_extended_paragraphs
@@ -39,11 +38,11 @@ optimizers:
 lr_schedulers:
   _target_: torch.optim.lr_scheduler.OneCycleLR
-  max_lr: 2.0e-4
+  max_lr: 3.0e-4
   total_steps: null
   epochs: *epochs
   steps_per_epoch: 3201
-  pct_start: 0.1
+  pct_start: 0.3
   anneal_strategy: cos
   cycle_momentum: true
   base_momentum: 0.85
@@ -69,7 +68,7 @@ network:
   decoder:
     depth: 6
   pixel_embedding:
-    shape: [36, 40]
+    shape: [18, 78]
 model:
   max_output_len: *max_output_len
@@ -78,4 +77,3 @@ trainer:
   gradient_clip_val: 0.5
   max_epochs: *epochs
   accumulate_grad_batches: 1
-  resume_from_checkpoint: /home/aktersnurra/projects/text-recognizer/training/logs/runs/2022-06-16/21-00-39/checkpoints/last.ckpt
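The lr_schedulers block maps directly onto torch.optim.lr_scheduler.OneCycleLR: with total_steps left null, PyTorch derives the schedule length from epochs * steps_per_epoch, max_lr is the peak rate reached at the end of the warm-up, and pct_start is the fraction of steps spent ramping up to it (now 30% instead of 10%). A rough sketch of the equivalent direct call, with a placeholder model and a placeholder epoch count standing in for the *epochs anchor:

    # Sketch of the scheduler the config above instantiates; the model and
    # num_epochs value are placeholders, not taken from the experiment config.
    import torch

    model = torch.nn.Linear(8, 8)
    optimizer = torch.optim.RAdam(model.parameters(), lr=3.0e-4)
    num_epochs = 10  # stands in for whatever *epochs resolves to

    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=3.0e-4,           # peak learning rate after warm-up
        total_steps=None,        # derived from epochs * steps_per_epoch
        epochs=num_epochs,
        steps_per_epoch=3201,
        pct_start=0.3,           # 30% of the steps ramp up, the rest anneal
        anneal_strategy="cos",
        cycle_momentum=True,
        base_momentum=0.85,
    )

    for _ in range(num_epochs * 3201):
        optimizer.step()
        scheduler.step()         # stepped once per batch, not per epoch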
diff --git a/training/conf/model/lit_transformer.yaml b/training/conf/model/lit_transformer.yaml
index b795078..7d252b4 100644
--- a/training/conf/model/lit_transformer.yaml
+++ b/training/conf/model/lit_transformer.yaml
@@ -3,6 +3,3 @@ max_output_len: 682
 start_token: <s>
 end_token: <e>
 pad_token: <p>
-mapping:
-  _target_: text_recognizer.data.mappings.EmnistMapping
-  # extra_symbols: ["\n"]
diff --git a/training/conf/network/conv_transformer.yaml b/training/conf/network/conv_transformer.yaml
index 39c5c46..ccdf960 100644
--- a/training/conf/network/conv_transformer.yaml
+++ b/training/conf/network/conv_transformer.yaml
@@ -11,6 +11,7 @@ encoder:
   bn_eps: 1.0e-3
   depth: 3
   out_channels: *hidden_dim
+  stride: [2, 1]
 decoder:
   _target_: text_recognizer.networks.transformer.Decoder
   depth: 6
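Read in PyTorch's (height, width) convention, stride: [2, 1] makes the affected encoder stages halve the feature-map height while leaving the width untouched, which suits line images that are far wider than they are tall. The same idea with a plain strided convolution as a stand-in for the EfficientNet blocks (the 56 x 1024 input size is only an example, not a value from this repo):

    # Stand-in for an asymmetrically strided encoder stage.
    import torch
    from torch import nn

    stage = nn.Conv2d(32, 64, kernel_size=3, stride=(2, 1), padding=1)
    x = torch.randn(1, 32, 56, 1024)   # (batch, channels, height, width)
    y = stage(x)
    print(y.shape)                     # torch.Size([1, 64, 28, 1024]): height halved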
@@ -46,4 +47,4 @@ decoder:
 pixel_embedding:
   _target_: text_recognizer.networks.transformer.AxialPositionalEmbedding
   dim: *hidden_dim
-  shape: [72, 80]
+  shape: [17, 78]
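The pixel_embedding shape has to match the spatial grid of encoder features the decoder attends over, so it changes here together with the encoder's new stride setting. The idea behind an axial positional embedding is to learn one vector per row index and one per column index and add their sum at each grid position, so the parameter count grows with H + W rather than H * W. A toy sketch of that idea, not the repo's AxialPositionalEmbedding class; the hidden dimension 128 is a placeholder for *hidden_dim:

    # Toy illustration of a 2D axial positional embedding; names, shapes and
    # the init scale are assumptions, not the text_recognizer implementation.
    import torch
    from torch import nn

    class ToyAxialPositionalEmbedding(nn.Module):
        def __init__(self, dim: int, shape: tuple) -> None:
            super().__init__()
            height, width = shape
            self.row_emb = nn.Parameter(torch.randn(height, 1, dim) * 0.02)
            self.col_emb = nn.Parameter(torch.randn(1, width, dim) * 0.02)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # x: (batch, height * width, dim), a flattened encoder feature map.
            pos = (self.row_emb + self.col_emb).flatten(0, 1)  # (H * W, dim)
            return x + pos.unsqueeze(0)

    emb = ToyAxialPositionalEmbedding(dim=128, shape=(17, 78))
    feats = torch.randn(2, 17 * 78, 128)
    out = emb(feats)  # same shape as feats, with position information added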
diff --git a/training/conf/network/efficientnet.yaml b/training/conf/network/efficientnet.yaml
new file mode 100644
index 0000000..bbe68dd
--- /dev/null
+++ b/training/conf/network/efficientnet.yaml
@@ -0,0 +1,7 @@
+_target_: text_recognizer.networks.efficientnet.EfficientNet
+arch: b0
+stochastic_dropout_rate: 0.2
+bn_momentum: 0.99
+bn_eps: 1.0e-3
+stride: [2, 1]
+out_channels: 144
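The new network/efficientnet.yaml exposes the EfficientNet backbone as its own entry in the network config group. Hydra treats the _target_ key as the class to construct and the remaining keys as keyword arguments; a small sketch of that mechanism, assuming text_recognizer is importable and that EfficientNet's constructor accepts exactly these arguments:

    # Sketch of how Hydra resolves the file above into an object.
    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        {
            "_target_": "text_recognizer.networks.efficientnet.EfficientNet",
            "arch": "b0",
            "stochastic_dropout_rate": 0.2,
            "bn_momentum": 0.99,
            "bn_eps": 1.0e-3,
            "stride": [2, 1],
            "out_channels": 144,
        }
    )
    encoder = instantiate(cfg)  # roughly EfficientNet(arch="b0", ...)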