summary | refs | log | tree | commit | diff
path: root/training/conf/network
diff options
context:
space:
mode:
Diffstat (limited to 'training/conf/network')
-rw-r--r-- training/conf/network/conv_transformer.yaml 2
-rw-r--r-- training/conf/network/decoder/transformer_decoder.yaml 30
-rw-r--r-- training/conf/network/encoder/efficientnet.yaml 5
3 files changed, 1 insertion, 36 deletions
diff --git a/training/conf/network/conv_transformer.yaml b/training/conf/network/conv_transformer.yaml
index 54eb028..39c5c46 100644
--- a/training/conf/network/conv_transformer.yaml
+++ b/training/conf/network/conv_transformer.yaml
@@ -10,7 +10,7 @@ encoder:
bn_momentum: 0.99
bn_eps: 1.0e-3
depth: 3
- out_channels: 128
+ out_channels: *hidden_dim
decoder:
_target_: text_recognizer.networks.transformer.Decoder
depth: 6
diff --git a/training/conf/network/decoder/transformer_decoder.yaml b/training/conf/network/decoder/transformer_decoder.yaml
deleted file mode 100644
index 4588ee9..0000000
--- a/training/conf/network/decoder/transformer_decoder.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-_target_: text_recognizer.networks.transformer.decoder.Decoder
-depth: 4
-block:
- _target_: text_recognizer.networks.transformer.decoder.DecoderBlock
- self_attn:
- _target_: text_recognizer.networks.transformer.attention.Attention
- dim: 64
- num_heads: 4
- dim_head: 64
- dropout_rate: 0.05
- causal: true
- rotary_embedding:
- _target_: text_recognizer.networks.transformer.embeddings.rotary.RotaryEmbedding
- dim: 128
- cross_attn:
- _target_: text_recognizer.networks.transformer.attention.Attention
- dim: 64
- num_heads: 4
- dim_head: 64
- dropout_rate: 0.05
- causal: false
- norm:
- _target_: text_recognizer.networks.transformer.norm.RMSNorm
- normalized_shape: 192
- ff:
- _target_: text_recognizer.networks.transformer.mlp.FeedForward
- dim_out: null
- expansion_factor: 4
- glu: true
- dropout_rate: 0.2
diff --git a/training/conf/network/encoder/efficientnet.yaml b/training/conf/network/encoder/efficientnet.yaml
deleted file mode 100644
index a7be069..0000000
--- a/training/conf/network/encoder/efficientnet.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-_target_: text_recognizer.networks.efficientnet.EfficientNet
-arch: b0
-stochastic_dropout_rate: 0.2
-bn_momentum: 0.99
-bn_eps: 1.0e-3