blob: 890948c9ce06d37f850598b84bb547150fac4873 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
# Hydra defaults list for this experiment.
# The groups overridden to `null` (network, criterion, datamodule, optimizers,
# lr_schedulers) are configured inline by the same-named top-level keys in
# this file; `model` and `callbacks` select named options instead.
defaults:
  - override /network: null
  - override /criterion: null
  - override /datamodule: null
  - override /model: lit_vqgan
  - override /callbacks: wandb_vae
  - override /optimizers: null
  - override /lr_schedulers: null
# Loss configuration: a VQGANLoss that is handed a reconstruction loss and a
# discriminator, each described by its own `_target_` sub-mapping.
criterion:
  _target_: text_recognizer.criterions.vqgan_loss.VQGANLoss
  reconstruction_loss:
    _target_: torch.nn.BCEWithLogitsLoss
    reduction: mean
  discriminator:
    _target_: text_recognizer.criterions.n_layer_discriminator.NLayerDiscriminator
    in_channels: 1
    num_channels: 64
    num_layers: 3
  # NOTE(review): the four settings below are assumed to be VQGANLoss
  # arguments (not discriminator arguments) -- confirm against the class
  # signature.
  commitment_weight: 0.25
  discriminator_weight: 0.8
  discriminator_factor: 1.0
  # Exponent written with an explicit sign so YAML 1.1 loaders also resolve
  # this as a float (15000.0) instead of a string.
  discriminator_iter_start: 1.5e+4
# Data module configuration: instantiates IAMLines with the loader settings
# listed below.
datamodule:
  _target_: text_recognizer.data.iam_lines.IAMLines
  batch_size: 24
  num_workers: 12
  train_fraction: 0.8
  augment: true
  pin_memory: false
# Learning-rate schedulers, keyed by the optimizer they belong to.
# Only the generator has an active scheduler; the discriminator entry is
# commented out.
lr_schedulers:
  generator:
    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
    # Same value as trainer.max_epochs in this file.
    T_max: 64
    eta_min: 4.5e-6
    last_epoch: -1

    # NOTE(review): `interval` and `monitor` are not CosineAnnealingLR
    # constructor arguments; presumably the training module consumes them
    # before instantiation -- confirm.
    interval: epoch
    monitor: val/loss

  # discriminator:
  #   _target_: torch.optim.lr_scheduler.CosineAnnealingLR
  #   T_max: 64
  #   eta_min: 0.0
  #   last_epoch: -1
  #
  #   interval: epoch
  #   monitor: val/loss
# Optimizers: one MADGRAD instance for the generator and one for the
# discriminator. They differ in learning rate, eps, and `parameters`.
optimizers:
  generator:
    _target_: madgrad.MADGRAD
    lr: 1.0e-4
    momentum: 0.5
    weight_decay: 0
    eps: 1.0e-7

    # NOTE(review): presumably the attribute path of the module whose
    # parameters this optimizer updates -- confirm against the model code.
    parameters: network

  discriminator:
    _target_: madgrad.MADGRAD
    lr: 4.5e-6
    momentum: 0.5
    weight_decay: 0
    eps: 1.0e-6

    # NOTE(review): presumably resolves to the discriminator held by the
    # loss function -- confirm against the model code.
    parameters: loss_fn.discriminator
# Network configuration: a VQVAE with nested encoder and decoder definitions.
network:
  _target_: text_recognizer.networks.vqvae.vqvae.VQVAE
  hidden_dim: 256
  embedding_dim: 32
  num_embeddings: 512
  decay: 0.99
  encoder:
    _target_: text_recognizer.networks.vqvae.encoder.Encoder
    in_channels: 1
    hidden_dim: 32
    channels_multipliers: [1, 4, 8]
    dropout_rate: 0.0
    activation: mish
    use_norm: true
    num_residuals: 2
    residual_channels: 32
  decoder:
    _target_: text_recognizer.networks.vqvae.decoder.Decoder
    out_channels: 1
    hidden_dim: 32
    # Reverse of the encoder's channel multipliers.
    channels_multipliers: [8, 4, 1]
    dropout_rate: 0.0
    activation: mish
    use_norm: true
    num_residuals: 2
    residual_channels: 32
# Trainer settings. Only `max_epochs` is active; the remaining keys are kept
# commented out.
trainer:
  max_epochs: 64
  # limit_train_batches: 0.1
  # limit_val_batches: 0.1
  # gradient_clip_val: 100

# NOTE(review): the three flags below look like top-level run switches rather
# than trainer arguments -- confirm the intended nesting before uncommenting.
# tune: false
# train: true
# test: false
# NOTE(review): presumably the input shape used when printing a model
# summary -- confirm against the consumer of this key.
summary:
  - 2
  - 1
  - 56
  - 1024
|