1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
"""CNN decoder for the VQ-VAE."""
from typing import List, Optional, Tuple, Type
import torch
from torch import nn
from torch import Tensor
from text_recognizer.networks.util import activation_function
from text_recognizer.networks.vqvae.encoder import _ResidualBlock
class Decoder(nn.Module):
    """A CNN decoder network.

    Maps latent codes to a single-channel output. The input first passes
    through a 1x1 convolution and a stack of residual blocks, and is then
    expanded by a chain of transposed convolutions that ends in a ``Tanh``.

    Args:
        channels: Channel sizes. ``channels[0]`` is the width of the residual
            bottleneck; each following entry is the number of output channels
            of one transposed-convolution layer.
        kernel_sizes: Kernel size of each transposed-convolution layer, paired
            element-wise with ``channels[1:]``.
        strides: Stride of each transposed-convolution layer, paired
            element-wise with ``channels[1:]``.
        num_residual_layers: Number of residual blocks in the bottleneck.
        embedding_dim: Number of input channels expected by the first 1x1
            convolution, i.e. the channel dimension of the codes.
        upsampling: Optional list of target sizes. Entry ``i`` inserts an
            ``nn.Upsample(size=upsampling[i])`` after the ``i``-th
            transposed-convolution layer.
        activation: Name of the activation, resolved with
            ``activation_function``.
        dropout_rate: Dropout probability. A falsy value disables dropout.
            ``nn.AlphaDropout`` is used when ``activation == "selu"``,
            ``nn.Dropout`` otherwise.
    """

    def __init__(
        self,
        channels: List[int],
        kernel_sizes: List[int],
        strides: List[int],
        num_residual_layers: int,
        embedding_dim: int,
        upsampling: Optional[List[List[int]]] = None,
        activation: str = "leaky_relu",
        dropout_rate: float = 0.0,
    ) -> None:
        super().__init__()

        # The dropout flavour depends on the activation *name*, so it has to
        # be selected before the name is turned into a module below.
        if dropout_rate:
            if activation == "selu":
                dropout = nn.AlphaDropout(p=dropout_rate)
            else:
                dropout = nn.Dropout(p=dropout_rate)
        else:
            dropout = None

        self.upsampling = upsampling

        # Registered as ModuleLists first; `_build_decoder` fills them and
        # then replaces them with nn.Sequential containers of the same name.
        self.res_block = nn.ModuleList([])
        self.upsampling_block = nn.ModuleList([])

        self.embedding_dim = embedding_dim

        # NOTE(review): `activation_function` is defined elsewhere; it is
        # presumably returning an nn.Module instance, since the result is
        # placed directly inside nn.Sequential — confirm.
        activation = activation_function(activation)

        # Configure decoder.
        self.decoder = self._build_decoder(
            channels,
            kernel_sizes,
            strides,
            num_residual_layers,
            activation,
            dropout,
        )

    def _build_decompression_block(
        self,
        in_channels: int,
        channels: List[int],
        kernel_sizes: List[int],
        strides: List[int],
        activation: nn.Module,
        dropout: Optional[nn.Module],
    ) -> nn.ModuleList:
        """Build the transposed-convolution layers of the decoder.

        Args:
            in_channels: Number of channels entering the first layer.
            channels: Output channels of each transposed-convolution layer.
            kernel_sizes: Kernel size of each layer.
            strides: Stride of each layer.
            activation: Activation module applied after each layer. The same
                instance is reused for every layer.
            dropout: Optional dropout module appended after each layer. The
                same instance is reused for every layer.

        Returns:
            A ModuleList holding, per configured layer, a
            ``Sequential(ConvTranspose2d, activation)`` optionally followed by
            an ``Upsample`` and the dropout module, and finally a
            ``ConvTranspose2d`` to one channel followed by ``Tanh``.

        Raises:
            ValueError: If ``channels``, ``kernel_sizes`` or ``strides`` is
                empty, so that no layer can be configured.
        """
        # zip stops at the shortest list; surplus entries are ignored.
        configuration = list(zip(channels, kernel_sizes, strides))
        if not configuration:
            raise ValueError(
                "channels, kernel_sizes and strides must each contain at "
                "least one element to build the decompression block."
            )

        modules = nn.ModuleList([])
        for i, (out_channels, kernel_size, stride) in enumerate(configuration):
            modules.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels,
                        out_channels,
                        kernel_size,
                        stride=stride,
                        padding=1,
                    ),
                    activation,
                )
            )

            if self.upsampling and i < len(self.upsampling):
                modules.append(
                    nn.Upsample(size=self.upsampling[i]),
                )

            if dropout is not None:
                modules.append(dropout)

            in_channels = out_channels

        # The output layer reuses the kernel size and stride of the last
        # configured layer (made explicit here instead of relying on the
        # loop variables outliving the loop).
        _, last_kernel_size, last_stride = configuration[-1]
        modules.extend(
            [
                nn.ConvTranspose2d(
                    in_channels,
                    1,
                    kernel_size=last_kernel_size,
                    stride=last_stride,
                    padding=1,
                ),
                nn.Tanh(),
            ]
        )

        return modules

    def _build_decoder(
        self,
        channels: List[int],
        kernel_sizes: List[int],
        strides: List[int],
        num_residual_layers: int,
        activation: nn.Module,
        dropout: Optional[nn.Module],
    ) -> nn.Sequential:
        """Assemble the bottleneck and decompression parts into one module.

        Fills ``self.res_block`` and ``self.upsampling_block``, converts both
        to ``nn.Sequential`` and returns them chained in that order.

        Args:
            channels: Channel sizes; ``channels[0]`` is the bottleneck width
                and ``channels[1:]`` configures the decompression block.
            kernel_sizes: Kernel sizes for the decompression block.
            strides: Strides for the decompression block.
            num_residual_layers: Number of residual blocks to create.
            activation: Activation module for the decompression block.
            dropout: Optional dropout module, passed to every residual block
                and to the decompression block.

        Returns:
            ``nn.Sequential(self.res_block, self.upsampling_block)``.
        """
        # 1x1 convolution from the embedding dimension to the bottleneck width.
        self.res_block.append(
            nn.Conv2d(
                self.embedding_dim,
                channels[0],
                kernel_size=1,
                stride=1,
            )
        )

        # Bottleneck module.
        # NOTE(review): `_ResidualBlock` is defined in the encoder module; its
        # arguments are presumably (in_channels, out_channels, dropout) —
        # confirm against its definition.
        self.res_block.extend(
            nn.ModuleList(
                [
                    _ResidualBlock(channels[0], channels[0], dropout)
                    for _ in range(num_residual_layers)
                ]
            )
        )

        # Decompression module
        self.upsampling_block.extend(
            self._build_decompression_block(
                channels[0], channels[1:], kernel_sizes, strides, activation, dropout
            )
        )

        self.res_block = nn.Sequential(*self.res_block)
        self.upsampling_block = nn.Sequential(*self.upsampling_block)

        return nn.Sequential(self.res_block, self.upsampling_block)

    def forward(self, z_q: Tensor) -> Tensor:
        """Reconstruct input from given codes.

        Args:
            z_q: Codes fed to the decoder; the first convolution expects
                ``embedding_dim`` channels.

        Returns:
            The output of ``self.decoder``, whose last layers are a
            one-channel transposed convolution followed by ``Tanh``.
        """
        x_reconstruction = self.decoder(z_q)
        return x_reconstruction
|