author     Gustaf Rydholm <gustaf.rydholm@gmail.com>  2021-04-07 22:12:10 +0200
committer  Gustaf Rydholm <gustaf.rydholm@gmail.com>  2021-04-07 22:12:10 +0200
commit     8afa8e1c6e9623b0dea86236da04b2b4173e9443 (patch)
tree       4c9462507b3b3076aa26f08ab629f64b90aed2cb /text_recognizer
parent     33190bc9c0c377edab280efe4b0bd0e53bb6cb00 (diff)
Fixed typing and typos, made train script load config, reformatted
Diffstat (limited to 'text_recognizer')
-rw-r--r--  text_recognizer/data/iam_extended_paragraphs.py    10
-rw-r--r--  text_recognizer/data/iam_paragraphs.py              9
-rw-r--r--  text_recognizer/models/base.py                     10
-rw-r--r--  text_recognizer/models/transformer.py               8
-rw-r--r--  text_recognizer/networks/__init__.py                1
-rw-r--r--  text_recognizer/networks/image_transformer.py       6
-rw-r--r--  text_recognizer/networks/residual_network.py        6
-rw-r--r--  text_recognizer/networks/transducer/transducer.py   7
-rw-r--r--  text_recognizer/networks/vqvae/decoder.py          18
-rw-r--r--  text_recognizer/networks/vqvae/encoder.py          12
10 files changed, 23 insertions, 64 deletions
diff --git a/text_recognizer/data/iam_extended_paragraphs.py b/text_recognizer/data/iam_extended_paragraphs.py
index c144341..d2529b4 100644
--- a/text_recognizer/data/iam_extended_paragraphs.py
+++ b/text_recognizer/data/iam_extended_paragraphs.py
@@ -18,16 +18,10 @@ class IAMExtendedParagraphs(BaseDataModule):
         super().__init__(batch_size, num_workers)
         self.iam_paragraphs = IAMParagraphs(
-            batch_size,
-            num_workers,
-            train_fraction,
-            augment,
+            batch_size, num_workers, train_fraction, augment,
         )
         self.iam_synthetic_paragraphs = IAMSyntheticParagraphs(
-            batch_size,
-            num_workers,
-            train_fraction,
-            augment,
+            batch_size, num_workers, train_fraction, augment,
         )
         self.dims = self.iam_paragraphs.dims
diff --git a/text_recognizer/data/iam_paragraphs.py b/text_recognizer/data/iam_paragraphs.py
index 314d458..f588587 100644
--- a/text_recognizer/data/iam_paragraphs.py
+++ b/text_recognizer/data/iam_paragraphs.py
@@ -161,10 +161,7 @@ def get_dataset_properties() -> Dict:
             "min": min(_get_property_values("num_lines")),
             "max": max(_get_property_values("num_lines")),
         },
-        "crop_shape": {
-            "min": crop_shapes.min(axis=0),
-            "max": crop_shapes.max(axis=0),
-        },
+        "crop_shape": {"min": crop_shapes.min(axis=0), "max": crop_shapes.max(axis=0),},
         "aspect_ratio": {
             "min": aspect_ratio.min(axis=0),
             "max": aspect_ratio.max(axis=0),
@@ -285,9 +282,7 @@ def get_transform(image_shape: Tuple[int, int], augment: bool) -> transforms.Com
             ),
             transforms.ColorJitter(brightness=(0.8, 1.6)),
             transforms.RandomAffine(
-                degrees=1,
-                shear=(-10, 10),
-                interpolation=InterpolationMode.BILINEAR,
+                degrees=1, shear=(-10, 10), interpolation=InterpolationMode.BILINEAR,
             ),
         ]
     else:
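
For context, the reflowed arguments above belong to a torchvision augmentation pipeline used when augment is true. A minimal runnable sketch, assuming the list is wrapped in transforms.Compose as usual; only ColorJitter and RandomAffine are confirmed by this hunk, everything else is illustrative:

    from torchvision import transforms
    from torchvision.transforms import InterpolationMode

    # Sketch of the augment branch; transforms outside this hunk are omitted.
    transforms_list = [
        transforms.ColorJitter(brightness=(0.8, 1.6)),
        transforms.RandomAffine(
            degrees=1, shear=(-10, 10), interpolation=InterpolationMode.BILINEAR,
        ),
    ]
    transform = transforms.Compose(transforms_list)
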
diff --git a/text_recognizer/models/base.py b/text_recognizer/models/base.py
index f4df3f3..3c1919e 100644
--- a/text_recognizer/models/base.py
+++ b/text_recognizer/models/base.py
@@ -2,7 +2,7 @@
from typing import Any, Dict, List, Union, Tuple, Type
import madgrad
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
import pytorch_lightning as pl
import torch
from torch import nn
@@ -16,9 +16,9 @@ class LitBaseModel(pl.LightningModule):
     def __init__(
         self,
         network: Type[nn.Module],
-        optimizer: Union[OmegaConf, Dict],
-        lr_scheduler: Union[OmegaConf, Dict],
-        criterion: Union[OmegaConf, Dict],
+        optimizer: Union[DictConfig, Dict],
+        lr_scheduler: Union[DictConfig, Dict],
+        criterion: Union[DictConfig, Dict],
         monitor: str = "val_loss",
     ) -> None:
         super().__init__()
@@ -34,7 +34,7 @@ class LitBaseModel(pl.LightningModule):
         self.test_acc = torchmetrics.Accuracy()

     @staticmethod
-    def configure_criterion(criterion: Union[OmegaConf, Dict]) -> Type[nn.Module]:
+    def configure_criterion(criterion: Union[DictConfig, Dict]) -> Type[nn.Module]:
         """Returns a loss function."""
         criterion = OmegaConf.create(criterion)
         args = criterion.args or {}
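
configure_criterion accepts either a plain dict or a DictConfig and normalizes it with OmegaConf.create before building the loss. A hedged sketch of one plausible way the remaining (unshown) lines could resolve it; the "type"/"args" schema is an assumption inferred from criterion.args here and encoder.type further down:

    from omegaconf import OmegaConf
    from torch import nn

    # Hypothetical criterion config; "type" and "args" are an assumed schema.
    criterion_cfg = {"type": "CrossEntropyLoss", "args": {"ignore_index": -100}}

    criterion = OmegaConf.create(criterion_cfg)  # plain dict -> DictConfig
    args = criterion.args or {}
    loss_fn = getattr(nn, criterion.type)(**args)  # nn.CrossEntropyLoss(ignore_index=-100)
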
diff --git a/text_recognizer/models/transformer.py b/text_recognizer/models/transformer.py
index 983e274..b23685b 100644
--- a/text_recognizer/models/transformer.py
+++ b/text_recognizer/models/transformer.py
@@ -1,7 +1,7 @@
"""PyTorch Lightning model for base Transformers."""
from typing import Dict, List, Optional, Union, Tuple
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
import pytorch_lightning as pl
import torch
from torch import nn
@@ -20,9 +20,9 @@ class LitTransformerModel(LitBaseModel):
     def __init__(
         self,
         network: Type[nn.Module],
-        optimizer: Union[OmegaConf, Dict],
-        lr_scheduler: Union[OmegaConf, Dict],
-        criterion: Union[OmegaConf, Dict],
+        optimizer: Union[DictConfig, Dict],
+        lr_scheduler: Union[DictConfig, Dict],
+        criterion: Union[DictConfig, Dict],
         monitor: str = "val_loss",
         mapping: Optional[List[str]] = None,
     ) -> None:
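
The annotation swap is the substantive change in these models: OmegaConf is a utility class, while the object OmegaConf.create returns for a mapping is a DictConfig, so Union[DictConfig, Dict] describes what the constructors actually receive. A quick check, assuming omegaconf is installed:

    from omegaconf import DictConfig, OmegaConf

    cfg = OmegaConf.create({"lr": 3e-4})
    assert isinstance(cfg, DictConfig)     # what the new annotation promises
    assert not isinstance(cfg, OmegaConf)  # the old annotation never matched
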
diff --git a/text_recognizer/networks/__init__.py b/text_recognizer/networks/__init__.py
index 4dcaf2e..979149f 100644
--- a/text_recognizer/networks/__init__.py
+++ b/text_recognizer/networks/__init__.py
@@ -1,3 +1,2 @@
"""Network modules"""
from .image_transformer import ImageTransformer
-
diff --git a/text_recognizer/networks/image_transformer.py b/text_recognizer/networks/image_transformer.py
index edebca9..9ed67a4 100644
--- a/text_recognizer/networks/image_transformer.py
+++ b/text_recognizer/networks/image_transformer.py
@@ -13,7 +13,7 @@ import math
from typing import Dict, List, Union, Sequence, Tuple, Type
from einops import rearrange
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
import torch
from torch import nn
from torch import Tensor
@@ -34,7 +34,7 @@ class ImageTransformer(nn.Module):
         self,
         input_shape: Sequence[int],
         output_shape: Sequence[int],
-        encoder: Union[OmegaConf, Dict],
+        encoder: Union[DictConfig, Dict],
         mapping: str,
         num_decoder_layers: int = 4,
         hidden_dim: int = 256,
@@ -101,7 +101,7 @@ class ImageTransformer(nn.Module):
         nn.init.normal_(self.feature_map_encoding.bias, -bound, bound)

     @staticmethod
-    def _configure_encoder(encoder: Union[OmegaConf, NamedTuple]) -> Type[nn.Module]:
+    def _configure_encoder(encoder: Union[DictConfig, Dict]) -> Type[nn.Module]:
         encoder = OmegaConf.create(encoder)
         network_module = importlib.import_module("text_recognizer.networks")
         encoder_class = getattr(network_module, encoder.type)
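
_configure_encoder resolves the encoder class by name: the config is normalized, text_recognizer.networks is imported, and the attribute named by encoder.type is looked up on that module. A sketch of the pattern; the concrete type value and any constructor arguments are hypothetical here:

    import importlib

    from omegaconf import OmegaConf

    encoder_cfg = OmegaConf.create({"type": "ImageTransformer"})  # hypothetical config
    network_module = importlib.import_module("text_recognizer.networks")
    encoder_class = getattr(network_module, encoder_cfg.type)
    # encoder = encoder_class(...)  # instantiation needs the network's real arguments
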
diff --git a/text_recognizer/networks/residual_network.py b/text_recognizer/networks/residual_network.py
index da7553d..c33f419 100644
--- a/text_recognizer/networks/residual_network.py
+++ b/text_recognizer/networks/residual_network.py
@@ -20,11 +20,7 @@ class Conv2dAuto(nn.Conv2d):
 def conv_bn(in_channels: int, out_channels: int, *args, **kwargs) -> nn.Sequential:
     """3x3 convolution with batch norm."""
-    conv3x3 = partial(
-        Conv2dAuto,
-        kernel_size=3,
-        bias=False,
-    )
+    conv3x3 = partial(Conv2dAuto, kernel_size=3, bias=False,)
     return nn.Sequential(
         conv3x3(in_channels, out_channels, *args, **kwargs),
         nn.BatchNorm2d(out_channels),
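
conv_bn pairs a padding-aware 3x3 convolution with batch norm, with functools.partial pre-binding the kernel size. A self-contained sketch; the padding logic in Conv2dAuto is an assumption, since its body sits outside this hunk:

    from functools import partial

    import torch
    from torch import nn


    class Conv2dAuto(nn.Conv2d):
        """Conv2d deriving 'same' padding from its kernel size (assumed behavior)."""

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.padding = (self.kernel_size[0] // 2, self.kernel_size[1] // 2)


    conv3x3 = partial(Conv2dAuto, kernel_size=3, bias=False)
    block = nn.Sequential(conv3x3(32, 64), nn.BatchNorm2d(64))
    print(block(torch.randn(1, 32, 28, 28)).shape)  # torch.Size([1, 64, 28, 28])
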
diff --git a/text_recognizer/networks/transducer/transducer.py b/text_recognizer/networks/transducer/transducer.py
index b10f93a..d7e3d08 100644
--- a/text_recognizer/networks/transducer/transducer.py
+++ b/text_recognizer/networks/transducer/transducer.py
@@ -392,12 +392,7 @@ def load_transducer_loss(
     transitions = gtn.load(str(processed_path / transitions))

     preprocessor = Preprocessor(
-        data_dir,
-        num_features,
-        tokens_path,
-        lexicon_path,
-        use_words,
-        prepend_wordsep,
+        data_dir, num_features, tokens_path, lexicon_path, use_words, prepend_wordsep,
     )

     num_tokens = preprocessor.num_tokens
diff --git a/text_recognizer/networks/vqvae/decoder.py b/text_recognizer/networks/vqvae/decoder.py
index 67ed0d9..8847aba 100644
--- a/text_recognizer/networks/vqvae/decoder.py
+++ b/text_recognizer/networks/vqvae/decoder.py
@@ -44,12 +44,7 @@ class Decoder(nn.Module):
         # Configure decoder.
         self.decoder = self._build_decoder(
-            channels,
-            kernel_sizes,
-            strides,
-            num_residual_layers,
-            activation,
-            dropout,
+            channels, kernel_sizes, strides, num_residual_layers, activation, dropout,
         )

     def _build_decompression_block(
@@ -78,9 +73,7 @@ class Decoder(nn.Module):
             )

             if i < len(self.upsampling):
-                modules.append(
-                    nn.Upsample(size=self.upsampling[i]),
-                )
+                modules.append(nn.Upsample(size=self.upsampling[i]),)

             if dropout is not None:
                 modules.append(dropout)
@@ -109,12 +102,7 @@ class Decoder(nn.Module):
     ) -> nn.Sequential:
         self.res_block.append(
-            nn.Conv2d(
-                self.embedding_dim,
-                channels[0],
-                kernel_size=1,
-                stride=1,
-            )
+            nn.Conv2d(self.embedding_dim, channels[0], kernel_size=1, stride=1,)
         )

         # Bottleneck module.
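
In both VQ-VAE halves these 1x1 convolutions only translate between channel widths; here the decoder maps the quantized latent (embedding_dim channels) into its first channel width before the residual bottleneck. A sketch with hypothetical sizes:

    import torch
    from torch import nn

    embedding_dim, channels = 64, [128, 64, 32]  # hypothetical sizes
    # A 1x1 conv changes only the channel count; the spatial grid is untouched.
    proj = nn.Conv2d(embedding_dim, channels[0], kernel_size=1, stride=1)

    z_q = torch.randn(1, embedding_dim, 18, 20)  # quantized latent, B x C x H x W
    print(proj(z_q).shape)  # torch.Size([1, 128, 18, 20])
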
diff --git a/text_recognizer/networks/vqvae/encoder.py b/text_recognizer/networks/vqvae/encoder.py
index ede5c31..d3adac5 100644
--- a/text_recognizer/networks/vqvae/encoder.py
+++ b/text_recognizer/networks/vqvae/encoder.py
@@ -11,10 +11,7 @@ from text_recognizer.networks.vqvae.vector_quantizer import VectorQuantizer
 class _ResidualBlock(nn.Module):
     def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        dropout: Optional[Type[nn.Module]],
+        self, in_channels: int, out_channels: int, dropout: Optional[Type[nn.Module]],
     ) -> None:
         super().__init__()
         self.block = [
@@ -138,12 +135,7 @@ class Encoder(nn.Module):
         )

         encoder.append(
-            nn.Conv2d(
-                channels[-1],
-                self.embedding_dim,
-                kernel_size=1,
-                stride=1,
-            )
+            nn.Conv2d(channels[-1], self.embedding_dim, kernel_size=1, stride=1,)
         )

         return nn.Sequential(*encoder)
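
Mirroring the decoder, the encoder ends with a 1x1 convolution projecting its feature map to embedding_dim channels, so every spatial position becomes a vector the VectorQuantizer can match against its codebook. A sketch under the same assumed sizes:

    import torch
    from torch import nn

    channels, embedding_dim = [32, 64, 128], 64  # hypothetical sizes
    encoder = [
        # ...downsampling and residual blocks elided...
        nn.Conv2d(channels[-1], embedding_dim, kernel_size=1, stride=1),
    ]
    model = nn.Sequential(*encoder)

    x = torch.randn(1, channels[-1], 18, 20)  # features entering the final layer
    print(model(x).shape)  # torch.Size([1, 64, 18, 20]) -> ready to quantize
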