diff options
author | aktersnurra <gustaf.rydholm@gmail.com> | 2020-11-12 23:42:03 +0100 |
---|---|---|
committer | aktersnurra <gustaf.rydholm@gmail.com> | 2020-11-12 23:42:03 +0100 |
commit | 8fdb6435e15703fa5b76df19728d905650ee1aef (patch) | |
tree | be3bec9e5cab4ef7f9d94528d102e57ce9b16c3f /src/text_recognizer/datasets | |
parent | dc28cbe2b4ed77be92ee8b2b69a20689c3bf02a4 (diff) | |
parent | 6cb08a110620ee09fe9d8a5d008197a801d025df (diff) |
Working cnn transformer.
Diffstat (limited to 'src/text_recognizer/datasets')
-rw-r--r-- | src/text_recognizer/datasets/transforms.py | 15 |
1 file changed, 14 insertions, 1 deletion
```diff
diff --git a/src/text_recognizer/datasets/transforms.py b/src/text_recognizer/datasets/transforms.py
index 8deac7f..1105f23 100644
--- a/src/text_recognizer/datasets/transforms.py
+++ b/src/text_recognizer/datasets/transforms.py
@@ -3,7 +3,8 @@ import numpy as np
 from PIL import Image
 import torch
 from torch import Tensor
-from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor
+import torch.nn.functional as F
+from torchvision.transforms import Compose, ToPILImage, ToTensor
 
 from text_recognizer.datasets.util import EmnistMapper
 
@@ -16,6 +17,18 @@ class Transpose:
         return np.array(image).swapaxes(0, 1)
 
 
+class Resize:
+    """Resizes a tensor to a specified width."""
+
+    def __init__(self, width: int = 952) -> None:
+        # The default is 952 because of the IAM dataset.
+        self.width = width
+
+    def __call__(self, image: Tensor) -> Tensor:
+        """Resize tensor in the last dimension."""
+        return F.interpolate(image, size=self.width, mode="nearest")
+
+
 class AddTokens:
     """Adds start of sequence and end of sequence tokens to target tensor."""
 
```