summary refs log tree commit diff
path: root/src/text_recognizer/datasets/transforms.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/text_recognizer/datasets/transforms.py')
-rw-r--r-- src/text_recognizer/datasets/transforms.py 15
1 files changed, 14 insertions, 1 deletions
diff --git a/src/text_recognizer/datasets/transforms.py b/src/text_recognizer/datasets/transforms.py
index 8deac7f..1105f23 100644
--- a/src/text_recognizer/datasets/transforms.py
+++ b/src/text_recognizer/datasets/transforms.py
@@ -3,7 +3,8 @@ import numpy as np
from PIL import Image
import torch
from torch import Tensor
-from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor
+import torch.nn.functional as F
+from torchvision.transforms import Compose, ToPILImage, ToTensor
from text_recognizer.datasets.util import EmnistMapper
@@ -16,6 +17,18 @@ class Transpose:
return np.array(image).swapaxes(0, 1)
class Resize:
    """Resizes a tensor to a specified width (its last dimension).

    Uses nearest-neighbour interpolation via ``torch.nn.functional.interpolate``,
    which treats the first two dimensions of its input as batch and channel
    and every remaining dimension as spatial.
    """

    def __init__(self, width: int = 952) -> None:
        """Store the target width.

        Args:
            width: Size of the last dimension after resizing. The default is
                952 because of the IAM dataset.
        """
        self.width = width

    def __call__(self, image: Tensor) -> Tensor:
        """Resize tensor in the last dimension only.

        Args:
            image: Tensor whose last dimension is the width. A 3D tensor is
                resized exactly as before; for 4D/5D tensors the other
                spatial dimensions are kept at their current size.

        Returns:
            A tensor with the same leading dimensions as ``image`` and a last
            dimension of ``self.width``.
        """
        if image.dim() <= 3:
            # Single spatial dimension: an int size already targets only the
            # last dimension, so keep the original call unchanged.
            size = self.width
        else:
            # An int size would be applied to *every* spatial dimension
            # (e.g. H and W both become ``width``). Pin the other spatial
            # dimensions to their current sizes so only the width changes.
            size = (*image.shape[2:-1], self.width)
        return F.interpolate(image, size=size, mode="nearest")
+
+
class AddTokens:
"""Adds start of sequence and end of sequence tokens to target tensor."""