summary | refs | log | tree | commit | diff
path: root/src/text_recognizer/datasets
diff options
context:
space:
mode:
author: aktersnurra <gustaf.rydholm@gmail.com> 2020-09-14 22:15:47 +0200
committer: aktersnurra <gustaf.rydholm@gmail.com> 2020-09-14 22:15:47 +0200
commit: 3b06ef615a8db67a03927576e0c12fbfb2501f5f (patch)
tree: e1c2b1289971c8480327408de46152481e99b539 /src/text_recognizer/datasets
parent: 2b63fd952bdc9c7c72edd501cbcdbf3231e98f00 (diff)
Fixed CTC loss.
Diffstat (limited to 'src/text_recognizer/datasets')
-rw-r--r-- src/text_recognizer/datasets/dataset.py 7
-rw-r--r-- src/text_recognizer/datasets/emnist_dataset.py 9
-rw-r--r-- src/text_recognizer/datasets/emnist_lines_dataset.py 9
-rw-r--r-- src/text_recognizer/datasets/transforms.py 13
-rw-r--r-- src/text_recognizer/datasets/util.py 2
5 files changed, 26 insertions, 14 deletions
diff --git a/src/text_recognizer/datasets/dataset.py b/src/text_recognizer/datasets/dataset.py
index f328a0f..05520e5 100644
--- a/src/text_recognizer/datasets/dataset.py
+++ b/src/text_recognizer/datasets/dataset.py
@@ -23,7 +23,7 @@ class Dataset(data.Dataset):
Args:
train (bool): If True, loads the training set, otherwise the validation set is loaded. Defaults to False.
- subsample_fraction (float): Description of parameter `subsample_fraction`. Defaults to None.
+ subsample_fraction (float): The fraction of the dataset to use for training. Defaults to None.
transform (Optional[Callable]): Transform(s) for input data. Defaults to None.
target_transform (Optional[Callable]): Transform(s) for output data. Defaults to None.
@@ -31,6 +31,7 @@ class Dataset(data.Dataset):
ValueError: If subsample_fraction is not None and outside the range (0, 1).
"""
+
self.train = train
self.split = "train" if self.train else "test"
@@ -96,8 +97,8 @@ class Dataset(data.Dataset):
if self.subsample_fraction is None:
return
num_subsample = int(self.data.shape[0] * self.subsample_fraction)
- self.data = self.data[:num_subsample]
- self.targets = self.targets[:num_subsample]
+ self._data = self.data[:num_subsample]
+ self._targets = self.targets[:num_subsample]
def __len__(self) -> int:
"""Returns the length of the dataset."""
diff --git a/src/text_recognizer/datasets/emnist_dataset.py b/src/text_recognizer/datasets/emnist_dataset.py
index 81268fb..d01dcee 100644
--- a/src/text_recognizer/datasets/emnist_dataset.py
+++ b/src/text_recognizer/datasets/emnist_dataset.py
@@ -13,17 +13,10 @@ from torchvision.datasets import EMNIST
from torchvision.transforms import Compose, ToTensor
from text_recognizer.datasets.dataset import Dataset
+from text_recognizer.datasets.transforms import Transpose
from text_recognizer.datasets.util import DATA_DIRNAME
-class Transpose:
- """Transposes the EMNIST image to the correct orientation."""
-
- def __call__(self, image: Image) -> np.ndarray:
- """Swaps axis."""
- return np.array(image).swapaxes(0, 1)
-
-
class EmnistDataset(Dataset):
"""This is a class for resampling and subsampling the PyTorch EMNIST dataset."""
diff --git a/src/text_recognizer/datasets/emnist_lines_dataset.py b/src/text_recognizer/datasets/emnist_lines_dataset.py
index 8fa77cd..6268a01 100644
--- a/src/text_recognizer/datasets/emnist_lines_dataset.py
+++ b/src/text_recognizer/datasets/emnist_lines_dataset.py
@@ -19,7 +19,6 @@ from text_recognizer.datasets.util import (
EmnistMapper,
ESSENTIALS_FILENAME,
)
-from text_recognizer.networks import sliding_window
DATA_DIRNAME = DATA_DIRNAME / "processed" / "emnist_lines"
@@ -32,6 +31,7 @@ class EmnistLinesDataset(Dataset):
train: bool = False,
transform: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
+ subsample_fraction: float = None,
max_length: int = 34,
min_overlap: float = 0,
max_overlap: float = 0.33,
@@ -44,6 +44,7 @@ class EmnistLinesDataset(Dataset):
train (bool): Flag for the filename. Defaults to False. Defaults to None.
transform (Optional[Callable]): The transform of the data. Defaults to None.
target_transform (Optional[Callable]): The transform of the target. Defaults to None.
+ subsample_fraction (float): The fraction of the dataset to use for training. Defaults to None.
max_length (int): The maximum number of characters. Defaults to 34.
min_overlap (float): The minimum overlap between concatenated images. Defaults to 0.
max_overlap (float): The maximum overlap between concatenated images. Defaults to 0.33.
@@ -52,7 +53,10 @@ class EmnistLinesDataset(Dataset):
"""
super().__init__(
- train=train, transform=transform, target_transform=target_transform,
+ train=train,
+ transform=transform,
+ target_transform=target_transform,
+ subsample_fraction=subsample_fraction,
)
# Extract dataset information.
@@ -128,6 +132,7 @@ class EmnistLinesDataset(Dataset):
if not self.data_filename.exists():
self._generate_data()
self._load_data()
+ self._subsample()
def _load_data(self) -> None:
"""Loads the dataset from the h5 file."""
diff --git a/src/text_recognizer/datasets/transforms.py b/src/text_recognizer/datasets/transforms.py
new file mode 100644
index 0000000..17231a8
--- /dev/null
+++ b/src/text_recognizer/datasets/transforms.py
@@ -0,0 +1,13 @@
+"""Transforms for PyTorch datasets."""
+import numpy as np
+from PIL import Image
+import torch
+from torch import Tensor
+
+
+class Transpose:
+ """Transposes the EMNIST image to the correct orientation."""
+
+ def __call__(self, image: Image) -> np.ndarray:
+ """Swaps axis."""
+ return np.array(image).swapaxes(0, 1)
diff --git a/src/text_recognizer/datasets/util.py b/src/text_recognizer/datasets/util.py
index 3acf5db..73968a1 100644
--- a/src/text_recognizer/datasets/util.py
+++ b/src/text_recognizer/datasets/util.py
@@ -126,7 +126,7 @@ class EmnistMapper:
"?",
]
- # padding symbol
+ # padding symbol, and acts as blank symbol as well.
extra_symbols.append("_")
max_key = max(mapping.keys())