summary | refs | log | tree | commit | diff
path: root/src/text_recognizer/datasets/dataset.py
diff options
context:
space:
mode:
author: aktersnurra <gustaf.rydholm@gmail.com> 2021-01-07 20:10:54 +0100
committer: aktersnurra <gustaf.rydholm@gmail.com> 2021-01-07 20:10:54 +0100
commit: ff9a21d333f11a42e67c1963ed67de9c0fda87c9 (patch)
tree: afee959135416fe92cf6df377e84fb0a9e9714a0 /src/text_recognizer/datasets/dataset.py
parent: 25b5d6983d51e0e791b96a76beb7e49f392cd9a8 (diff)
Minor updates.
Diffstat (limited to 'src/text_recognizer/datasets/dataset.py')
-rw-r--r-- src/text_recognizer/datasets/dataset.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/src/text_recognizer/datasets/dataset.py b/src/text_recognizer/datasets/dataset.py
index 95063bc..e794605 100644
--- a/src/text_recognizer/datasets/dataset.py
+++ b/src/text_recognizer/datasets/dataset.py
@@ -22,6 +22,7 @@ class Dataset(data.Dataset):
init_token: Optional[str] = None,
pad_token: Optional[str] = None,
eos_token: Optional[str] = None,
+ lower: bool = False,
) -> None:
"""Initialization of Dataset class.
@@ -33,6 +34,7 @@ class Dataset(data.Dataset):
init_token (Optional[str]): String representing the start of sequence token. Defaults to None.
pad_token (Optional[str]): String representing the pad token. Defaults to None.
eos_token (Optional[str]): String representing the end of sequence token. Defaults to None.
+ lower (bool): Only use lower case letters. Defaults to False.
Raises:
ValueError: If subsample_fraction is not None and outside the range (0, 1).
@@ -47,7 +49,7 @@ class Dataset(data.Dataset):
self.subsample_fraction = subsample_fraction
self._mapper = EmnistMapper(
- init_token=init_token, eos_token=eos_token, pad_token=pad_token
+ init_token=init_token, eos_token=eos_token, pad_token=pad_token, lower=lower
)
self._input_shape = self._mapper.input_shape
self._output_shape = self._mapper._num_classes