summary | refs | log | tree | commit | diff
path: root/src/text_recognizer/datasets/emnist_lines_dataset.py
diff options
context:
space:
mode:
author: aktersnurra <gustaf.rydholm@gmail.com> 2021-01-07 20:10:54 +0100
committer: aktersnurra <gustaf.rydholm@gmail.com> 2021-01-07 20:10:54 +0100
commit: ff9a21d333f11a42e67c1963ed67de9c0fda87c9 (patch)
tree: afee959135416fe92cf6df377e84fb0a9e9714a0 /src/text_recognizer/datasets/emnist_lines_dataset.py
parent: 25b5d6983d51e0e791b96a76beb7e49f392cd9a8 (diff)
Minor updates.
Diffstat (limited to 'src/text_recognizer/datasets/emnist_lines_dataset.py')
-rw-r--r-- src/text_recognizer/datasets/emnist_lines_dataset.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/src/text_recognizer/datasets/emnist_lines_dataset.py b/src/text_recognizer/datasets/emnist_lines_dataset.py
index eddf341..1992446 100644
--- a/src/text_recognizer/datasets/emnist_lines_dataset.py
+++ b/src/text_recognizer/datasets/emnist_lines_dataset.py
@@ -44,6 +44,7 @@ class EmnistLinesDataset(Dataset):
init_token: Optional[str] = None,
pad_token: Optional[str] = None,
eos_token: Optional[str] = None,
+ lower: bool = False,
) -> None:
"""Set attributes and loads the dataset.
@@ -60,6 +61,7 @@ class EmnistLinesDataset(Dataset):
init_token (Optional[str]): String representing the start of sequence token. Defaults to None.
pad_token (Optional[str]): String representing the pad token. Defaults to None.
eos_token (Optional[str]): String representing the end of sequence token. Defaults to None.
+ lower (bool): If True, convert uppercase letters to lowercase. Otherwise, use both upper and lowercase.
"""
self.pad_token = "_" if pad_token is None else pad_token
@@ -72,6 +74,7 @@ class EmnistLinesDataset(Dataset):
init_token=init_token,
pad_token=self.pad_token,
eos_token=eos_token,
+ lower=lower,
)
# Extract dataset information.