summary | refs | log | tree | commit | diff
path: root/src/text_recognizer/datasets/emnist_lines_dataset.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/text_recognizer/datasets/emnist_lines_dataset.py')
-rw-r--r-- src/text_recognizer/datasets/emnist_lines_dataset.py 9
1 files changed, 9 insertions, 0 deletions
diff --git a/src/text_recognizer/datasets/emnist_lines_dataset.py b/src/text_recognizer/datasets/emnist_lines_dataset.py
index beb5343..6091da8 100644
--- a/src/text_recognizer/datasets/emnist_lines_dataset.py
+++ b/src/text_recognizer/datasets/emnist_lines_dataset.py
@@ -37,6 +37,9 @@ class EmnistLinesDataset(Dataset):
max_overlap: float = 0.33,
num_samples: int = 10000,
seed: int = 4711,
+ init_token: Optional[str] = None,
+ pad_token: Optional[str] = None,
+ eos_token: Optional[str] = None,
) -> None:
"""Set attributes and loads the dataset.
@@ -50,6 +53,9 @@ class EmnistLinesDataset(Dataset):
max_overlap (float): The maximum overlap between concatenated images. Defaults to 0.33.
num_samples (int): Number of samples to generate. Defaults to 10000.
seed (int): Seed number. Defaults to 4711.
+ init_token (Optional[str]): String representing the start of sequence token. Defaults to None.
+ pad_token (Optional[str]): String representing the pad token. Defaults to None.
+ eos_token (Optional[str]): String representing the end of sequence token. Defaults to None.
"""
super().__init__(
@@ -57,6 +63,9 @@ class EmnistLinesDataset(Dataset):
transform=transform,
target_transform=target_transform,
subsample_fraction=subsample_fraction,
+ init_token=init_token,
+ pad_token=pad_token,
+ eos_token=eos_token,
)
# Extract dataset information.