diff options
author | aktersnurra <gustaf.rydholm@gmail.com> | 2020-10-22 22:45:58 +0200 |
---|---|---|
committer | aktersnurra <gustaf.rydholm@gmail.com> | 2020-10-22 22:45:58 +0200 |
commit | 4d7713746eb936832e84852e90292936b933e87d (patch) | |
tree | 2b2519d1d2ce53d4e1390590f52018d55dadbc7c /src/text_recognizer/datasets/emnist_lines_dataset.py | |
parent | 1b3b8073a19f939d18a0bb85247eb0d99284f7cc (diff) |
Transformer added, many other changes.
Diffstat (limited to 'src/text_recognizer/datasets/emnist_lines_dataset.py')
-rw-r--r-- | src/text_recognizer/datasets/emnist_lines_dataset.py | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/src/text_recognizer/datasets/emnist_lines_dataset.py b/src/text_recognizer/datasets/emnist_lines_dataset.py index beb5343..6091da8 100644 --- a/src/text_recognizer/datasets/emnist_lines_dataset.py +++ b/src/text_recognizer/datasets/emnist_lines_dataset.py @@ -37,6 +37,9 @@ class EmnistLinesDataset(Dataset): max_overlap: float = 0.33, num_samples: int = 10000, seed: int = 4711, + init_token: Optional[str] = None, + pad_token: Optional[str] = None, + eos_token: Optional[str] = None, ) -> None: """Set attributes and loads the dataset. @@ -50,6 +53,9 @@ class EmnistLinesDataset(Dataset): max_overlap (float): The maximum overlap between concatenated images. Defaults to 0.33. num_samples (int): Number of samples to generate. Defaults to 10000. seed (int): Seed number. Defaults to 4711. + init_token (Optional[str]): String representing the start of sequence token. Defaults to None. + pad_token (Optional[str]): String representing the pad token. Defaults to None. + eos_token (Optional[str]): String representing the end of sequence token. Defaults to None. """ super().__init__( @@ -57,6 +63,9 @@ class EmnistLinesDataset(Dataset): transform=transform, target_transform=target_transform, subsample_fraction=subsample_fraction, + init_token=init_token, + pad_token=pad_token, + eos_token=eos_token, ) # Extract dataset information. |