From ae589fb3ffdbf6c4bb1ae35345f7a3665deeebc5 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Tue, 23 Mar 2021 21:55:42 +0100 Subject: refactored emnist lines dataset --- text_recognizer/datasets/base_dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'text_recognizer/datasets/base_dataset.py') diff --git a/text_recognizer/datasets/base_dataset.py b/text_recognizer/datasets/base_dataset.py index a004b8d..a9e9c24 100644 --- a/text_recognizer/datasets/base_dataset.py +++ b/text_recognizer/datasets/base_dataset.py @@ -61,13 +61,13 @@ def convert_strings_to_labels( strings: Sequence[str], mapping: Dict[str, int], length: int ) -> Tensor: """ - Convert a sequence of N strings to (N, length) ndarray, with each string wrapped with <S> and <E> tokens, - and padded wiht the <P> token. + Convert a sequence of N strings to (N, length) ndarray, with each string wrapped with <s> and <e> tokens, + and padded wiht the <p> token. """ - labels = torch.ones((len(strings), length), dtype=torch.long) * mapping["<P>"] + labels = torch.ones((len(strings), length), dtype=torch.long) * mapping["<p>"] for i, string in enumerate(strings): tokens = list(string) - tokens = ["<S>", *tokens, "<E>"] + tokens = ["<s>", *tokens, "<e>"] for j, token in enumerate(tokens): labels[i, j] = mapping[token] return labels -- cgit v1.2.3-70-g09d2