diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-08-06 02:42:45 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2021-08-06 02:42:45 +0200 |
commit | 3ab82ad36bce6fa698a13a029a0694b75a5947b7 (patch) | |
tree | 136f71a62d60e3ccf01e1f95d64bb4d9f9c9befe /text_recognizer/data/emnist_mapping.py | |
parent | 1bccf71cf4eec335001b50a8fbc0c991d0e6d13a (diff) |
Fix VQVAE into en/decoder, bug in wandb artifact code uploading
Diffstat (limited to 'text_recognizer/data/emnist_mapping.py')
-rw-r--r-- | text_recognizer/data/emnist_mapping.py | 14 |
1 files changed, 11 insertions, 3 deletions
```diff
diff --git a/text_recognizer/data/emnist_mapping.py b/text_recognizer/data/emnist_mapping.py
index 925d214..3e91594 100644
--- a/text_recognizer/data/emnist_mapping.py
+++ b/text_recognizer/data/emnist_mapping.py
@@ -9,15 +9,23 @@ from text_recognizer.data.emnist import emnist_mapping
 
 
 class EmnistMapping(AbstractMapping):
-    def __init__(self, extra_symbols: Optional[Set[str]] = None) -> None:
+    def __init__(self, extra_symbols: Optional[Set[str]] = None, lower: bool = True) -> None:
         self.extra_symbols = set(extra_symbols) if extra_symbols is not None else None
         self.mapping, self.inverse_mapping, self.input_size = emnist_mapping(
             self.extra_symbols
         )
+        if lower:
+            self._to_lower()
         super().__init__(self.input_size, self.mapping, self.inverse_mapping)
 
-    def __attrs_post_init__(self) -> None:
-        """Post init configuration."""
+    def _to_lower(self) -> None:
+        """Converts mapping to lowercase letters only."""
+        def _filter(x: int) -> int:
+            if 40 <= x:
+                return x - 26
+            return x
+        self.inverse_mapping = {v: _filter(k) for k, v in enumerate(self.mapping)}
+        self.mapping = [c for c in self.mapping if not c.isupper()]
 
     def get_token(self, index: Union[int, Tensor]) -> str:
         if (index := int(index)) <= len(self.mapping):
```