diff options
Diffstat (limited to 'text_recognizer/data/emnist_mapping.py')
-rw-r--r-- | text_recognizer/data/emnist_mapping.py | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/text_recognizer/data/emnist_mapping.py b/text_recognizer/data/emnist_mapping.py
index 925d214..3e91594 100644
--- a/text_recognizer/data/emnist_mapping.py
+++ b/text_recognizer/data/emnist_mapping.py
@@ -9,15 +9,23 @@ from text_recognizer.data.emnist import emnist_mapping
 
 
 class EmnistMapping(AbstractMapping):
-    def __init__(self, extra_symbols: Optional[Set[str]] = None) -> None:
+    def __init__(self, extra_symbols: Optional[Set[str]] = None, lower: bool = True) -> None:
         self.extra_symbols = set(extra_symbols) if extra_symbols is not None else None
         self.mapping, self.inverse_mapping, self.input_size = emnist_mapping(
             self.extra_symbols
         )
+        if lower:
+            self._to_lower()
         super().__init__(self.input_size, self.mapping, self.inverse_mapping)
 
-    def __attrs_post_init__(self) -> None:
-        """Post init configuration."""
+    def _to_lower(self) -> None:
+        """Converts mapping to lowercase letters only."""
+        def _filter(x: int) -> int:
+            if 40 <= x:
+                return x - 26
+            return x
+        self.inverse_mapping = {v: _filter(k) for k, v in enumerate(self.mapping)}
+        self.mapping = [c for c in self.mapping if not c.isupper()]
 
     def get_token(self, index: Union[int, Tensor]) -> str:
         if (index := int(index)) <= len(self.mapping):
|