diff options
author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-27 23:16:33 +0200 |
---|---|---|
committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2022-09-27 23:16:33 +0200 |
commit | 626cf824526b00b18043315322f86789278939fa (patch) | |
tree | fffd36187cdc056011a2db2cb2b3016a4f720886 | |
parent | 9c7dbb9ca70858b870f74ecf595d3169f0cbc711 (diff) |
Refactor metrics
-rw-r--r-- | text_recognizer/models/metrics/__init__.py | 0 | ||||
-rw-r--r-- | text_recognizer/models/metrics/cer.py | 23 | ||||
-rw-r--r-- | text_recognizer/models/metrics/wer.py | 23 | ||||
-rw-r--r-- | text_recognizer/models/transformer.py | 11 |
4 files changed, 5 insertions, 52 deletions
```diff
diff --git a/text_recognizer/models/metrics/__init__.py b/text_recognizer/models/metrics/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/text_recognizer/models/metrics/__init__.py
+++ /dev/null
diff --git a/text_recognizer/models/metrics/cer.py b/text_recognizer/models/metrics/cer.py
deleted file mode 100644
index 238ecc3..0000000
--- a/text_recognizer/models/metrics/cer.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""Character Error Rate (CER)."""
-from typing import Sequence
-
-import torch
-import torchmetrics
-
-
-class CharacterErrorRate(torchmetrics.CharErrorRate):
-    """Character error rate metric, allowing for tokens to be ignored."""
-
-    def __init__(self, ignore_tokens: Sequence[int], *args):
-        super().__init__(*args)
-        self.ignore_tokens = set(ignore_tokens)
-
-    def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None:
-        preds_l = [
-            [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist()
-        ]
-        targets_l = [
-            [t for t in target if t not in self.ignore_tokens]
-            for target in targets.tolist()
-        ]
-        super().update(preds_l, targets_l)
diff --git a/text_recognizer/models/metrics/wer.py b/text_recognizer/models/metrics/wer.py
deleted file mode 100644
index 78f5854..0000000
--- a/text_recognizer/models/metrics/wer.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""Character Error Rate (CER)."""
-from typing import Sequence
-
-import torch
-import torchmetrics
-
-
-class WordErrorRate(torchmetrics.WordErrorRate):
-    """Character error rate metric, allowing for tokens to be ignored."""
-
-    def __init__(self, ignore_tokens: Sequence[int], *args):
-        super().__init__(*args)
-        self.ignore_tokens = set(ignore_tokens)
-
-    def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None:
-        preds_l = [
-            [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist()
-        ]
-        targets_l = [
-            [t for t in target if t not in self.ignore_tokens]
-            for target in targets.tolist()
-        ]
-        super().update(preds_l, targets_l)
diff --git a/text_recognizer/models/transformer.py b/text_recognizer/models/transformer.py
index 752f3eb..9500062 100644
--- a/text_recognizer/models/transformer.py
+++ b/text_recognizer/models/transformer.py
@@ -5,11 +5,10 @@ from typing import Optional, Tuple, Type
 import torch
 from omegaconf import DictConfig
 from torch import nn, Tensor
+from torchmetrics import CharErrorRate, WordErrorRate
 
 from text_recognizer.data.tokenizer import Tokenizer
 from text_recognizer.models.base import LitBase
-from text_recognizer.models.metrics.cer import CharacterErrorRate
-from text_recognizer.models.metrics.wer import WordErrorRate
 
 
 class LitTransformer(LitBase):
@@ -33,10 +32,10 @@ class LitTransformer(LitBase):
         )
         self.max_output_len = max_output_len
         self.ignore_indices = set([self.start_index, self.end_index, self.pad_index])
-        self.val_cer = CharacterErrorRate(self.ignore_indices)
-        self.test_cer = CharacterErrorRate(self.ignore_indices)
-        self.val_wer = WordErrorRate(self.ignore_indices)
-        self.test_wer = WordErrorRate(self.ignore_indices)
+        self.val_cer = CharErrorRate()
+        self.test_cer = CharErrorRate()
+        self.val_wer = WordErrorRate()
+        self.test_wer = WordErrorRate()
 
     def forward(self, data: Tensor) -> Tensor:
         """Forward pass with the transformer network."""
```