From 0540237d794ab2071764dc74e4d3bb52f5bf44be Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Tue, 27 Sep 2022 00:10:26 +0200 Subject: Update metrics --- text_recognizer/models/metrics/__init__.py | 0 text_recognizer/models/metrics/cer.py | 23 +++++++++++++++++++++++ text_recognizer/models/metrics/wer.py | 23 +++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 text_recognizer/models/metrics/__init__.py create mode 100644 text_recognizer/models/metrics/cer.py create mode 100644 text_recognizer/models/metrics/wer.py (limited to 'text_recognizer/models/metrics') diff --git a/text_recognizer/models/metrics/__init__.py b/text_recognizer/models/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/text_recognizer/models/metrics/cer.py b/text_recognizer/models/metrics/cer.py new file mode 100644 index 0000000..238ecc3 --- /dev/null +++ b/text_recognizer/models/metrics/cer.py @@ -0,0 +1,23 @@ +"""Character Error Rate (CER).""" +from typing import Sequence + +import torch +import torchmetrics + + +class CharacterErrorRate(torchmetrics.CharErrorRate): + """Character error rate metric, allowing for tokens to be ignored.""" + + def __init__(self, ignore_tokens: Sequence[int], *args): + super().__init__(*args) + self.ignore_tokens = set(ignore_tokens) + + def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None: + preds_l = [ + [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist() + ] + targets_l = [ + [t for t in target if t not in self.ignore_tokens] + for target in targets.tolist() + ] + super().update(preds_l, targets_l) diff --git a/text_recognizer/models/metrics/wer.py b/text_recognizer/models/metrics/wer.py new file mode 100644 index 0000000..78f5854 --- /dev/null +++ b/text_recognizer/models/metrics/wer.py @@ -0,0 +1,23 @@ +"""Word Error Rate (WER).""" +from typing import Sequence + +import torch +import torchmetrics + + +class WordErrorRate(torchmetrics.WordErrorRate): + 
"""Word error rate metric, allowing for tokens to be ignored.""" + + def __init__(self, ignore_tokens: Sequence[int], *args): + super().__init__(*args) + self.ignore_tokens = set(ignore_tokens) + + def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None: + preds_l = [ + [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist() + ] + targets_l = [ + [t for t in target if t not in self.ignore_tokens] + for target in targets.tolist() + ] + super().update(preds_l, targets_l) -- cgit v1.2.3-70-g09d2