summaryrefslogtreecommitdiff
path: root/text_recognizer/models/metrics/wer.py
diff options
context:
space:
mode:
authorGustaf Rydholm <gustaf.rydholm@gmail.com>2022-09-27 00:10:26 +0200
committerGustaf Rydholm <gustaf.rydholm@gmail.com>2022-09-27 00:10:26 +0200
commit0540237d794ab2071764dc74e4d3bb52f5bf44be (patch)
treedad3469f843da16716871d0b9805bf0301aa6cfe /text_recognizer/models/metrics/wer.py
parentbf680dce6bc7dcadd20923a193fc9ab8fbd0a0c6 (diff)
Update metrics
Diffstat (limited to 'text_recognizer/models/metrics/wer.py')
-rw-r--r--text_recognizer/models/metrics/wer.py23
1 files changed, 23 insertions, 0 deletions
diff --git a/text_recognizer/models/metrics/wer.py b/text_recognizer/models/metrics/wer.py
new file mode 100644
index 0000000..78f5854
--- /dev/null
+++ b/text_recognizer/models/metrics/wer.py
@@ -0,0 +1,23 @@
+"""Word Error Rate (WER)."""
+from typing import Sequence
+
+import torch
+import torchmetrics
+
+
+class WordErrorRate(torchmetrics.WordErrorRate):
+ """Character error rate metric, allowing for tokens to be ignored."""
+
+ def __init__(self, ignore_tokens: Sequence[int], *args):
+ super().__init__(*args)
+ self.ignore_tokens = set(ignore_tokens)
+
+ def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None:
+ preds_l = [
+ [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist()
+ ]
+ targets_l = [
+ [t for t in target if t not in self.ignore_tokens]
+ for target in targets.tolist()
+ ]
+ super().update(preds_l, targets_l)