From 626cf824526b00b18043315322f86789278939fa Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Tue, 27 Sep 2022 23:16:33 +0200 Subject: Refactor metrics --- text_recognizer/models/metrics/__init__.py | 0 text_recognizer/models/metrics/cer.py | 23 ----------------------- text_recognizer/models/metrics/wer.py | 23 ----------------------- text_recognizer/models/transformer.py | 11 +++++------ 4 files changed, 5 insertions(+), 52 deletions(-) delete mode 100644 text_recognizer/models/metrics/__init__.py delete mode 100644 text_recognizer/models/metrics/cer.py delete mode 100644 text_recognizer/models/metrics/wer.py diff --git a/text_recognizer/models/metrics/__init__.py b/text_recognizer/models/metrics/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/text_recognizer/models/metrics/cer.py b/text_recognizer/models/metrics/cer.py deleted file mode 100644 index 238ecc3..0000000 --- a/text_recognizer/models/metrics/cer.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Character Error Rate (CER).""" -from typing import Sequence - -import torch -import torchmetrics - - -class CharacterErrorRate(torchmetrics.CharErrorRate): - """Character error rate metric, allowing for tokens to be ignored.""" - - def __init__(self, ignore_tokens: Sequence[int], *args): - super().__init__(*args) - self.ignore_tokens = set(ignore_tokens) - - def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None: - preds_l = [ - [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist() - ] - targets_l = [ - [t for t in target if t not in self.ignore_tokens] - for target in targets.tolist() - ] - super().update(preds_l, targets_l) diff --git a/text_recognizer/models/metrics/wer.py b/text_recognizer/models/metrics/wer.py deleted file mode 100644 index 78f5854..0000000 --- a/text_recognizer/models/metrics/wer.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Character Error Rate (CER).""" -from typing import Sequence - -import torch -import torchmetrics - - -class WordErrorRate(torchmetrics.WordErrorRate): - """Character error rate metric, allowing for tokens to be ignored.""" - - def __init__(self, ignore_tokens: Sequence[int], *args): - super().__init__(*args) - self.ignore_tokens = set(ignore_tokens) - - def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None: - preds_l = [ - [t for t in pred if t not in self.ignore_tokens] for pred in preds.tolist() - ] - targets_l = [ - [t for t in target if t not in self.ignore_tokens] - for target in targets.tolist() - ] - super().update(preds_l, targets_l) diff --git a/text_recognizer/models/transformer.py b/text_recognizer/models/transformer.py index 752f3eb..9500062 100644 --- a/text_recognizer/models/transformer.py +++ b/text_recognizer/models/transformer.py @@ -5,11 +5,10 @@ from typing import Optional, Tuple, Type import torch from omegaconf import DictConfig from torch import nn, Tensor +from torchmetrics import CharErrorRate, WordErrorRate from text_recognizer.data.tokenizer import Tokenizer from text_recognizer.models.base import LitBase -from text_recognizer.models.metrics.cer import CharacterErrorRate -from text_recognizer.models.metrics.wer import WordErrorRate class LitTransformer(LitBase): @@ -33,10 +32,10 @@ class LitTransformer(LitBase): ) self.max_output_len = max_output_len self.ignore_indices = set([self.start_index, self.end_index, self.pad_index]) - self.val_cer = CharacterErrorRate(self.ignore_indices) - self.test_cer = CharacterErrorRate(self.ignore_indices) - self.val_wer = WordErrorRate(self.ignore_indices) - self.test_wer = WordErrorRate(self.ignore_indices) + self.val_cer = CharErrorRate() + self.test_cer = CharErrorRate() + self.val_wer = WordErrorRate() + self.test_wer = WordErrorRate() def forward(self, data: Tensor) -> Tensor: """Forward pass with the transformer network.""" -- cgit v1.2.3-70-g09d2