summaryrefslogtreecommitdiff
path: root/src/text_recognizer/networks/misc.py
diff options
context:
space:
mode:
authoraktersnurra <gustaf.rydholm@gmail.com>2020-08-20 22:18:35 +0200
committeraktersnurra <gustaf.rydholm@gmail.com>2020-08-20 22:18:35 +0200
commit1f459ba19422593de325983040e176f97cf4ffc0 (patch)
tree89fef442d5dbe0c83253e9566d1762f0704f64e2 /src/text_recognizer/networks/misc.py
parent95cbdf5bc1cc9639febda23c28d8f464c998b214 (diff)
A lot of stuff working :D. ResNet implemented!
Diffstat (limited to 'src/text_recognizer/networks/misc.py')
-rw-r--r--src/text_recognizer/networks/misc.py20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/text_recognizer/networks/misc.py b/src/text_recognizer/networks/misc.py
index 2fbab8f..6f61b5d 100644
--- a/src/text_recognizer/networks/misc.py
+++ b/src/text_recognizer/networks/misc.py
@@ -1,9 +1,9 @@
"""Miscellaneous neural network functionality."""
-from typing import Tuple
+from typing import Tuple, Type
from einops import rearrange
import torch
-from torch.nn import Unfold
+from torch import nn
def sliding_window(
@@ -20,10 +20,24 @@ def sliding_window(
torch.Tensor: A tensor with the shape (batch, patches, height, width).
"""
- unfold = Unfold(kernel_size=patch_size, stride=stride)
+ unfold = nn.Unfold(kernel_size=patch_size, stride=stride)
# Perform the sliding window, unsqueeze as the channel dimension is lost.
patches = unfold(images).unsqueeze(1)
patches = rearrange(
patches, "b c (h w) t -> b t c h w", h=patch_size[0], w=patch_size[1]
)
return patches
+
+
def activation_function(activation: str) -> nn.Module:
    """Returns a newly constructed activation module selected by name.

    Args:
        activation (str): Name of the activation function. The lookup is
            case-insensitive. Supported names are "gelu", "leaky_relu",
            "none", "relu", and "selu".

    Returns:
        nn.Module: A new instance of the requested activation module. "none"
            maps to nn.Identity.

    Raises:
        KeyError: If the name does not match any supported activation.

    """
    # Map each name to a zero-argument factory so that only the requested
    # module is instantiated, instead of building every activation per call.
    activation_fns = {
        "gelu": nn.GELU,
        "leaky_relu": lambda: nn.LeakyReLU(negative_slope=1.0e-2, inplace=True),
        "none": nn.Identity,
        "relu": lambda: nn.ReLU(inplace=True),
        "selu": lambda: nn.SELU(inplace=True),
    }

    key = activation.lower()
    if key not in activation_fns:
        # KeyError is kept as the exception type, since that is what a failed
        # dictionary lookup raised before; the message now lists valid names.
        raise KeyError(
            f"Unknown activation function {activation!r}, "
            f"expected one of {sorted(activation_fns)}."
        )
    return activation_fns[key]()