diff options
author | aktersnurra <gustaf.rydholm@gmail.com> | 2020-08-11 23:08:56 +0200 |
---|---|---|
committer | aktersnurra <gustaf.rydholm@gmail.com> | 2020-08-11 23:08:56 +0200 |
commit | 95cbdf5bc1cc9639febda23c28d8f464c998b214 (patch) | |
tree | 435faa5645bab4c05b7824f33d8e94a0bc421b66 /src/text_recognizer/networks | |
parent | 53677be4ec14854ea4881b0d78730e0414c8dedd (diff) |
Working on the CNN LSTM CTC model.
Diffstat (limited to 'src/text_recognizer/networks')
-rw-r--r-- | src/text_recognizer/networks/misc.py | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/src/text_recognizer/networks/misc.py b/src/text_recognizer/networks/misc.py index 9440f9d..2fbab8f 100644 --- a/src/text_recognizer/networks/misc.py +++ b/src/text_recognizer/networks/misc.py @@ -21,8 +21,9 @@ def sliding_window( """ unfold = Unfold(kernel_size=patch_size, stride=stride) - patches = unfold(images) + # Preform the slidning window, unsqueeze as the channel dimesion is lost. + patches = unfold(images).unsqueeze(1) patches = rearrange( - patches, "b (h w) c -> b c h w", h=patch_size[0], w=patch_size[1] + patches, "b c (h w) t -> b t c h w", h=patch_size[0], w=patch_size[1] ) return patches |