%pylab inline
Populating the interactive namespace from numpy and matplotlib
from importlib import reload
import os, sys, re, glob, time, pickle, IPython, logging
import scipy.ndimage as ndi
from itertools import islice
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchmore import layers, flex
import torchtrainers as tt
from torch.utils.data import DataLoader
from webdataset import WebDataset
from ocrlib import ocrhelpers as helpers
from ocrlib.ocrhelpers import *
from ocrlib import ocrmodels as models
# Record the run environment (timestamp, host, user, visible GPUs) in the
# notebook output for reproducibility; RUN is a shell helper from ocrhelpers.
RUN("date"); RUN("hostname"); RUN("whoami"); RUN("nvidia-smi -L")
charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# Precomputed char -> label table.  Labels are 1-based so that 0 stays free
# for the CTC blank; dict lookup is O(1) vs. str.find's O(len(charset)) scan.
_char_to_label = {c: i + 1 for i, c in enumerate(charset)}


def encode_str(s):
    """Encode string ``s`` as a list of 1-based integer class labels.

    Characters not in ``charset`` map to 0 (matching the original
    ``charset.find(c)+1`` behavior, where find returns -1).  Note that 0
    collides with the CTC blank label, so callers should ensure inputs
    contain only charset characters.
    """
    return [_char_to_label.get(c, 0) for c in s]
def decode_str(l):
    """Map a sequence of 1-based class labels back to a string.

    Inverse of ``encode_str`` for labels in 1..len(charset).
    NOTE(review): a label of 0 (the CTC blank) indexes charset[-1] and
    decodes to 'z' instead of raising -- confirm callers strip blanks first.
    """
    chars = [charset[label - 1] for label in l]
    return "".join(chars)
def _image_to_tensor(img):
    """uint8 image array -> float32 tensor in [0, 1] with a leading channel dim."""
    scaled = torch.tensor(img).float() / 255.0
    return scaled.unsqueeze(0)


def _transcript_to_labels(text):
    """Transcript string -> long tensor of 1-based class labels."""
    return torch.tensor(encode_str(text)).long()


# Per-field transforms applied by the dataset: image first, transcript second.
transforms = [_image_to_tensor, _transcript_to_labels]
# Word images paired with transcripts, stored as WebDataset tar shards.
# decoder="l8" presumably decodes images as 8-bit grayscale -- TODO confirm
# against the webdataset version in use; extensions selects one image file
# plus the .txt transcript per sample.
training = WebDataset("data/words-simple-training.tar", decoder="l8",
                      extensions="jpg;jpeg;ppm;png txt", transforms=transforms)
testing = WebDataset("data/words-simple-test.tar", decoder="l8",
                     extensions="jpg;jpeg;ppm;png txt", transforms=transforms)
# collate4ocr batches variable-width line images (padding behavior defined in
# ocrhelpers, not visible here).
training_dl = DataLoader(training, batch_size=5, collate_fn=helpers.collate4ocr)
testing_dl = DataLoader(testing, batch_size=20, collate_fn=helpers.collate4ocr)
# Peek at one batch to check the collated image tensor shape; the cell output
# below shows (B, C, H, W) = (5, 1, 96, 496).
next(iter(training_dl))[0].size()
date : Fri Oct 4 22:00:51 UTC 2019 hostname : sedna whoami : tmb nvidia-smi -L : GPU 0: GeForce GTX 1080 Ti (UUID: GPU-2d5cf167-db75-89ec-c6f7-5639237768ce)
torch.Size([5, 1, 96, 496])
!awk '/lstm2_ctc/' RS="\n\n" ocrlib/ocrmodels.py
def make_lstm2_ctc(noutput=noutput): model = nn.Sequential( layers.Input("BDHW", range=(0, 1), sizes=[None, 1, None, None]), *combos.conv2d_block(100, 3, mp=2, repeat=2), *combos.conv2d_block(200, 3, mp=2, repeat=2), *combos.conv2d_block(300, 3, mp=2, repeat=2), *combos.conv2d_block(400, 3, repeat=2), flex.Lstm2(400), *project_and_conv1d(800, noutput) ) flex.shape_inference(model, (1, 1, 48, 300)) return model
# Instantiate the lstm2_ctc model (conv/pool stack -> 2D LSTM -> 1D
# projection; see the ocrmodels source dumped by the awk cell above).
model = models.make("lstm2_ctc")
# Notebook cell echo: prints the full module structure.
model
Sequential( (0): Input(BDHW->BDHW torch.float32 (0, 1) cuda:0 [None, 1, None, None]) (1): Conv2d(1, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (2): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (3): ReLU() (4): Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (5): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (6): ReLU() (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (8): Conv2d(100, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (9): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (10): ReLU() (11): Conv2d(200, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (12): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (13): ReLU() (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (15): Conv2d(200, 300, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (16): BatchNorm2d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (17): ReLU() (18): Conv2d(300, 300, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (19): BatchNorm2d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (20): ReLU() (21): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (22): Conv2d(300, 400, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (23): BatchNorm2d(400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (24): ReLU() (25): Conv2d(400, 400, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (26): BatchNorm2d(400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (27): ReLU() (28): BDHW_LSTM( (hlstm): LSTM(400, 400, bidirectional=True) (vlstm): LSTM(800, 400, bidirectional=True) ) (29): Fun None lambda x: x.max(2)[0] (30): Conv1d(800, 800, kernel_size=(5,), stride=(1,)) (31): BatchNorm1d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (32): 
ReLU() (33): Conv1d(800, 53, kernel_size=(1,), stride=(1,)) (34): Reorder("BDL", "BDL") )
# Wrap the model in the project's CTC line-recognition training helper.
trainer = helpers.LineTrainer(model)
# NOTE(review): the meaning of the positional `10` (epochs vs. batches) and
# of `every` (reporting interval?) is defined in ocrhelpers -- confirm there.
trainer.train(training_dl, 10, every=15)
# Visualize predictions on the first 10 training batches; figsize/subplot/
# imshow/title come from the %pylab inline namespace.
figsize(10, 10)
for i, batch in enumerate(islice(training_dl, 0, 10)):
    subplot(5, 2, i+1)
    # threshold=0.8 presumably gates decoding confidence -- TODO confirm
    # against LineTrainer.predict_batch.
    result = trainer.predict_batch(batch[0], threshold=0.8)
    # Show the first image of the batch with its decoded prediction as title.
    imshow(batch[0][0,0].detach().numpy())
    title(decode_str(result[0]))
/usr/lib/python3/dist-packages/scipy/ndimage/measurements.py:431: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`. safe = ((np.issubdtype(dt, int) and dt.itemsize <= int_size) or
# Evaluate on the held-out set; given the (143, 5001) output below this
# presumably returns (error_count, total_count) -- verify in ocrhelpers.
trainer.errors(testing_dl)
(143, 5001)