import bitstring
import numpy as np
from PIL import Image, ImageEnhance
from PIL import ImageOps, ImageMath
from matplotlib import pyplot as plt
import cv2
%pylab inline
t56s = '0123456789[#@:>? ABCDEFGHI&.](< JKLMNOPQR-$*);\'|/STUVWXYZ ,%="!'
def read_record_ETL4(f, pos=6112):
    """Read a single record from an ETL4 handwriting-database file.

    Parameters
    ----------
    f : str
        Path to the ETL4 data file.
    pos : int, optional
        Zero-based record index; each record occupies 2952 bytes.
        Defaults to 6112 (presumably the last record of the file —
        ETL4 appears to hold 6113 records; confirm against the dataset docs).

    Returns
    -------
    list
        The decoded fields in declaration order. The caller uses
        r[18] / r[19] as the image width / height, r[20] as a brightness
        factor, and r[-1] as the raw 4-bit-per-pixel image bytes.
    """
    stream = bitstring.ConstBitStream(filename=f)
    # Seek to the requested record (fixed-size records of 2952 bytes).
    stream.bytepos = pos * 2952
    record = stream.readlist('2*uint:36,uint:8,pad:28,uint:8,pad:28,4*uint:6,pad:12,15*uint:36,pad:1008,bytes:21888')
    return record
# Quick sanity check: decode one record and display it.
filename = 'ETL4/ETL4/ETL4C' # specify the ETL4 filename here
r = read_record_ETL4(filename)
# r[18]/r[19] are used as (width, height); r[-1] holds packed 4-bit pixels.
iF = Image.frombytes('F', (r[18], r[19]), r[-1], 'bit', 4)
# Convert the float image to 8-bit greyscale for display/enhancement.
iP = iF.convert('L')
enhancer = ImageEnhance.Brightness(iP)
# r[20] scales the brightness (presumably a per-record intensity field — TODO confirm).
iE = enhancer.enhance(r[20])
plt.imshow(iE)
Populating the interactive namespace from numpy and matplotlib
<matplotlib.image.AxesImage at 0x82a0400>
import sys
def create_data(n_records=6113):
    """Decode every record of the ETL4 file into a greyscale image stack.

    Generalized: the record count is now a parameter instead of a
    hard-coded 6113, with the old value as the default so existing
    callers are unaffected.

    Parameters
    ----------
    n_records : int, optional
        Number of records to read from ``filename`` (module-level path).
        Defaults to 6113, the apparent size of ETL4.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_records, 76, 72), one 76x72 greyscale image
        per record (float64, values produced by PIL's 'L' conversion
        scaled by the per-record brightness field r[20]).
    """
    data = np.zeros((n_records, 76, 72))
    for i in range(n_records):
        r = read_record_ETL4(filename, pos=i)
        # Unpack the 4-bit image bytes into a float image, then to 8-bit grey.
        iF = Image.frombytes('F', (r[18], r[19]), r[-1], 'bit', 4)
        iP = iF.convert('L')
        # Scale brightness by the record's intensity field.
        enhancer = ImageEnhance.Brightness(iP)
        iE = enhancer.enhance(r[20])
        data[i, :, :] = np.array(iE)
    return data
# Build the full (6113, 76, 72) dataset and eyeball one sample.
data = create_data()
plt.imshow(data[301,:,:], cmap='gray')
<matplotlib.image.AxesImage at 0x852e748>
# Inspect the distinct grey levels of one sample (4-bit source -> few levels).
np.unique(data[543,:,:])
array([ 48., 64., 80., 96., 112., 128., 144.])
Ok, we got our data set composed of 6112 letters of 76x72 in grey scale.
Now, as we saw, our dataset is not particularly clean. Let's write a function that cleans it.
I implemented a simple Gaussian blur followed by thresholding (Otsu's histogram method) with "TOZERO" binarization, which keeps the stroke-pressure greyscale. Hopefully this will give better results later on, when we generate the Japanese letters.
You can try out yourself the smoothing as I left the functions here greyed out:
def preprocessing_data(data1):
    """Clean and binarize the images to build a better dataset for the VAE.

    Each 76x72 image is Gaussian-smoothed, thresholded with Otsu's
    method in TOZERO mode (preserving stroke-pressure greyscale above
    the threshold), and cropped to a square 72x72.

    Parameters
    ----------
    data1 : numpy.ndarray
        uint8 image stack of shape (N, 76, 72); modified in place by
        the thresholding step.

    Returns
    -------
    numpy.ndarray
        Cropped square stack of shape (N, 72, 72), float64.
    """
    # kernel = np.ones((3,3),np.float32)/9  # for manual cv2.filter2D smoothing experiments
    # BUG FIX: was np.zeros((data.shape[0], ...)) — sized from the *global*
    # `data` instead of the `data1` parameter; only worked because the
    # caller happened to pass a same-sized copy.
    crop_template = np.zeros((data1.shape[0], data1.shape[2], data1.shape[2]))  # cropping template
    for i in range(data1.shape[0]):
        # dst = cv2.medianBlur(data1[i,:,:],3)  # alternative smoothing, left for experimentation
        dst = cv2.GaussianBlur(data1[i, :, :], (3, 3), 0)  # smoothing
        # TOZERO + OTSU: pixels below the automatic threshold go to 0,
        # pixels above keep their grey value (stroke pressure).
        ret, data1[i, :, :] = cv2.threshold(dst, 0, 255, cv2.THRESH_TOZERO + cv2.THRESH_OTSU)  # binarizing
        crop_template[i, :, :] = data1[i, :72, :]  # crop 76 rows down to 72 (square)
    return crop_template
# OpenCV expects 8-bit unsigned images, so cast the float stack first.
data = np.array(data, dtype = np.uint8) # 8 bit unsigned pictures for opencv
# Work on a copy so the original `data` stays available for comparison plots.
data1=data.copy()
data1 = preprocessing_data(data1)
Let's check some of our samples at random
# Pick two random sample indices to compare original vs. preprocessed.
ran =np.random.randint(int(data1.shape[0]), size=(2, 1))
figure()
# First sample: original (left) and cleaned (right).
subplot(1,4,1),imshow(data[int(ran[0]),:,:],cmap = 'gray')
title('original {}'.format(int(ran[0]))), xticks([]), plt.yticks([])
subplot(1,4,2),imshow(data1[int(ran[0]),:,:],cmap='gray')
title('Sample {}'.format(int(ran[0]))), xticks([]), plt.yticks([])
# clim applies to the most recent image: stretch the cleaned image's contrast.
clim([0, 45])
# Second sample, same layout.
subplot(1,4,3),imshow(data[int(ran[1]),:,:],cmap = 'gray')
title('original {}'.format(int(ran[1]))), xticks([]), plt.yticks([])
subplot(1,4,4),imshow(data1[int(ran[1]),:,:],cmap='gray')
title('Sample {}'.format(int(ran[1]))), xticks([]), plt.yticks([])
clim([0, 45])
Great, let's move to our model after this long preprocessing phase.
Let's create our variational autoencoder for the purpose of training.
I had issues installing the bitstring package alongside TensorFlow in the same kernel, so I had to separate the notebooks.
# Persist the cleaned dataset for the next (TensorFlow) notebook.
np.save('Japanese.npy', data1)
You can move to the next notebook, located here.