import bitstring
import numpy as np
from PIL import Image, ImageEnhance
from PIL import ImageOps, ImageMath
from matplotlib import pyplot as plt
import cv2
%pylab inline
t56s = '0123456789[#@:>? ABCDEFGHI&.](< JKLMNOPQR-$*);\'|/STUVWXYZ ,%="!'
def read_record_ETL4(f, pos=6112):
    """Read a single record from an ETL4 handwriting-database file.

    Parameters
    ----------
    f : str
        Path to the ETL4 data file.
    pos : int, optional
        Zero-based record index; each record occupies 2952 bytes.
        Defaults to 6112 (presumably the last record of the file —
        ETL4 appears to hold 6113 records; confirm against the dataset docs).

    Returns
    -------
    list
        The decoded fields in declaration order. The caller uses
        r[18] / r[19] as the image width / height, r[20] as a brightness
        factor, and r[-1] as the raw 4-bit-per-pixel image bytes.
    """
    stream = bitstring.ConstBitStream(filename=f)
    # Seek to the requested record (fixed-size records of 2952 bytes).
    stream.bytepos = pos * 2952
    record = stream.readlist('2*uint:36,uint:8,pad:28,uint:8,pad:28,4*uint:6,pad:12,15*uint:36,pad:1008,bytes:21888')
    return record
# Quick sanity check: decode one record and display it.
filename = 'ETL4/ETL4/ETL4C' # specify the ETL4 filename here
r = read_record_ETL4(filename)
# r[18]/r[19] are used as (width, height); r[-1] holds packed 4-bit pixels.
iF = Image.frombytes('F', (r[18], r[19]), r[-1], 'bit', 4)
# Convert the float image to 8-bit greyscale for display/enhancement.
iP = iF.convert('L')
enhancer = ImageEnhance.Brightness(iP)
# r[20] scales the brightness (presumably a per-record intensity field — TODO confirm).
iE = enhancer.enhance(r[20])
plt.imshow(iE)
Populating the interactive namespace from numpy and matplotlib
<matplotlib.image.AxesImage at 0x82a0400>
import sys
def create_data(n_records=6113):
    """Decode every record of the ETL4 file into a greyscale image stack.

    Generalized: the record count is now a parameter instead of a
    hard-coded 6113, with the old value as the default so existing
    callers are unaffected.

    Parameters
    ----------
    n_records : int, optional
        Number of records to read from ``filename`` (module-level path).
        Defaults to 6113, the apparent size of ETL4.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_records, 76, 72), one 76x72 greyscale image
        per record (float64, values produced by PIL's 'L' conversion
        scaled by the per-record brightness field r[20]).
    """
    data = np.zeros((n_records, 76, 72))
    for i in range(n_records):
        r = read_record_ETL4(filename, pos=i)
        # Unpack the 4-bit image bytes into a float image, then to 8-bit grey.
        iF = Image.frombytes('F', (r[18], r[19]), r[-1], 'bit', 4)
        iP = iF.convert('L')
        # Scale brightness by the record's intensity field.
        enhancer = ImageEnhance.Brightness(iP)
        iE = enhancer.enhance(r[20])
        data[i, :, :] = np.array(iE)
    return data
# Build the full (6113, 76, 72) dataset and eyeball one sample.
data = create_data()
plt.imshow(data[301,:,:], cmap='gray')
<matplotlib.image.AxesImage at 0x852e748>
# Inspect the distinct grey levels of one sample (4-bit source -> few levels).
np.unique(data[543,:,:])
array([ 48., 64., 80., 96., 112., 128., 144.])
Ok, we got our data set composed of 6112 letters of 76x72 in grey scale.
Now, as we saw, our dataset is not particularly clean. Let's write a function that cleans it.
I implemented a simple Gaussian blur followed by thresholding (Otsu's histogram method) with "TOZERO" binarization, which keeps the stroke-pressure greyscale. Hopefully this will give better results later on, when we generate the Japanese letters.
You can try out yourself the smoothing as I left the functions here greyed out:
def preprocessing_data(data1):
    """Clean and binarize the images to build a better dataset for the VAE.

    Each 76x72 image is Gaussian-smoothed, thresholded with Otsu's
    method in TOZERO mode (preserving stroke-pressure greyscale above
    the threshold), and cropped to a square 72x72.

    Parameters
    ----------
    data1 : numpy.ndarray
        uint8 image stack of shape (N, 76, 72); modified in place by
        the thresholding step.

    Returns
    -------
    numpy.ndarray
        Cropped square stack of shape (N, 72, 72), float64.
    """
    # kernel = np.ones((3,3),np.float32)/9  # for manual cv2.filter2D smoothing experiments
    # BUG FIX: was np.zeros((data.shape[0], ...)) — sized from the *global*
    # `data` instead of the `data1` parameter; only worked because the
    # caller happened to pass a same-sized copy.
    crop_template = np.zeros((data1.shape[0], data1.shape[2], data1.shape[2]))  # cropping template
    for i in range(data1.shape[0]):
        # dst = cv2.medianBlur(data1[i,:,:],3)  # alternative smoothing, left for experimentation
        dst = cv2.GaussianBlur(data1[i, :, :], (3, 3), 0)  # smoothing
        # TOZERO + OTSU: pixels below the automatic threshold go to 0,
        # pixels above keep their grey value (stroke pressure).
        ret, data1[i, :, :] = cv2.threshold(dst, 0, 255, cv2.THRESH_TOZERO + cv2.THRESH_OTSU)  # binarizing
        crop_template[i, :, :] = data1[i, :72, :]  # crop 76 rows down to 72 (square)
    return crop_template
# OpenCV expects 8-bit unsigned images, so cast the float stack first.
data = np.array(data, dtype = np.uint8) # 8 bit unsigned pictures for opencv
# Work on a copy so the original `data` stays available for comparison plots.
data1=data.copy()
data1 = preprocessing_data(data1)
Let's check some of our samples at random
# Pick two random sample indices to compare original vs. preprocessed.
ran =np.random.randint(int(data1.shape[0]), size=(2, 1))
figure()
# First sample: original (left) and cleaned (right).
subplot(1,4,1),imshow(data[int(ran[0]),:,:],cmap = 'gray')
title('original {}'.format(int(ran[0]))), xticks([]), plt.yticks([])
subplot(1,4,2),imshow(data1[int(ran[0]),:,:],cmap='gray')
title('Sample {}'.format(int(ran[0]))), xticks([]), plt.yticks([])
# clim applies to the most recent image: stretch the cleaned image's contrast.
clim([0, 45])
# Second sample, same layout.
subplot(1,4,3),imshow(data[int(ran[1]),:,:],cmap = 'gray')
title('original {}'.format(int(ran[1]))), xticks([]), plt.yticks([])
subplot(1,4,4),imshow(data1[int(ran[1]),:,:],cmap='gray')
title('Sample {}'.format(int(ran[1]))), xticks([]), plt.yticks([])
clim([0, 45])
Great, let's move to our model after this long preprocessing phase.
Let's create our variational autoencoder for the purpose of training.
I had issues installing the bitstring package alongside TensorFlow in the same kernel, so I had to separate the notebooks.
# Persist the cleaned dataset for the next (TensorFlow) notebook.
np.save('Japanese.npy', data1)
You can move to the next notebook, located here.