Applications
Data Wrangling with Pandas
Data Collection and use of APIs
Web Application Development
Frameworks to be used
import numpy as np
import tensorflow as tf
def binary_encode(i, num_digits):
    """Encode ``i`` as ``num_digits`` binary digits, most significant first.

    Bit ``d`` is obtained as ``(i >> d) & 1``; bits above ``num_digits`` are
    dropped. The digits are returned as a NumPy array. Because only shifts
    and masks are used, ``i`` may also be an integer NumPy array, in which
    case the result has one row per bit position.
    """
    bits_msb_first = [(i >> shift) & 1 for shift in reversed(range(num_digits))]
    return np.array(bits_msb_first)
# Quick check: encode the number 9 using 10 binary digits.
binary_encode(9,10)
# Output: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 1])
def fizz_buzz_encode(i):
    """Return the one-hot fizz-buzz class of ``i`` as a length-4 array.

    Positions are, in order: plain number, multiple of 3, multiple of 5,
    multiple of 15. Divisibility by 15 is checked first so that it takes
    precedence over the 5 and 3 cases.
    """
    if i % 15 == 0:
        one_hot = [0, 0, 0, 1]
    elif i % 5 == 0:
        one_hot = [0, 0, 1, 0]
    elif i % 3 == 0:
        one_hot = [0, 1, 0, 0]
    else:
        one_hot = [1, 0, 0, 0]
    return np.array(one_hot)
# Width of the binary input encoding.
NUM_DIGITS = 11
# Numbers above this value are used for training.
test_number = 200

# Training set: every integer from test_number + 1 up to 2 ** NUM_DIGITS - 1,
# binary-encoded as input and one-hot fizz-buzz encoded as target.
trX = np.array(
    [binary_encode(i, NUM_DIGITS)
     for i in range(test_number + 1, 2 ** NUM_DIGITS)]
)
trY = np.array(
    [fizz_buzz_encode(i)
     for i in range(test_number + 1, 2 ** NUM_DIGITS)]
)

# Number of units in the single hidden layer.
NUM_HIDDEN = 400

# Graph inputs: a batch of encoded numbers and the matching one-hot targets.
X = tf.placeholder(tf.float32, shape=[None, NUM_DIGITS])
Y = tf.placeholder(tf.float32, shape=[None, 4])
def init_weights(shape):
    """Return a ``tf.Variable`` of the given shape with random initial values.

    The initial values come from ``tf.random_normal`` with a standard
    deviation of 0.01.
    """
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)
# Weight matrices: input -> hidden layer, and hidden layer -> 4 output classes.
w_h = init_weights([NUM_DIGITS, NUM_HIDDEN])
w_o = init_weights([NUM_HIDDEN, 4])

# Show the weight shapes. print() with a single argument behaves the same on
# Python 2 and Python 3, whereas the bare print statement is a syntax error
# on Python 3.
print(w_h.get_shape())
print(w_o.get_shape())
# Output: (11, 400) (400, 4)
def model(X, w_h, w_o):
    """Build the forward pass of a network with one ReLU hidden layer.

    ``X`` is multiplied by ``w_h`` and passed through ReLU; the result is
    multiplied by ``w_o`` and returned as-is (no softmax is applied here).
    """
    hidden = tf.matmul(X, w_h)
    hidden = tf.nn.relu(hidden)
    return tf.matmul(hidden, w_o)
# Raw (pre-softmax) class scores produced by the network.
py_x = model(X, w_h, w_o)

# Mean softmax cross-entropy between the scores and the one-hot targets.
# The arguments are passed by keyword: TensorFlow 1.x rejects positional
# arguments for this op, and the keyword form is also accepted by the older
# (logits, labels) signature.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))

# One gradient-descent step on the cost, with learning rate 0.05.
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)

# Predicted class index = position of the largest score in each row.
predict_op = tf.argmax(py_x, 1)
def fizz_buzz(i, prediction):
    """Turn a predicted class index into its fizz-buzz output string.

    Index 0 yields the number itself as a string; 1, 2 and 3 yield
    "fizz", "buzz" and "fizzbuzz" respectively.
    """
    choices = (str(i), "fizz", "buzz", "fizzbuzz")
    return choices[prediction]
# Number of training examples fed to each gradient step.
BATCH_SIZE = 128

# Launch the graph in a session
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    for epoch in range(10000):
        # Reshuffle inputs and targets together before every epoch.
        p = np.random.permutation(len(trX))
        trX, trY = trX[p], trY[p]

        # One training step per slice of at most BATCH_SIZE examples.
        for start in range(0, len(trX), BATCH_SIZE):
            end = start + BATCH_SIZE
            batch = {X: trX[start:end], Y: trY[start:end]}
            sess.run(train_op, feed_dict=batch)

        # Every 500 epochs, report accuracy on the full training set.
        if epoch % 500 == 0:
            expected = np.argmax(trY, axis=1)
            predicted = sess.run(predict_op, feed_dict={X: trX, Y: trY})
            print(epoch, np.mean(expected == predicted))

    # And now for some fizz buzz: predict classes for the held-out numbers.
    # NOTE(review): np.arange(1, test_number) stops at test_number - 1 while
    # the training data starts at test_number + 1, so test_number itself is in
    # neither set -- confirm this is intended.
    numbers = np.arange(1, test_number)
    teX = np.transpose(binary_encode(numbers, NUM_DIGITS))
    pred_label = sess.run(predict_op, feed_dict={X: teX})
    output = np.vectorize(fizz_buzz)(numbers, pred_label)
# Output: (0, 0.53329723876556578) (500, 0.57011369788846777) (1000, 0.92528424472116944) (1500, 0.9821331889550623) (2000, 0.99133730373578777) (2500, 0.99891716296697342) (3000, 1.0) (3500, 1.0) (4000, 1.0) (4500, 1.0) (5000, 1.0) (5500, 1.0) (6000, 1.0) (6500, 1.0) (7000, 1.0) (7500, 1.0) (8000, 1.0) (8500, 1.0) (9000, 1.0) (9500, 1.0)
def setlabel(i):
    """Return the integer fizz-buzz class of ``i``.

    3 for multiples of 15, 2 for multiples of 5, 1 for multiples of 3 and
    0 otherwise. The divisors are tried from most to least specific so the
    multiple-of-15 case wins over the 5 and 3 cases.
    """
    for label, divisor in ((3, 15), (2, 5), (1, 3)):
        if i % divisor == 0:
            return label
    return 0
# Ground-truth class index for each test number, computed with setlabel.
# The range is the same np.arange(1, test_number) used when predicting, so
# correct_label lines up element-for-element with pred_label.
numbers = np.arange(1, test_number)
correct_label = np.vectorize(setlabel)(numbers)
# Bare expression: displays the array when run in a notebook cell.
correct_label
array([0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0])
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
%matplotlib inline
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    cm : 2-D array
        Confusion matrix; rows are drawn as true labels and columns as
        predicted labels.
    classes : sequence
        Tick labels, one per row/column of `cm`.
    normalize : bool
        If True, every row is divided by its sum before printing and
        plotting.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the image.
    """
    # Normalize BEFORE drawing so that the image colours, the printed matrix,
    # the per-cell text and the text-colour threshold all use the same values.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Normalized fractions are shown with two decimals; raw values are
        # passed through unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# Use a small font so the per-cell annotations fit inside the matrix cells.
font = {'size' : 8}
plt.rc('font', **font)

# Compute confusion matrix
cnf_matrix = confusion_matrix(correct_label, pred_label)
np.set_printoptions(precision=1)

# Display names for class indices 0..3, in the order produced by setlabel.
# The last label is spelled "fizzbuzz" to match the string used by fizz_buzz().
class_names = ['number','fizz','buzz','fizzbuzz']

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')
plt.show()
# Output: Confusion matrix, without normalization [[106 1 0 0] [ 0 53 0 0] [ 0 0 26 0] [ 0 1 0 12]]