import numpy as np
from pandas import DataFrame, Series
from scipy.io.arff import loadarff
import scipy
# Load the example ARFF file; loadarff returns the records plus their metadata.
data, meta = loadarff('ex_1.arff')
df = DataFrame(data)
# Boolean-mask selection: keep only the rows whose `major` equals 'cs'.
df[df.major == 'cs']
friends | age | major | |
---|---|---|---|
0 | 8 | 20 | cs |
1 | 2 | 21 | cs |
2 | 0 | 25 | cs |
3 | 1 | 23 | cs |
# Work on a copy so the experiments below do not modify `df`.
dc = df.copy()
# Assigning a scalar to a column sets that value on every row.
dc.major = True
dc
friends | age | major | |
---|---|---|---|
0 | 8 | 20 | True |
1 | 2 | 21 | True |
2 | 0 | 25 | True |
3 | 1 | 23 | True |
4 | 8 | 25 | True |
5 | 20 | 20 | True |
6 | 10 | 23 | True |
7 | 7 | 25 | True |
# Halve every value of the `friends` column.
dc.friends /= 2
# Replace `major` with a boolean flag: True where the original major is 'cs'.
dc.major = df.major == 'cs'
# Set age to 2 on the CS rows. A single .loc assignment is used instead of
# the chained form `dc.age[mask] = 2`, which may write to a temporary object
# and leave `dc` unchanged (pandas flags this as chained assignment).
dc.loc[df.major == 'cs', 'age'] = 2
# Distinct class labels present in the original frame.
df.major.unique()
array(['cs', 'law'], dtype=object)
# Integer (0/1) version of the boolean `major` flag.
m = dc.major.copy().astype(int)
dm = df.copy()
# Replace the labels with a running row number. Uses the `np` alias from the
# imports at the top of the file; the bare name `numpy` is not bound by them.
dm.major = np.arange(len(dm.major))
# Rows that are CS majors OR have at least 10 friends. `|` is the explicit
# element-wise OR for boolean masks (the original relied on `+`).
df[(df.major == 'cs') | (df.friends >= 10)]
friends | age | major | |
---|---|---|---|
0 | 8 | 20 | cs |
1 | 2 | 21 | cs |
2 | 0 | 25 | cs |
3 | 1 | 23 | cs |
5 | 20 | 20 | law |
6 | 10 | 23 | law |
# Every column except the last one. Take an explicit copy so that adding a
# column below writes to an independent frame rather than to a slice of `df`.
nums = df[df.columns[:-1]].copy()
# Add an all-zero 'man' column aligned on the existing index.
nums['man'] = Series(np.zeros(len(nums)), index=nums.index)
list(nums.columns)
['friends', 'age', 'man']
# Rebind `data`, `meta` and `df` to the iris data set.
data, meta = loadarff('iris.arff')
df = DataFrame(data)
from itertools import cycle
from IPython.display import display
def plot_weights(weights, labels, ncolors=7):
    """Plot one curve per weight column, varying line style per colour group.

    Columns are drawn in groups of ``ncolors``; each group gets the next
    line style from '-', '--', '-.' so that curves which end up with the
    same colour in different groups can still be told apart.

    Parameters
    ----------
    weights : 2-D array indexed as ``weights[:, column]``; one column per
        plotted curve.
    labels : iterable of legend labels; a final 'Bias' entry is appended.
    ncolors : number of columns per line-style group. Defaults to 7, the
        value the original hard-coded (presumably the length of the colour
        cycle in use -- confirm for your matplotlib version).
    """
    styles = cycle(('-', '--', '-.'))
    n = len(weights[0])
    # Stepping the range by ncolors replaces the Python-2-only `n/ncolors`
    # integer division and the separate trailing-remainder plot; a group is
    # only drawn when it contains at least one column.
    for start in range(0, n, ncolors):
        # next(styles) works on Python 2.6+ and 3; styles.next() is 2-only.
        plot(weights[:, start:start + ncolors], ls=next(styles))
    legend(list(labels) + ['Bias'], loc='upper left', bbox_to_anchor=(1.01, 1))
def test(fname):
    """Train a model on an ARFF file and plot its training history.

    Builds the model with ``p.fromArff(fname, .1)``, trains it with
    ``trainUp()`` and then draws, in new figures:

    * one weights-vs-epoch plot per perceptron,
    * the per-perceptron accuracy curves (only when there is more than one),
    * the mean accuracy per epoch.

    Each history entry ``x`` is read as ``x[0]`` (accuracies, one per
    perceptron) and ``x[1][i]`` (weights of perceptron ``i``).

    Returns
    -------
    tuple ``(m, a)``: the model and the history returned by ``trainUp()``.
    """
    m = p.fromArff(fname, .1)
    a = m.trainUp()
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Done Training')
    # list() so the (key, perceptron) pairs can be indexed on Python 3, where
    # items() returns a view.
    ps = list(m.perceptrons.items())
    # Weights over epochs, one figure per perceptron.
    for i in range(len(a[0][1])):
        figure()
        plot_weights(array([x[1][i] for x in a]), m.data.columns[:-1])
        title(ps[i][0][0] + ' vs ' + ps[i][0][1])
        xlabel('Epoch')
        ylabel('Weight')
    # Individual curves are only informative with several perceptrons.
    if len(a[0][0]) > 1:
        figure()
        plot(array([x[0] for x in a]))
        # Loop names chosen so they do not shadow the module alias `p`.
        legend([pair[0] + ' vs ' + pair[1] for pair, _ in ps],
               loc='upper left', bbox_to_anchor=(1.01, 1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')
    figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    return m, a
def show_resuls(m, data, a):
    """Display weight histories and accuracy curves for a training run.

    Parameters
    ----------
    m : model object; only ``m.perceptrons`` (a mapping whose keys are
        two-element label pairs) is read.
    data : DataFrame; all columns but the last label the weight curves.
    a : training history. Each entry ``x`` is read as ``x[0]`` (accuracies,
        one per perceptron) and ``x[1][i]`` (weights of perceptron ``i``).

    Every figure is passed to ``display`` after it has been drawn.
    """
    # list() so the (key, perceptron) pairs can be indexed on Python 3, where
    # items() returns a view.
    ps = list(m.perceptrons.items())
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Weights vs Epochs for each perceptron:')
    for i in range(len(a[0][1])):
        f = figure()
        plot_weights(array([x[1][i] for x in a]), data.columns[:-1])
        title(ps[i][0][0] + ' vs ' + ps[i][0][1])
        xlabel('Epoch')
        ylabel('Weight')
        display(f)
    print('Accuracies:')
    # Individual curves are only informative with several perceptrons.
    if len(a[0][0]) > 1:
        f = figure()
        plot(array([x[0] for x in a]))
        # Loop names chosen so they do not shadow the module alias `p`.
        legend([pair[0] + ' vs ' + pair[1] for pair, _ in ps],
               loc='upper left', bbox_to_anchor=(1.01, 1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')
        display(f)
    f = figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    display(f)
import perceive as p
from pylab import *
#a = m.train(d, split=.7)
#a
def htmltitle(*args, **kwds):
    """Display the arguments as an HTML heading.

    The positional arguments are converted with ``str`` and joined with
    single spaces. The optional ``level`` keyword (default 2) selects the
    heading tag, e.g. ``level=4`` produces ``<h4>...</h4>``.
    """
    heading = str(kwds.get('level', 2))
    text = ' '.join(str(part) for part in args)
    markup = ''.join(['<h', heading, '>', text, '</h', heading, '>'])
    display(HTML(markup))
def try_rates(meta, data, rates):
    """Train one model per learning rate and compare the epoch counts.

    Parameters
    ----------
    meta : ARFF metadata passed to ``p.Main``.
    data : DataFrame; a fresh copy is handed to each training run.
    rates : iterable of learning rates.

    For every rate a model is trained with ``split=None``, its best accuracy
    and number of epochs are printed and its history is shown with
    ``show_resuls``. Finally a Rate/Epochs table is displayed.

    Returns
    -------
    list with one training history per rate, in the order of ``rates``.
    """
    # Materialise once: `rates` is iterated here and again for the table, so
    # a one-shot iterable would otherwise produce an empty "Rate" column.
    rates = list(rates)
    allz = []
    for rate in rates:
        htmltitle('Learning Rate', rate)
        m = p.Main(meta, rate)
        results = m.train(data.copy(), split=None)
        # %-formatted single-argument prints give the same text on Python 2
        # and 3 as the original multi-argument print statements.
        print('Best accuracy %s' % (m.best,))
        print('Number of epochs: %s' % (len(results),))
        show_resuls(m, data, results)
        allz.append(results)
    compare = DataFrame({
        "Rate": Series(rates),
        "Epochs": Series([len(history) for history in allz]),
    }, columns=['Rate', 'Epochs'])
    htmltitle('Number of Epochs vs Learning Rate')
    display(compare)
    return allz
def trials(meta, data, num, split=.7):
    """Run several independent train/test trials and report the mean accuracy.

    Parameters
    ----------
    meta : ARFF metadata passed to ``p.Main``.
    data : DataFrame; a fresh copy is handed to each trial.
    num : number of trials to run.
    split : value forwarded to ``m.train(..., split=split)``.

    Each trial builds ``p.Main(meta, .1)`` and trains it; ``train`` is
    unpacked as ``(history, missed)``, where ``missed[0]`` is reported as the
    fraction of the test set missed and ``missed[1]`` as the number of missed
    instances.

    Returns
    -------
    list with one training history per trial.
    """
    allz = []
    # %-formatted single-argument prints give the same text on Python 2 and 3
    # as the original multi-argument print statements.
    print('Running %s trials' % (num,))
    miss_rates = []
    for i in range(num):
        htmltitle('Trial', i + 1)
        m = p.Main(meta, .1)
        results, missed = m.train(data.copy(), split=split)
        print('Best accuracy %s' % (m.best,))
        htmltitle('Percent missed (of the test set):', missed[0], '; ',
                  missed[1], ' instances', level=4)
        miss_rates.append(missed[0])
        show_resuls(m, data, results)
        allz.append(results)
    htmltitle('Final Results')
    # Accuracy is the complement of the mean miss rate. Skip the line when no
    # trial ran instead of dividing by zero.
    if miss_rates:
        print('Mean accuracy over %s runs: %s'
              % (num, 1 - sum(miss_rates) / float(num)))
    return allz
def runthis(fname, split=.7, num=5):
    """Load an ARFF file and run repeated perceptron trials on it.

    Parameters
    ----------
    fname : path of the ARFF file to load.
    split : value forwarded to ``trials`` (default .7).
    num : number of trials; defaults to 5, the value that was previously
        hard-coded.

    Side effect: sets ``rcParams['figure.figsize']`` to ``(6, 3)`` for the
    rest of the session.

    Returns
    -------
    The list returned by ``trials``.
    """
    data, meta = loadarff(fname)
    # %-formatted single-argument print gives the same text on Python 2 and 3
    # as the original multi-argument print statement.
    print('Running perceptron classification on %s with a testing split of %s'
          % (fname, split))
    rcParams['figure.figsize'] = (6, 3)
    return trials(meta, DataFrame(data), num, split)
# Try six learning rates, from 1000 down to 0.001, on the first example set.
data, meta = loadarff('./ex_1.arff')
# The trailing semicolon keeps the notebook from echoing the returned list.
try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]);
Fully trained Best accuracy 1.0 Number of epochs: 5 Weights vs Epochs for each perceptron:
Accuracies:
Fully trained Best accuracy 1.0 Number of epochs: 3 Weights vs Epochs for each perceptron:
Accuracies:
Fully trained Best accuracy 1.0 Number of epochs: 3 Weights vs Epochs for each perceptron:
Accuracies:
Fully trained Best accuracy 1.0 Number of epochs: 7 Weights vs Epochs for each perceptron:
Accuracies:
Fully trained Best accuracy 1.0 Number of epochs: 7 Weights vs Epochs for each perceptron:
Accuracies:
Fully trained Best accuracy 1.0 Number of epochs: 7 Weights vs Epochs for each perceptron:
Accuracies:
Rate | Epochs | |
---|---|---|
0 | 1000.000 | 5 |
1 | 100.000 | 3 |
2 | 10.000 | 3 |
3 | 1.000 | 7 |
4 | 0.010 | 7 |
5 | 0.001 | 7 |
# Same learning-rate sweep on the second example set.
data, meta = loadarff('./ex_2.arff')
# The trailing semicolon keeps the notebook from echoing the returned list.
try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]);
Done classifying; no progress in past 20 epochs Best accuracy 0.75 Number of epochs: 46 Weights vs Epochs for each perceptron:
Accuracies:
Done classifying; no progress in past 20 epochs Best accuracy 0.875 Number of epochs: 37 Weights vs Epochs for each perceptron:
Accuracies:
Done classifying; no progress in past 20 epochs Best accuracy 0.875 Number of epochs: 44 Weights vs Epochs for each perceptron:
Accuracies:
Done classifying; no progress in past 20 epochs Best accuracy 0.75 Number of epochs: 21 Weights vs Epochs for each perceptron:
Accuracies:
Done classifying; no progress in past 20 epochs Best accuracy 0.875 Number of epochs: 24 Weights vs Epochs for each perceptron:
Accuracies:
Done classifying; no progress in past 20 epochs Best accuracy 0.875 Number of epochs: 37 Weights vs Epochs for each perceptron:
Accuracies:
Rate | Epochs | |
---|---|---|
0 | 1000.000 | 46 |
1 | 100.000 | 37 |
2 | 10.000 | 44 |
3 | 1.000 | 21 |
4 | 0.010 | 24 |
5 | 0.001 | 37 |
# Train on ex_1, then draw the two classes and the learned decision line.
data, meta = loadarff('./ex_1.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
history = m.train(data)
# Normalise the frame after training so the scatter uses normalised axes.
# NOTE(review): presumably train() applies the same normalisation internally,
# since the weights are drawn against these axes -- confirm in `perceive`.
n = p.normalizers(data, meta)
data = p.normalize(n, data)
f = figure()
#data[data.columns[:2]].plot()
# One scatter per class: CS in red, law in blue.
cs = data[data.major == 'cs']
scatter(cs.friends, cs.age, color='r')
law = data[data.major == 'law']
scatter(law.friends, law.age, color='b')
# The first perceptron's best weights: x weight, y weight, bias.
wx, wy, bias = m.best_weights[0]
# Decision line wx*x + wy*y + bias = 0, solved for x at the smallest and
# largest value of the second column (age).
y1 = min(data[data.columns[1]])
y2 = max(data[data.columns[1]])
x1 = (-bias - y1 * wy) / wx
x2 = (-bias - y2 * wy) / wx
# The legend is created before the line is drawn; its two labels are given
# in the order the scatters were added.
legend(('CS', 'Law'))
plot([x1, x2], [y1, y2])
xlabel('Friends (normalized)')
ylabel('Age (normalized)')
title('Friends vs Age of CS and Law students')
display(f)
# Echo the normalised data and the best weights as the cell output.
data, m.best_weights
Fully trained
( friends age major 0 0.40 0.0 cs 1 0.10 0.2 cs 2 0.00 1.0 cs 3 0.05 0.6 cs 4 0.40 1.0 law 5 1.00 0.0 law 6 0.50 0.6 law 7 0.35 1.0 law, [array([-0.215, -0.08 , 0.1 ])])
# Train on ex_2, then draw the two classes and the learned decision line.
data, meta = loadarff('./ex_2.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
history = m.train(data)
# Normalise the frame after training so the scatter uses normalised axes.
# NOTE(review): presumably train() applies the same normalisation internally,
# since the weights are drawn against these axes -- confirm in `perceive`.
n = p.normalizers(data, meta)
data = p.normalize(n, data)
f = figure()
#data[data.columns[:2]].plot()
# The names `cs` and `law` are reused from the ex_1 cell; here they hold the
# brown-haired (red) and blond-haired (blue) rows.
cs = data[data.hair_color == 'brown']
scatter(cs.cats, cs.age, color='r')
law = data[data.hair_color == 'blond']
scatter(law.cats, law.age, color='b')
# The first perceptron's best weights: x weight, y weight, bias.
wx, wy, bias = m.best_weights[0]
# Decision line wx*x + wy*y + bias = 0, solved for y at the smallest and
# largest value of the first column (cats).
x1 = min(data[data.columns[0]])
x2 = max(data[data.columns[0]])
y1 = (-bias - x1 * wx) / wy
y2 = (-bias - x2 * wx) / wy
# The legend is created before the line is drawn; its two labels are given
# in the order the scatters were added.
legend(('Brown', 'Blond'))
plot([x1, x2], [y1, y2])
xlabel('Number of Cats (normalized)')
ylabel('Age (normalized)')
title('Hair Color of Cat Owners')
display(f)
# Echo the normalised data and the best weights as the cell output.
data, m.best_weights
Done classifying; no progress in past 20 epochs
( cats age hair_color 0 0.50 0.00 brown 1 0.00 0.00 blond 2 0.00 0.50 brown 3 0.25 0.50 blond 4 0.50 0.25 brown 5 0.50 0.75 blond 6 0.00 1.00 brown 7 1.00 1.00 blond, [array([-0.125, -0.05 , 0.1 ])])
from base64 import encodestring
import matplotlib
from IPython.display import display, HTML
from IPython.core.pylabtools import print_figure
def png2x(fig):
    """Render a figure as an inline HTML ``<img>`` tag holding a base64 PNG.

    Parameters
    ----------
    fig : figure object accepted by IPython's ``print_figure``.

    Returns
    -------
    unicode HTML snippet of the form
    ``<img src='data:image/png;base64,...'/>``.

    The returned tag carries no width/height attributes, so the image is
    shown at its native pixel size. The half-size values the original
    computed from ``figure.figsize`` and ``savefig.dpi`` were never used and
    have been removed: multiplying by ``savefig.dpi`` raises TypeError when
    that rcParam is the string ``'figure'`` (the default in matplotlib 2.0
    and later).
    """
    png = print_figure(fig, 'png')
    png64 = encodestring(png).decode('ascii')
    return u"<img src='data:image/png;base64,%s'/>" % (png64)
# Register png2x as the text/html formatter for matplotlib Figure objects in
# the running IPython session.
ip = get_ipython()
html_formatter = ip.display_formatter.formatters['text/html']
html_formatter.for_type(matplotlib.figure.Figure, png2x)
<function __main__.png2x>
import matplotlib
# Echo the name to check which `plot` function is currently in scope.
plot
<function matplotlib.pyplot.plot>
# Thin lines, small font and a light grey palette for all later figures.
grey = [.6] * 3
dgrey = [.3] * 3
matplotlib.rcParams.update({
    'lines.linewidth': .5,
    'axes.linewidth': .5,
    'patch.linewidth': .5,
    'font.size': 8,
    'figure.facecolor': 'white',
    'xtick.color': dgrey,
    'ytick.color': dgrey,
    'axes.edgecolor': grey,
    'axes.facecolor': [.99] * 3,
})
# Run the trials on iris with the default split; keep the per-trial histories.
iris_res = runthis('iris.arff')
Running perceptron classification on iris.arff with a testing split of 0.7 Running 5 trials
Using 105 for training, and 45 for testing Fully trained Best accuracy 1.0
Weights vs Epochs for each perceptron:
Accuracies:
Using 105 for training, and 45 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.995098039216
Weights vs Epochs for each perceptron:
Accuracies:
Using 105 for training, and 45 for testing Fully trained Best accuracy 1.0
Weights vs Epochs for each perceptron:
Accuracies:
Using 105 for training, and 45 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.995305164319
Weights vs Epochs for each perceptron:
Accuracies:
Using 105 for training, and 45 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.990338164251
Weights vs Epochs for each perceptron:
Accuracies:
Mean accuracy over 5 runs: 0.893333333333
# for trying out styles
# Plots element [1][2] of every epoch record from the first iris trial;
# show_resuls reads x[1][i] as the weights of perceptron i, so this is the
# third perceptron's weights over the epochs.
f = figure()
plot([x[1][2] for x in iris_res[0]])
display(f)
# Run the same trials on the voting data set; keep the per-trial histories.
vote_res = runthis('./votingMissingValuesReplaced.arff')
Running perceptron classification on ./votingMissingValuesReplaced.arff with a testing split of 0.7 Running 5 trials
Using 322 for training, and 139 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.968944099379
Weights vs Epochs for each perceptron:
Accuracies:
Using 322 for training, and 139 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.972049689441
Weights vs Epochs for each perceptron:
Accuracies:
Using 322 for training, and 139 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.962732919255
Weights vs Epochs for each perceptron:
Accuracies:
Using 322 for training, and 139 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.962732919255
Weights vs Epochs for each perceptron:
Accuracies:
Using 322 for training, and 139 for testing Done classifying; no progress in past 20 epochs Best accuracy 0.981366459627
Weights vs Epochs for each perceptron:
Accuracies:
Mean accuracy over 5 runs: 0.93381294964