In [1]:
import numpy as np
from pandas import DataFrame, Series
from scipy.io.arff import loadarff
import scipy
In [2]:
data, meta = loadarff('ex_1.arff')
In [3]:
df = DataFrame(data)
In [4]:
df[df.major == 'cs']
Out[4]:
friends age major
0 8 20 cs
1 2 21 cs
2 0 25 cs
3 1 23 cs
In [50]:
dc = df.copy()
In [51]:
dc.major = True
In [52]:
dc
Out[52]:
friends age major
0 8 20 True
1 2 21 True
2 0 25 True
3 1 23 True
4 8 25 True
5 20 20 True
6 10 23 True
7 7 25 True
In [61]:
dc.friends /= 2
In [64]:
dc.major = df.major == 'cs'
In [68]:
# Assign through .loc in one indexing step. The chained form
# `dc.age[mask] = 2` first fetches the column and then writes into it, which
# pandas reports as chained assignment and which may not update `dc` at all.
dc.loc[df.major == 'cs', 'age'] = 2
In [72]:
df.major.unique()
Out[72]:
array(['cs', 'law'], dtype=object)
In [77]:
m = dc.major.copy().astype(int)
In [86]:
dm = df.copy()
# Use the `np` alias that this notebook imports explicitly; the bare name
# `numpy` is never imported in the visible cells.
dm.major = np.arange(len(dm.major))
In [99]:
# Rows that are CS majors OR have at least 10 friends. `|` is the element-wise
# OR for boolean masks; it states the intent directly instead of relying on
# `+` behaving as a logical OR for boolean Series.
df[(df.major == 'cs') | (df.friends >= 10)]
Out[99]:
friends age major
0 8 20 cs
1 2 21 cs
2 0 25 cs
3 1 23 cs
5 20 20 law
6 10 23 law
In [108]:
# Every column except the last one, taken as an explicit copy so that adding
# columns to `nums` afterwards does not write into a slice of `df`
# (pandas flags that with SettingWithCopyWarning).
nums = df[df.columns[:-1]].copy()
In [127]:
nums['man'] = Series(np.zeros(len(nums)), index=nums.index)
In [147]:
list(nums.columns)
Out[147]:
['friends', 'age', 'man']
In [45]:
data, meta = loadarff('iris.arff')
df = DataFrame(data)
In [3]:
from itertools import cycle
from IPython.display import display

def plot_weights(weights, labels):
    """Plot every column of `weights` as a line and attach a legend.

    Columns are drawn in consecutive groups of `ncolors`; each group uses the
    next line style from a repeating ('-', '--', '-.') sequence.

    Parameters
    ----------
    weights : 2-D array
        Indexed as ``weights[:, j]``. The callers in this notebook build it
        with one row per history entry and label the x axis 'Epoch'.
    labels : iterable of str
        Legend entries for the columns; a final 'Bias' entry is appended.
    """
    styles = cycle(('-', '--', '-.'))
    n = len(weights[0])
    # presumably the number of distinct colors in matplotlib's default color
    # cycle, so a repeated color always comes with a new line style -- TODO confirm
    ncolors = 7
    # Step through the columns one group at a time. The last group may be
    # shorter, and no empty trailing group is plotted when n is a multiple of
    # ncolors.
    for start in range(0, n, ncolors):
        plot(weights[:, start:start + ncolors], ls=next(styles))
    legend(list(labels) + ['Bias'], loc='upper left', bbox_to_anchor=(1.01, 1))

def test(fname):
    m = p.fromArff(fname, .1)
    a = m.trainUp()
    print 'Done Training'
    # Plot the accuracies over epochs
    ps = m.perceptrons.items()
    for i in range(len(a[0][1])):
        figure()
        plot_weights(array([x[1][i] for x in a]), m.data.columns[:-1])
        title(ps[i][0][0] + ' vs ' + ps[i][0][1])
        xlabel('Epoch')
        ylabel('Weight')

    if len(a[0][0]) > 1:
        figure()
        plot(array([x[0] for x in a]))
        legend(list(l[0] + ' vs ' + l[1] for l, p in ps), loc='upper left', bbox_to_anchor=(1.01,1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')

    figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    return m, a

def show_resuls(m, data, a):
    # Plot the accuracies over epochs
    ps = m.perceptrons.items()
    print 'Weights vs Epochs for each perceptron:'
    for i in range(len(a[0][1])):
        f = figure()
        plot_weights(array([x[1][i] for x in a]), data.columns[:-1])
        title(ps[i][0][0] + ' vs ' + ps[i][0][1])
        xlabel('Epoch')
        ylabel('Weight')
        display(f)

    print 'Accuracies:'
    if len(a[0][0]) > 1:
        f = figure()
        plot(array([x[0] for x in a]))
        legend(list(l[0] + ' vs ' + l[1] for l, p in ps), loc='upper left', bbox_to_anchor=(1.01,1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')
        display(f)
    f = figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    display(f)
In [95]:
import perceive as p
from pylab import *
#a = m.train(d, split=.7)
#a

def htmltitle(*args, **kwds):
    """Display the positional arguments as one HTML heading.

    The arguments are converted with ``str`` and joined by single spaces.
    The heading level comes from the ``level`` keyword (default 2), giving
    ``<hN>text</hN>``.
    """
    tag_level = str(kwds.get('level', 2))
    text = ' '.join(str(arg) for arg in args)
    opening = '<h' + tag_level + '>'
    closing = '</h' + tag_level + '>'
    display(HTML(opening + text + closing))

def try_rates(meta, data, rates):
    allz = []
    for rate in rates:
        htmltitle('Learning Rate', rate)
        m = p.Main(meta, rate)
        results = m.train(data.copy(), split=None)
        print 'Best accuracy', m.best
        print 'Number of epochs:', len(results)
        show_resuls(m, data, results)
        allz.append(results)
    compare = DataFrame({
        "Rate": Series(rates),
        "Epochs": Series(tuple(len(results) for results in allz))
    }, columns=['Rate', 'Epochs'])
    htmltitle('Number of Epochs vs Learning Rate')
    display(compare)
    return allz
    
def trials(meta, data, num, split=.7):
    allz = []
    print 'Running', num, 'trials'
    accuracies = []
    for i in range(num):
        htmltitle('Trial', i+1)
        m = p.Main(meta, .1)
        results, accuracy = m.train(data.copy(), split=split)
        print 'Best accuracy', m.best
        htmltitle('Percent missed (of the test set):', accuracy[0], '; ', accuracy[1], ' instances', level=4)
        accuracies.append(accuracy[0])
        show_resuls(m, data, results)
        allz.append(results)
    htmltitle('Final Results')
    print 'Mean accuracy over', num, 'runs:', 1 - sum(accuracies)/float(num)
    return allz

def runthis(fname, split=.7):
    data, meta = loadarff(fname)
    print 'Running perceptron classification on', fname, 'with a testing split of', split
    
    #figsize(12, 6)
    rcParams['figure.figsize'] = (6, 3)
    return trials(meta, DataFrame(data), 5, split)
In [98]:
data, meta = loadarff('./ex_1.arff')
try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]);

Learning Rate 1000

Fully trained
Best accuracy 1.0
Number of epochs: 5
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 100

Fully trained
Best accuracy 1.0
Number of epochs: 3
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 10

Fully trained
Best accuracy 1.0
Number of epochs: 3
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 1

Fully trained
Best accuracy 1.0
Number of epochs: 7
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 0.01

Fully trained
Best accuracy 1.0
Number of epochs: 7
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 0.001

Fully trained
Best accuracy 1.0
Number of epochs: 7
Weights vs Epochs for each perceptron:
Accuracies:

Number of Epochs vs Learning Rate

Rate Epochs
0 1000.000 5
1 100.000 3
2 10.000 3
3 1.000 7
4 0.010 7
5 0.001 7
In [100]:
data, meta = loadarff('./ex_2.arff')
try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]);

Learning Rate 1000

Done classifying; no progress in past 20 epochs
Best accuracy 0.75
Number of epochs: 46
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 100

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 37
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 10

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 44
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 1

Done classifying; no progress in past 20 epochs
Best accuracy 0.75
Number of epochs: 21
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 0.01

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 24
Weights vs Epochs for each perceptron:
Accuracies:

Learning Rate 0.001

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 37
Weights vs Epochs for each perceptron:
Accuracies:

Number of Epochs vs Learning Rate

Rate Epochs
0 1000.000 46
1 100.000 37
2 10.000 44
3 1.000 21
4 0.010 24
5 0.001 37
In [172]:
# Train a perceptron on ex_1.arff and draw its decision boundary over a
# scatter plot of the two classes.
data, meta = loadarff('./ex_1.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
# NOTE(review): train() receives the frame before the normalization below;
# presumably Main.train normalizes internally -- confirm against perceive.
history = m.train(data)

# Rescale the frame for plotting; from here on `data` is the normalized frame.
n = p.normalizers(data, meta)
data = p.normalize(n, data)

f = figure()
#data[data.columns[:2]].plot()
# One scatter series per class: CS in red, Law in blue.
cs = data[data.major == 'cs']
scatter(cs.friends, cs.age, color='r')

law = data[data.major == 'law']
scatter(law.friends, law.age, color='b')

# First entry of best_weights, unpacked as x-weight, y-weight and bias.
wx, wy, bias = m.best_weights[0]

# Endpoints of the boundary line: take the smallest and largest value of the
# second column (age) as y ...
y1 = min(data[data.columns[1]])
y2 = max(data[data.columns[1]])

# ... and solve wx*x + wy*y + bias = 0 for x.
# NOTE(review): divides by wx, so a zero friends-weight would fail here.
x1 = (-bias - y1 * wy) / wx
x2 = (-bias - y2 * wy) / wx

# legend() runs before the boundary line is plotted, so its two labels apply
# to the two scatter series drawn above.
legend(('CS', 'Law'))
plot([x1, x2], [y1, y2])
xlabel('Friends (normalized)')
ylabel('Age (normalized)')
title('Friends vs Age of CS and Law students')
display(f)
# Last expression of the cell: shown as the cell's Out[] value.
data, m.best_weights
Fully trained
Out[172]:
(   friends  age major
0     0.40  0.0    cs
1     0.10  0.2    cs
2     0.00  1.0    cs
3     0.05  0.6    cs
4     0.40  1.0   law
5     1.00  0.0   law
6     0.50  0.6   law
7     0.35  1.0   law,
 [array([-0.215, -0.08 ,  0.1  ])])
In [168]:
# Train a perceptron on ex_2.arff and draw its decision boundary over a
# scatter plot of the two hair-color classes.
data, meta = loadarff('./ex_2.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
# NOTE(review): train() receives the frame before the normalization below;
# presumably Main.train normalizes internally -- confirm against perceive.
history = m.train(data)

# Rescale the frame for plotting; from here on `data` is the normalized frame.
n = p.normalizers(data, meta)
data = p.normalize(n, data)

f = figure()
#data[data.columns[:2]].plot()
# NOTE(review): the names `cs` and `law` are carried over from the ex_1 cell;
# here they hold the 'brown' and 'blond' rows respectively.
cs = data[data.hair_color == 'brown']
scatter(cs.cats, cs.age, color='r')

law = data[data.hair_color == 'blond']
scatter(law.cats, law.age, color='b')

# First entry of best_weights, unpacked as x-weight, y-weight and bias.
wx, wy, bias = m.best_weights[0]

# Endpoints of the boundary line: take the smallest and largest value of the
# first column (cats) as x ...
x1 = min(data[data.columns[0]])
x2 = max(data[data.columns[0]])

# ... and solve wx*x + wy*y + bias = 0 for y.
# NOTE(review): divides by wy, so a zero age-weight would fail here.
y1 = (-bias - x1 * wx) / wy
y2 = (-bias - x2 * wx) / wy

# legend() runs before the boundary line is plotted, so its two labels apply
# to the two scatter series drawn above.
legend(('Brown', 'Blond'))
plot([x1, x2], [y1, y2])
xlabel('Number of Cats (normalized)')
ylabel('Age (normalized)')
title('Hair Color of Cat Owners')
display(f)
# Last expression of the cell: shown as the cell's Out[] value.
data, m.best_weights
Done classifying; no progress in past 20 epochs
Out[168]:
(   cats   age hair_color
0  0.50  0.00      brown
1  0.00  0.00      blond
2  0.00  0.50      brown
3  0.25  0.50      blond
4  0.50  0.25      brown
5  0.50  0.75      blond
6  0.00  1.00      brown
7  1.00  1.00      blond,
 [array([-0.125, -0.05 ,  0.1  ])])
In [15]:
from base64 import encodestring

import matplotlib

from IPython.display import display, HTML
from IPython.core.pylabtools import print_figure

def png2x(fig):
    """Render `fig` to PNG and return it as an inline HTML ``<img>`` tag.

    The PNG bytes come from IPython's ``print_figure`` and are embedded as a
    base64 ``data:`` URI. No width/height attributes are written, so the
    image is shown at its native pixel size.

    Parameters
    ----------
    fig : matplotlib figure
        Passed straight to ``print_figure(fig, 'png')``.

    Returns
    -------
    unicode
        ``<img src='data:image/png;base64,...'/>``
    """
    # Local import: the notebook's import cell only brings in `encodestring`.
    from base64 import b64encode

    # The half-size pixel dimensions once derived from rcParams were never
    # used in the returned tag. They are not computed any more, because
    # multiplying by rcParams['savefig.dpi'] raises when it is the string
    # 'figure'.
    png = print_figure(fig, 'png')
    # b64encode returns one unbroken line, whereas encodestring inserts
    # newlines that would end up inside the src attribute.
    png64 = b64encode(png).decode('ascii')
    return u"<img src='data:image/png;base64,%s'/>" % (png64)

# Register png2x as the text/html formatter for matplotlib Figure objects in
# the running IPython session.
ip = get_ipython()
html_formatter = ip.display_formatter.formatters['text/html']
# Last expression of the cell: the return value of for_type() is what the
# cell shows as its Out[] value.
html_formatter.for_type(matplotlib.figure.Figure, png2x)
Out[15]:
<function __main__.png2x>
In [5]:
import matplotlib
In [8]:
plot
Out[8]:
<function matplotlib.pyplot.plot>
In [80]:
# Global figure styling: thin lines and frames, small font, white figure
# background. These rcParams stay in effect for the rest of the session.
matplotlib.rcParams['lines.linewidth'] = .5
matplotlib.rcParams['axes.linewidth'] = .5
matplotlib.rcParams['patch.linewidth'] = .5
matplotlib.rcParams['font.size'] = 8
matplotlib.rcParams['figure.facecolor'] = 'white'

# Grey levels given as [r, g, b] lists with equal components.
grey = [.6]*3
dgrey = [.3]*3
matplotlib.rcParams['xtick.color'] = dgrey
matplotlib.rcParams['ytick.color'] = dgrey
matplotlib.rcParams['axes.edgecolor'] = grey
matplotlib.rcParams['axes.facecolor'] = [.99]*3
# Run the perceptron trials on the iris data with the default split and keep
# the returned histories.
iris_res = runthis('iris.arff')
Running perceptron classification on iris.arff with a testing split of 0.7
Running 5 trials

Trial 1

Using 105 for training, and 45 for testing
Fully trained
Best accuracy 1.0

Percent missed (of the test set): 0.0444444444444 ; 2.0 instances

Weights vs Epochs for each perceptron:
Accuracies:

Trial 2

Using 105 for training, and 45 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.995098039216

Percent missed (of the test set): 0.222222222222 ; 10.0 instances

Weights vs Epochs for each perceptron: