In [1]:

import numpy as np
from pandas import DataFrame, Series
from scipy.io.arff import loadarff
import scipy

In [2]:

# Load the ex_1 ARFF file; loadarff returns the records and the attribute metadata.
data, meta = loadarff('ex_1.arff')

In [3]:

# Wrap the loaded records in a DataFrame (one column per ARFF attribute).
df = DataFrame(data)

In [4]:

# Rows whose major is 'cs'.
df[df['major'] == 'cs']

Out[4]:

	friends	age	major
0	8	20	cs
1	2	21	cs
2	0	25	cs
3	1	23	cs

In [50]:

# Work on a copy so the experiments below do not modify `df`.
dc = df.copy()

In [51]:

# Assigning a scalar sets every row of the `major` column to True.
dc.major = True

In [52]:

# Display the modified copy.
dc

Out[52]:

	friends	age	major
0	8	20	True
1	2	21	True
2	0	25	True
3	1	23	True
4	8	25	True
5	20	20	True
6	10	23	True
7	7	25	True

In [61]:

# Halve the `friends` column of the copy.
# NOTE: this updates `dc` in place, so re-running the cell halves the values again.
dc.friends /= 2

In [64]:

# Replace `major` in the copy with a boolean column: True where the original major is 'cs'.
dc.major = df.major == 'cs'

In [68]:

# Set age to 2 for the 'cs' rows. A single .loc call selects rows and column
# together, avoiding chained-indexing assignment (which may write to a
# temporary and triggers SettingWithCopyWarning).
dc.loc[df.major == 'cs', 'age'] = 2

In [72]:

# Distinct values of the class column.
df['major'].unique()

Out[72]:

array(['cs', 'law'], dtype=object)

In [77]:

# Integer version of the `major` column of `dc`; astype already returns a new Series.
m = dc['major'].astype(int)

In [86]:

# Copy the frame and overwrite `major` with a running row number 0..len-1.
dm = df.copy()
# Use the `np` alias imported at the top of the notebook; the bare name
# `numpy` is not imported anywhere above this cell.
dm.major = np.arange(len(dm.major))

In [99]:

# Rows that are 'cs' majors OR have at least 10 friends. `|` is the logical-or
# for boolean masks; `+` only happens to behave the same for bool arrays.
df[(df.major == 'cs') | (df.friends >= 10)]

Out[99]:

	friends	age	major
0	8	20	cs
1	2	21	cs
2	0	25	cs
3	1	23	cs
5	20	20	law
6	10	23	law

In [108]:

# Every column except the last one. Take an explicit copy: a later cell adds
# a column to `nums`, and doing that on a slice of `df` triggers
# SettingWithCopyWarning.
nums = df[df.columns[:-1]].copy()

In [127]:

# Add a `man` column filled with float zeros, one per existing row.
nums['man'] = np.zeros(len(nums))

In [147]:

# Column names as a plain Python list.
nums.columns.tolist()

Out[147]:

['friends', 'age', 'man']

In [45]:

# Switch to the iris data set.
# NOTE: this rebinds `data`, `meta` and `df`, so cells above that expect ex_1
# must be re-run after their own load cell.
data, meta = loadarff('iris.arff')
df = DataFrame(data)

In [3]:

from itertools import cycle
from IPython.display import display

def plot_weights(weights, labels):
    """Plot every column of ``weights`` as a line and attach a legend.

    Columns are drawn in consecutive groups of ``ncolors``; each group uses
    the next line style from a repeating ``'-'``, ``'--'``, ``'-.'`` cycle,
    so lines whose colors repeat can still be told apart.

    Relies on the pylab-style ``plot`` and ``legend`` functions being present
    in the notebook namespace.

    Parameters
    ----------
    weights : 2-D array
        Indexed as ``weights[:, j]``; ``len(weights[0])`` columns are
        plotted. Callers in this notebook pass one row per epoch.
    labels : iterable
        Legend entries; ``'Bias'`` is appended as the final entry.
    """
    styles = cycle(('-', '--', '-.'))
    n = len(weights[0])
    # Group size; presumably the length of the default color cycle -- TODO confirm.
    ncolors = 7
    # Stepping by group start needs no integer division (which yields a float
    # on Python 3) and never issues a plot call for an empty trailing slice
    # when n is an exact multiple of ncolors.
    for start in range(0, n, ncolors):
        plot(weights[:, start:start + ncolors], ls=next(styles))
    legend(list(labels) + ['Bias'], loc='upper left', bbox_to_anchor=(1.01,1))

def test(fname):
    """Train a model built from an ARFF file and plot its training history.

    Builds the model with ``p.fromArff(fname, .1)`` and trains it with
    ``trainUp()``. The history ``a`` is indexed per entry ``x`` as
    ``x[0]`` (one accuracy per perceptron) and ``x[1]`` (one weight vector
    per perceptron). One weight figure is drawn per perceptron, then the
    individual accuracies (only when there is more than one perceptron),
    then the mean accuracy.

    Relies on the pylab-style plotting names (``figure``, ``plot``, ``title``,
    ...) being present in the notebook namespace.

    Parameters
    ----------
    fname : path of the ARFF file handed to ``p.fromArff``.

    Returns
    -------
    (m, a) : the trained model and the history returned by ``trainUp()``.
    """
    # The second argument is presumably the learning rate -- confirm in `perceive`.
    m = p.fromArff(fname, .1)
    a = m.trainUp()
    # Single-argument call form prints identically on Python 2 and 3.
    print('Done Training')
    # list() keeps the (key, perceptron) pairs indexable where items() is a view.
    ps = list(m.perceptrons.items())
    # Plot the weights over epochs, one figure per perceptron
    for i in range(len(a[0][1])):
        figure()
        plot_weights(array([x[1][i] for x in a]), m.data.columns[:-1])
        # Each key is indexed as a pair of names.
        title(ps[i][0][0] + ' vs ' + ps[i][0][1])
        xlabel('Epoch')
        ylabel('Weight')

    # Per-perceptron accuracies are only worth a figure when there are several.
    if len(a[0][0]) > 1:
        figure()
        plot(array([x[0] for x in a]))
        legend(list(l[0] + ' vs ' + l[1] for l, _perceptron in ps), loc='upper left', bbox_to_anchor=(1.01,1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')

    figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    return m, a

def show_resuls(m, data, a):
    """Plot and display the training history of an already trained model.

    For each perceptron a weights-vs-epoch figure is drawn and displayed,
    followed by the individual accuracies (only when there is more than one
    perceptron) and the mean accuracy.

    Relies on the pylab-style plotting names and IPython's ``display`` being
    present in the notebook namespace.

    Parameters
    ----------
    m : model exposing ``perceptrons``, a mapping whose keys are indexed as a
        pair of names used in titles and legends.
    data : frame whose columns, minus the last one, label the weights.
    a : history; each entry ``x`` is indexed as ``x[0]`` (one accuracy per
        perceptron) and ``x[1]`` (one weight vector per perceptron).
    """
    # list() keeps the (key, perceptron) pairs indexable where items() is a view.
    ps = list(m.perceptrons.items())
    # Single-argument call form prints identically on Python 2 and 3.
    print('Weights vs Epochs for each perceptron:')
    for i in range(len(a[0][1])):
        f = figure()
        plot_weights(array([x[1][i] for x in a]), data.columns[:-1])
        title(ps[i][0][0] + ' vs ' + ps[i][0][1])
        xlabel('Epoch')
        ylabel('Weight')
        display(f)

    print('Accuracies:')
    # Per-perceptron accuracies are only worth a figure when there are several.
    if len(a[0][0]) > 1:
        f = figure()
        plot(array([x[0] for x in a]))
        legend(list(l[0] + ' vs ' + l[1] for l, _perceptron in ps), loc='upper left', bbox_to_anchor=(1.01,1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')
        display(f)
    f = figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    display(f)

In [95]:

import perceive as p
from pylab import *
#a = m.train(d, split=.7)
#a

def htmltitle(*args, **kwds):
    """Display the arguments as an HTML heading in the notebook output.

    Parameters
    ----------
    *args : values converted with ``str`` and joined by single spaces to
        form the heading text (inserted into the markup as-is, not escaped).
    level : keyword-only, optional
        Heading level used for the ``<hN>`` tag; defaults to 2.
    """
    # Imported locally: in this notebook `HTML` is otherwise only imported by
    # a later cell, so a top-to-bottom run would hit a NameError here.
    from IPython.display import HTML, display
    level = str(kwds.get('level', 2))
    message = ' '.join(map(str, args))
    display(HTML('<h' + level + '>' + message + '</h' + level + '>'))

def try_rates(meta, data, rates):
    """Train one fresh model per learning rate and compare epoch counts.

    For every rate a new ``p.Main(meta, rate)`` is trained on a copy of
    ``data`` with ``split=None``, its results are plotted with
    ``show_resuls``, and finally a Rate/Epochs table is displayed.

    Parameters
    ----------
    meta : ARFF metadata handed to ``p.Main``.
    data : DataFrame of instances; each run receives its own copy.
    rates : sequence of learning rates to try.

    Returns
    -------
    list with the training history of each rate, in the order of ``rates``.
    """
    allz = []
    for rate in rates:
        htmltitle('Learning Rate', rate)
        m = p.Main(meta, rate)
        results = m.train(data.copy(), split=None)
        # Single-argument print calls produce the same text on Python 2 and 3.
        print('Best accuracy %s' % (m.best,))
        print('Number of epochs: %s' % (len(results),))
        show_resuls(m, data, results)
        allz.append(results)
    # The number of epochs of a run is the length of its history.
    compare = DataFrame({
        "Rate": Series(rates),
        "Epochs": Series(tuple(len(history) for history in allz))
    }, columns=['Rate', 'Epochs'])
    htmltitle('Number of Epochs vs Learning Rate')
    display(compare)
    return allz
    
def trials(meta, data, num, split=.7):
    """Run ``num`` independent train/test trials and report the mean accuracy.

    Each trial trains a fresh ``p.Main(meta, .1)`` on a copy of ``data`` with
    the given ``split``, reports the result on the test set, and plots the
    training history with ``show_resuls``.

    Parameters
    ----------
    meta : ARFF metadata handed to ``p.Main``.
    data : DataFrame of instances; each trial receives its own copy.
    num : number of trials to run.
    split : value forwarded to ``m.train`` as ``split``; defaults to 0.7.

    Returns
    -------
    list with the training history of each trial.
    """
    allz = []
    # Single-argument print calls produce the same text on Python 2 and 3.
    print('Running %s trials' % (num,))
    # Fraction of test instances missed in each trial.
    miss_rates = []
    for i in range(num):
        htmltitle('Trial', i+1)
        m = p.Main(meta, .1)
        # With a split, train() returns the history plus a pair that is
        # reported below as (fraction missed, number of instances missed).
        results, missed = m.train(data.copy(), split=split)
        print('Best accuracy %s' % (m.best,))
        htmltitle('Percent missed (of the test set):', missed[0], '; ', missed[1], ' instances', level=4)
        miss_rates.append(missed[0])
        show_resuls(m, data, results)
        allz.append(results)
    htmltitle('Final Results')
    # Accuracy is the complement of the mean miss rate.
    print('Mean accuracy over %s runs: %s' % (num, 1 - sum(miss_rates)/float(num)))
    return allz

def runthis(fname, split=.7, num_trials=5):
    """Load an ARFF file and run repeated perceptron trials on it.

    Side effect: sets the pylab ``rcParams['figure.figsize']`` to ``(6, 3)``
    for all figures drawn afterwards.

    Parameters
    ----------
    fname : path of the ARFF file to load.
    split : value forwarded to ``trials`` as ``split``; defaults to 0.7.
    num_trials : number of trials to run; defaults to 5, the count that was
        previously hard-coded.

    Returns
    -------
    The list of training histories returned by ``trials``.
    """
    data, meta = loadarff(fname)
    # Single-argument print call produces the same text on Python 2 and 3.
    print('Running perceptron classification on %s with a testing split of %s' % (fname, split))

    rcParams['figure.figsize'] = (6, 3)
    return trials(meta, DataFrame(data), num_trials, split)

In [98]:

# Sweep learning rates from 1000 down to 0.001 on ex_1.
# The trailing `;` suppresses the echo of the returned list of histories.
data, meta = loadarff('./ex_1.arff')
try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]);

Learning Rate 1000

Fully trained
Best accuracy 1.0
Number of epochs: 5
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 100

Fully trained
Best accuracy 1.0
Number of epochs: 3
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 10

Fully trained
Best accuracy 1.0
Number of epochs: 3
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 1

Fully trained
Best accuracy 1.0
Number of epochs: 7
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 0.01

Fully trained
Best accuracy 1.0
Number of epochs: 7
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 0.001

Fully trained
Best accuracy 1.0
Number of epochs: 7
Weights vs Epochs for each perceptron:

Accuracies:

Number of Epochs vs Learning Rate

	Rate	Epochs
0	1000.000	5
1	100.000	3
2	10.000	3
3	1.000	7
4	0.010	7
5	0.001	7

In [100]:

# Same learning-rate sweep on ex_2.
# The trailing `;` suppresses the echo of the returned list of histories.
data, meta = loadarff('./ex_2.arff')
try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]);

Learning Rate 1000

Done classifying; no progress in past 20 epochs
Best accuracy 0.75
Number of epochs: 46
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 100

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 37
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 10

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 44
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 1

Done classifying; no progress in past 20 epochs
Best accuracy 0.75
Number of epochs: 21
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 0.01

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 24
Weights vs Epochs for each perceptron:

Accuracies:

Learning Rate 0.001

Done classifying; no progress in past 20 epochs
Best accuracy 0.875
Number of epochs: 37
Weights vs Epochs for each perceptron:

Accuracies:

Number of Epochs vs Learning Rate

	Rate	Epochs
0	1000.000	46
1	100.000	37
2	10.000	44
3	1.000	21
4	0.010	24
5	0.001	37

In [172]:

# Train on ex_1, then scatter the normalized instances together with the
# learned decision line.
data, meta = loadarff('./ex_1.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
history = m.train(data)

# Rebind `data` to its normalized version for plotting.
# NOTE(review): train() above received the un-normalized frame; presumably it
# normalizes internally so the weights match this scale -- confirm in `perceive`.
n = p.normalizers(data, meta)
data = p.normalize(n, data)

f = figure()
#data[data.columns[:2]].plot()
# One scatter series per class.
cs = data[data.major == 'cs']
scatter(cs.friends, cs.age, color='r')

law = data[data.major == 'law']
scatter(law.friends, law.age, color='b')

# The single weight vector is used below as (x weight, y weight, bias),
# with friends on the x axis and age on the y axis.
wx, wy, bias = m.best_weights[0]

# End points of the line: the smallest and largest value of the second column (age).
y1 = min(data[data.columns[1]])
y2 = max(data[data.columns[1]])

# Solve wx*x + wy*y + bias = 0 for x at both ends.
x1 = (-bias - y1 * wy) / wx
x2 = (-bias - y2 * wy) / wx

# legend() is called before the line is drawn, so its two entries are
# matched to the two scatter series.
legend(('CS', 'Law'))
plot([x1, x2], [y1, y2])
xlabel('Friends (normalized)')
ylabel('Age (normalized)')
title('Friends vs Age of CS and Law students')
display(f)
# Cell output: the normalized frame and the best weights.
data, m.best_weights

Fully trained

Out[172]:

(   friends  age major
0     0.40  0.0    cs
1     0.10  0.2    cs
2     0.00  1.0    cs
3     0.05  0.6    cs
4     0.40  1.0   law
5     1.00  0.0   law
6     0.50  0.6   law
7     0.35  1.0   law,
 [array([-0.215, -0.08 ,  0.1  ])])

In [168]:

# Train on ex_2, then scatter the normalized instances together with the
# learned decision line.
data, meta = loadarff('./ex_2.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
history = m.train(data)

# Rebind `data` to its normalized version for plotting.
# NOTE(review): train() above received the un-normalized frame; presumably it
# normalizes internally so the weights match this scale -- confirm in `perceive`.
n = p.normalizers(data, meta)
data = p.normalize(n, data)

f = figure()
#data[data.columns[:2]].plot()
# NOTE: the names `cs` and `law` are carried over from the ex_1 cell; here
# they hold the 'brown' and 'blond' rows respectively.
cs = data[data.hair_color == 'brown']
scatter(cs.cats, cs.age, color='r')

law = data[data.hair_color == 'blond']
scatter(law.cats, law.age, color='b')

# The single weight vector is used below as (x weight, y weight, bias),
# with cats on the x axis and age on the y axis.
wx, wy, bias = m.best_weights[0]

# End points of the line: the smallest and largest value of the first column (cats).
x1 = min(data[data.columns[0]])
x2 = max(data[data.columns[0]])

# Solve wx*x + wy*y + bias = 0 for y at both ends.
y1 = (-bias - x1 * wx) / wy
y2 = (-bias - x2 * wx) / wy

# legend() is called before the line is drawn, so its two entries are
# matched to the two scatter series.
legend(('Brown', 'Blond'))
plot([x1, x2], [y1, y2])
xlabel('Number of Cats (normalized)')
ylabel('Age (normalized)')
title('Hair Color of Cat Owners')
display(f)
# Cell output: the normalized frame and the best weights.
data, m.best_weights

Done classifying; no progress in past 20 epochs

Out[168]:

(   cats   age hair_color
0  0.50  0.00      brown
1  0.00  0.00      blond
2  0.00  0.50      brown
3  0.25  0.50      blond
4  0.50  0.25      brown
5  0.50  0.75      blond
6  0.00  1.00      brown
7  1.00  1.00      blond,
 [array([-0.125, -0.05 ,  0.1  ])])

In [15]:

from base64 import encodestring

import matplotlib

from IPython.display import display, HTML
from IPython.core.pylabtools import print_figure

def png2x(fig):
    """Render ``fig`` to PNG and return it as an inline HTML ``<img>`` tag.

    The PNG bytes come from IPython's ``print_figure`` and are embedded as a
    base64 ``data:`` URI. Despite the function's name, the tag carries no
    width/height attributes, so the image is shown at its rendered size.

    Parameters
    ----------
    fig : the figure handed to ``print_figure``.

    Returns
    -------
    unicode string holding the ``<img>`` element.
    """
    # b64encode replaces the deprecated encodestring and, unlike it, does not
    # insert line breaks into the payload of the data URI.
    from base64 import b64encode
    # The former x2x/y2x size computation was dropped: its results were never
    # used, and it multiplied by rcParams['savefig.dpi'], which is not
    # guaranteed to be numeric.
    png = print_figure(fig, 'png')
    png64 = b64encode(png).decode('ascii')
    return u"<img src='data:image/png;base64,%s'/>" % (png64)

# Register png2x as the text/html formatter for matplotlib figures, so that
# displaying a Figure emits the <img> tag it returns.
ip = get_ipython()
html_formatter = ip.display_formatter.formatters['text/html']
# The value returned by for_type is the cell's last expression and is echoed as its output.
html_formatter.for_type(matplotlib.figure.Figure, png2x)

Out[15]:

<function __main__.png2x>

In [5]:

import matplotlib

In [8]:

# Inspect which object the name `plot` is bound to in the namespace.
plot

Out[8]:

<function matplotlib.pyplot.plot>

In [80]:

# Global matplotlib styling: thin lines, small font, white figure background.
# These settings affect every figure drawn after this cell.
matplotlib.rcParams['lines.linewidth'] = .5
matplotlib.rcParams['axes.linewidth'] = .5
matplotlib.rcParams['patch.linewidth'] = .5
matplotlib.rcParams['font.size'] = 8
matplotlib.rcParams['figure.facecolor'] = 'white'

# Grey levels given as RGB triples with equal components.
grey = [.6]*3
dgrey = [.3]*3
matplotlib.rcParams['xtick.color'] = dgrey
matplotlib.rcParams['ytick.color'] = dgrey
matplotlib.rcParams['axes.edgecolor'] = grey
matplotlib.rcParams['axes.facecolor'] = [.99]*3
# Run the trials on iris with the default split and keep the histories.
iris_res = runthis('iris.arff')

Running perceptron classification on iris.arff with a testing split of 0.7
Running 5 trials

Trial 1

Using 105 for training, and 45 for testing
Fully trained
Best accuracy 1.0

Percent missed (of the test set): 0.0444444444444 ; 2.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 2

Using 105 for training, and 45 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.995098039216

Percent missed (of the test set): 0.222222222222 ; 10.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 3

Using 105 for training, and 45 for testing
Fully trained
Best accuracy 1.0

Percent missed (of the test set): 0.0666666666667 ; 3.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 4

Using 105 for training, and 45 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.995305164319

Percent missed (of the test set): 0.2 ; 9.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 5

Using 105 for training, and 45 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.990338164251

Percent missed (of the test set): 0.0 ; 0.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Final Results

Mean accuracy over 5 runs: 0.893333333333

In [57]:

# for trying out styles
f = figure()
plot([x[1][2] for x in iris_res[0]])
display(f)

In [81]:

# Run the trials on the voting data set with the default split and keep the histories.
vote_res = runthis('./votingMissingValuesReplaced.arff')

Running perceptron classification on ./votingMissingValuesReplaced.arff with a testing split of 0.7
Running 5 trials

Trial 1

Using 322 for training, and 139 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.968944099379

Percent missed (of the test set): 0.0791366906475 ; 11.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 2

Using 322 for training, and 139 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.972049689441

Percent missed (of the test set): 0.0935251798561 ; 13.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 3

Using 322 for training, and 139 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.962732919255

Percent missed (of the test set): 0.0431654676259 ; 6.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 4

Using 322 for training, and 139 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.962732919255

Percent missed (of the test set): 0.0503597122302 ; 7.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Trial 5

Using 322 for training, and 139 for testing
Done classifying; no progress in past 20 epochs
Best accuracy 0.981366459627

Percent missed (of the test set): 0.0647482014388 ; 9.0 instances

Weights vs Epochs for each perceptron:

Accuracies:

Final Results

Mean accuracy over 5 runs: 0.93381294964