import numpy as np
from pandas import DataFrame, Series
from scipy.io.arff import loadarff
import scipy

# Scratch exploration of the ex_1 data set with pandas.  Bare expressions
# (e.g. ``dc``) only echo a value in an interactive session.
# NOTE(review): presumably loadarff yields bytes for nominal attributes under
# Python 3, in which case the comparisons against 'cs' below would need the
# column decoded first -- confirm against the interpreter in use.
data, meta = loadarff('ex_1.arff')

df = DataFrame(data)

# Boolean-mask row selection.
df[df.major == 'cs']

dc = df.copy()

# A scalar assignment fills the whole column.
dc.major = True

dc

dc.friends /= 2

# Replace the column with a mask that is True where the original major is 'cs'.
dc.major = df.major == 'cs'

# One .loc assignment instead of the chained ``dc.age[mask] = 2``, which may
# write to a temporary object rather than to ``dc``.
dc.loc[df.major == 'cs', 'age'] = 2

df.major.unique()

# astype already returns a new Series, so no explicit copy is needed first.
m = dc.major.astype(int)

dm = df.copy()
# Only ``np`` is bound by the imports above; the bare name ``numpy`` is not.
dm.major = np.arange(len(dm.major))

# ``|`` is the element-wise OR for boolean masks (``+`` relied on bool addition).
df[(df.major == 'cs') | (df.friends >= 10)]

# Explicit copy so that adding a column below does not target a slice of ``df``.
nums = df[df.columns[:-1]].copy()

nums['man'] = Series(np.zeros(len(nums)), index=nums.index)

list(nums.columns)

data, meta = loadarff('iris.arff')
df = DataFrame(data)

from itertools import cycle
from IPython.display import display

def plot_weights(weights, labels, ncolors=7):
    """Plot each column of ``weights`` as a line and add a legend.

    Columns are drawn in groups of ``ncolors`` with one ``plot`` call per
    group; successive groups use the line styles '-', '--', '-.' in rotation.

    weights -- 2-D array sliced as ``weights[:, a:b]``; one line per column.
    labels  -- legend labels; 'Bias' is appended as the last entry.
    ncolors -- columns per group (default 7; presumably the length of the
               default matplotlib colour cycle -- confirm).
    """
    styles = cycle(('-', '--', '-.'))
    n = len(weights[0])
    # Stepping by ncolors covers the final partial group as well, and avoids
    # both the float result of ``n / ncolors`` on Python 3 and a trailing
    # plot call on an empty slice when n is a multiple of ncolors.
    for start in range(0, n, ncolors):
        plot(weights[:, start:start + ncolors], ls=next(styles))
    legend(list(labels) + ['Bias'], loc='upper left', bbox_to_anchor=(1.01, 1))

def test(fname):
    """Train a model built from an ARFF file and plot its training history.

    The model comes from ``p.fromArff(fname, .1)`` and the history ``a`` from
    ``m.trainUp()``.  Each history entry is read as ``x[0]`` (plotted as
    accuracies) and ``x[1]`` (plotted as weights, indexed per perceptron).

    Draws one weight figure per perceptron, an individual-accuracy figure
    when the first entry holds more than one accuracy, and a mean-accuracy
    figure.

    Returns the tuple ``(m, a)``.
    """
    m = p.fromArff(fname, .1)
    a = m.trainUp()
    print('Done Training')
    # list() because the pairs are indexed below; a Python 3 items() view
    # does not support indexing.
    ps = list(m.perceptrons.items())
    for i in range(len(a[0][1])):
        figure()
        plot_weights(array([x[1][i] for x in a]), m.data.columns[:-1])
        names = ps[i][0]
        title(names[0] + ' vs ' + names[1])
        xlabel('Epoch')
        ylabel('Weight')

    # Per-perceptron accuracies are only worth a figure when there are several.
    if len(a[0][0]) > 1:
        figure()
        plot(array([x[0] for x in a]))
        legend([pair[0] + ' vs ' + pair[1] for pair, _ in ps],
               loc='upper left', bbox_to_anchor=(1.01, 1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')

    figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    return m, a

def show_resuls(m, data, a):
    """Plot and display the training history ``a`` of model ``m``.

    m    -- model exposing ``perceptrons``, a mapping whose keys are indexable
            pairs of strings (used for titles and legend entries).
    data -- frame whose columns, except the last, label the weights.
    a    -- history; each entry is read as ``x[0]`` (accuracies) and ``x[1]``
            (weights, indexed per perceptron).

    Displays one weight figure per perceptron, an individual-accuracy figure
    when the first entry holds more than one accuracy, and a mean-accuracy
    figure.  Returns None.
    """
    # list() because the pairs are indexed below; a Python 3 items() view
    # does not support indexing.
    ps = list(m.perceptrons.items())
    print('Weights vs Epochs for each perceptron:')
    for i in range(len(a[0][1])):
        f = figure()
        plot_weights(array([x[1][i] for x in a]), data.columns[:-1])
        names = ps[i][0]
        title(names[0] + ' vs ' + names[1])
        xlabel('Epoch')
        ylabel('Weight')
        display(f)

    print('Accuracies:')
    if len(a[0][0]) > 1:
        f = figure()
        plot(array([x[0] for x in a]))
        legend([pair[0] + ' vs ' + pair[1] for pair, _ in ps],
               loc='upper left', bbox_to_anchor=(1.01, 1))
        ylim(-0.1, 1.1)
        title('Individual Perceptron Accuracy')
        xlabel('Epoch')
        ylabel('% Accuracy')
        display(f)
    f = figure()
    plot(array([array(x[0]).mean() for x in a]))
    title('Mean Accuracy')
    xlabel('Epoch')
    ylabel('% Accuracy')
    display(f)

import perceive as p
from pylab import *
#a = m.train(d, split=.7)
#a

def htmltitle(*args, **kwds):
    """Display the positional arguments as an HTML heading.

    args  -- values to show; each is converted with ``str`` and the pieces
             are joined with single spaces.
    level -- keyword only; heading level N for the ``<hN>`` tag (default 2).

    The message is inserted into the markup as-is (no HTML escaping).
    """
    # Imported locally: the module-level import of HTML appears later in the
    # file than the first top-level call that reaches this function.
    from IPython.display import HTML, display
    level = str(kwds.get('level', 2))
    message = ' '.join(map(str, args))
    display(HTML('<h' + level + '>' + message + '</h' + level + '>'))

def try_rates(meta, data, rates):
    """Train one model per learning rate and report how long each one ran.

    meta  -- ARFF metadata handed to ``p.Main``.
    data  -- frame to train on; each run receives its own copy.
    rates -- learning rates to try, in order.

    For every rate this shows a heading, trains with ``split=None``, prints
    the best accuracy and the length of the returned history, and plots the
    history.  A Rate/Epochs table is displayed at the end.

    Returns the list of per-rate training histories.
    """
    allz = []
    for rate in rates:
        htmltitle('Learning Rate', rate)
        m = p.Main(meta, rate)
        results = m.train(data.copy(), split=None)
        # %-formatting prints the same text under Python 2 and Python 3.
        print('Best accuracy %s' % (m.best,))
        print('Number of epochs: %d' % len(results))
        show_resuls(m, data, results)
        allz.append(results)
    compare = DataFrame({
        "Rate": Series(rates),
        "Epochs": Series([len(history) for history in allz]),
    }, columns=['Rate', 'Epochs'])
    htmltitle('Number of Epochs vs Learning Rate')
    display(compare)
    return allz
    
def trials(meta, data, num, split=.7):
    """Run ``num`` independent training trials and report the mean accuracy.

    meta  -- ARFF metadata handed to ``p.Main``.
    data  -- frame to train on; each trial receives its own copy.
    num   -- number of trials.
    split -- forwarded to ``m.train`` as ``split`` (default .7).

    Each trial trains a fresh model with learning rate .1.  ``m.train`` is
    unpacked as ``(results, missed)``, where ``missed[0]`` is reported as the
    fraction of the test set missed and ``missed[1]`` as the instance count.

    Returns the list of per-trial training histories.
    """
    allz = []
    # %-formatting prints the same text under Python 2 and Python 3.
    print('Running %s trials' % (num,))
    miss_rates = []
    for trial in range(1, num + 1):
        htmltitle('Trial', trial)
        m = p.Main(meta, .1)
        results, missed = m.train(data.copy(), split=split)
        print('Best accuracy %s' % (m.best,))
        htmltitle('Percent missed (of the test set):', missed[0], '; ', missed[1], ' instances', level=4)
        miss_rates.append(missed[0])
        show_resuls(m, data, results)
        allz.append(results)
    htmltitle('Final Results')
    # With no trials there is nothing to average (and nothing to divide by).
    if miss_rates:
        mean_accuracy = 1 - sum(miss_rates) / float(len(miss_rates))
        print('Mean accuracy over %s runs: %s' % (num, mean_accuracy))
    return allz

def runthis(fname, split=.7, num=5):
    """Load an ARFF file and run perceptron trials on it.

    fname -- path of the ARFF file.
    split -- forwarded to ``trials`` as the testing split (default .7).
    num   -- number of trials to run (default 5, the previously fixed value).

    Sets the global figure size to (6, 3) before running.

    Returns whatever ``trials`` returns: the list of per-trial histories.
    """
    data, meta = loadarff(fname)
    # %-formatting prints the same text under Python 2 and Python 3.
    print('Running perceptron classification on %s with a testing split of %s'
          % (fname, split))

    rcParams['figure.figsize'] = (6, 3)
    return trials(meta, DataFrame(data), num, split)

# Sweep the same learning rates over both example data sets.  After the loop
# ``data`` and ``meta`` hold the contents of ex_2, the last file loaded.
for fname in ('./ex_1.arff', './ex_2.arff'):
    data, meta = loadarff(fname)
    try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001])

# Train on ex_1, then draw the best weights as a line over the normalized data.
data, meta = loadarff('./ex_1.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
history = m.train(data)

# Normalization happens after training; the plot uses the normalized frame.
n = p.normalizers(data, meta)
data = p.normalize(n, data)

f = figure()

# One scatter series per major: friends on the x axis, age on the y axis.
cs = data[data.major == 'cs']
law = data[data.major == 'law']
scatter(cs.friends, cs.age, color='r')
scatter(law.friends, law.age, color='b')

wx, wy, bias = m.best_weights[0]

# The line satisfies wx*x + wy*y + bias = 0; solve for x at the smallest and
# largest value of the second column (presumably age -- confirm column order).
y1, y2 = min(data[data.columns[1]]), max(data[data.columns[1]])
x1 = (-bias - y1 * wy) / wx
x2 = (-bias - y2 * wy) / wx

# The legend is requested before the line is drawn, while only the two
# scatter series exist.
legend(('CS', 'Law'))
plot([x1, x2], [y1, y2])
xlabel('Friends (normalized)')
ylabel('Age (normalized)')
title('Friends vs Age of CS and Law students')
display(f)
data, m.best_weights

# Train on ex_2, then draw the best weights as a line over the normalized data.
data, meta = loadarff('./ex_2.arff')
data = DataFrame(data)
m = p.Main(meta, .1)
history = m.train(data)

# Normalization happens after training; the plot uses the normalized frame.
n = p.normalizers(data, meta)
data = p.normalize(n, data)

f = figure()

# One scatter series per hair colour: cats on the x axis, age on the y axis.
# The names ``cs`` and ``law`` are reused here for the brown and blond groups.
cs = data[data.hair_color == 'brown']
law = data[data.hair_color == 'blond']
scatter(cs.cats, cs.age, color='r')
scatter(law.cats, law.age, color='b')

wx, wy, bias = m.best_weights[0]

# The line satisfies wx*x + wy*y + bias = 0; solve for y at the smallest and
# largest value of the first column (presumably cats -- confirm column order).
x1, x2 = min(data[data.columns[0]]), max(data[data.columns[0]])
y1 = (-bias - x1 * wx) / wy
y2 = (-bias - x2 * wx) / wy

# The legend is requested before the line is drawn, while only the two
# scatter series exist.
legend(('Brown', 'Blond'))
plot([x1, x2], [y1, y2])
xlabel('Number of Cats (normalized)')
ylabel('Age (normalized)')
title('Hair Color of Cat Owners')
display(f)
data, m.best_weights

from base64 import encodestring

import matplotlib

from IPython.display import display, HTML
from IPython.core.pylabtools import print_figure

def png2x(fig):
    """Render ``fig`` to PNG and return it as an inline HTML ``<img>`` tag.

    The PNG bytes from ``print_figure(fig, 'png')`` are base64-encoded into a
    ``data:`` URI.  No width or height attribute is emitted, so the image is
    shown at whatever size the PNG was rendered at.

    fig -- the figure to render.

    Returns a unicode string containing the ``<img>`` element.
    """
    # b64encode produces one unbroken line, which suits a data URI, and is
    # available on Python versions where ``encodestring`` no longer exists.
    from base64 import b64encode
    # The earlier size arithmetic was dropped: its results were never used,
    # and it multiplied by rcParams['savefig.dpi'], which need not be a number.
    png = print_figure(fig, 'png')
    png64 = b64encode(png).decode('ascii')
    return u"<img src='data:image/png;base64,%s'/>" % (png64)

# Attach png2x to the text/html display formatter for matplotlib Figure objects.
# NOTE(review): get_ipython is not imported anywhere in the visible code;
# presumably it is supplied by a running IPython session -- confirm before
# running this file as a plain script.
ip = get_ipython()
html_formatter = ip.display_formatter.formatters['text/html']
html_formatter.for_type(matplotlib.figure.Figure, png2x)

import matplotlib

# Bare name kept from the notebook; evaluating it has no effect in a script.
plot

# Thin strokes for lines, axes and patches.
for param in ('lines.linewidth', 'axes.linewidth', 'patch.linewidth'):
    matplotlib.rcParams[param] = .5
matplotlib.rcParams['font.size'] = 8
matplotlib.rcParams['figure.facecolor'] = 'white'

# Grey shades as RGB triples: dark grey for ticks, lighter grey for axes edges.
grey = [.6]*3
dgrey = [.3]*3
for param in ('xtick.color', 'ytick.color'):
    matplotlib.rcParams[param] = dgrey
matplotlib.rcParams['axes.edgecolor'] = grey
matplotlib.rcParams['axes.facecolor'] = [.99]*3

iris_res = runthis('iris.arff')

# for trying out styles: plot item [1][2] of every entry in the first trial
f = figure()
plot([entry[1][2] for entry in iris_res[0]])
display(f)

vote_res = runthis('./votingMissingValuesReplaced.arff')