import numpy as np from pandas import DataFrame, Series from scipy.io.arff import loadarff import scipy data, meta = loadarff('ex_1.arff') df = DataFrame(data) df[df.major == 'cs'] dc = df.copy() dc.major = True dc dc.friends /= 2 dc.major = df.major == 'cs' dc.age[df.major == 'cs'] = 2 df.major.unique() m = dc.major.copy().astype(int) dm = df.copy() dm.major = numpy.arange(len(dm.major)) df[(df.major == 'cs') + (df.friends >= 10)] nums = df[df.columns[:-1]] nums['man'] = Series(np.zeros(len(nums)), index=nums.index) list(nums.columns) data, meta = loadarff('iris.arff') df = DataFrame(data) from itertools import cycle from IPython.display import display def plot_weights(weights, labels): styles = cycle(('-', '--', '-.')) n = len(weights[0]) ncolors = 7 i = -1 for i in range(0, n/ncolors): plot(weights[:,i*ncolors:(i+1)*ncolors], ls=styles.next()) plot(weights[:,(i+1)*ncolors:], ls=styles.next()) legend(list(labels) + ['Bias'], loc='upper left', bbox_to_anchor=(1.01,1)) def test(fname): m = p.fromArff(fname, .1) a = m.trainUp() print 'Done Training' # Plot the accuracies over epochs ps = m.perceptrons.items() for i in range(len(a[0][1])): figure() plot_weights(array([x[1][i] for x in a]), m.data.columns[:-1]) title(ps[i][0][0] + ' vs ' + ps[i][0][1]) xlabel('Epoch') ylabel('Weight') if len(a[0][0]) > 1: figure() plot(array([x[0] for x in a])) legend(list(l[0] + ' vs ' + l[1] for l, p in ps), loc='upper left', bbox_to_anchor=(1.01,1)) ylim(-0.1, 1.1) title('Individual Perceptron Accuracy') xlabel('Epoch') ylabel('% Accuracy') figure() plot(array([array(x[0]).mean() for x in a])) title('Mean Accuracy') xlabel('Epoch') ylabel('% Accuracy') return m, a def show_resuls(m, data, a): # Plot the accuracies over epochs ps = m.perceptrons.items() print 'Weights vs Epochs for each perceptron:' for i in range(len(a[0][1])): f = figure() plot_weights(array([x[1][i] for x in a]), data.columns[:-1]) title(ps[i][0][0] + ' vs ' + ps[i][0][1]) xlabel('Epoch') ylabel('Weight') 
display(f) print 'Accuracies:' if len(a[0][0]) > 1: f = figure() plot(array([x[0] for x in a])) legend(list(l[0] + ' vs ' + l[1] for l, p in ps), loc='upper left', bbox_to_anchor=(1.01,1)) ylim(-0.1, 1.1) title('Individual Perceptron Accuracy') xlabel('Epoch') ylabel('% Accuracy') display(f) f = figure() plot(array([array(x[0]).mean() for x in a])) title('Mean Accuracy') xlabel('Epoch') ylabel('% Accuracy') display(f) import perceive as p from pylab import * #a = m.train(d, split=.7) #a def htmltitle(*args, **kwds): level = str(kwds.get('level', 2)) message = ' '.join(map(str, args)) display(HTML('' + message + '')) def try_rates(meta, data, rates): allz = [] for rate in rates: htmltitle('Learning Rate', rate) m = p.Main(meta, rate) results = m.train(data.copy(), split=None) print 'Best accuracy', m.best print 'Number of epochs:', len(results) show_resuls(m, data, results) allz.append(results) compare = DataFrame({ "Rate": Series(rates), "Epochs": Series(tuple(len(results) for results in allz)) }, columns=['Rate', 'Epochs']) htmltitle('Number of Epochs vs Learning Rate') display(compare) return allz def trials(meta, data, num, split=.7): allz = [] print 'Running', num, 'trials' accuracies = [] for i in range(num): htmltitle('Trial', i+1) m = p.Main(meta, .1) results, accuracy = m.train(data.copy(), split=split) print 'Best accuracy', m.best htmltitle('Percent missed (of the test set):', accuracy[0], '; ', accuracy[1], ' instances', level=4) accuracies.append(accuracy[0]) show_resuls(m, data, results) allz.append(results) htmltitle('Final Results') print 'Mean accuracy over', num, 'runs:', 1 - sum(accuracies)/float(num) return allz def runthis(fname, split=.7): data, meta = loadarff(fname) print 'Running perceptron classification on', fname, 'with a testing split of', split #figsize(12, 6) rcParams['figure.figsize'] = (6, 3) return trials(meta, DataFrame(data), 5, split) data, meta = loadarff('./ex_1.arff') try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, 
.001]); data, meta = loadarff('./ex_2.arff') try_rates(meta, DataFrame(data), [1000, 100, 10, 1, .01, .001]); data, meta = loadarff('./ex_1.arff') data = DataFrame(data) m = p.Main(meta, .1) history = m.train(data) n = p.normalizers(data, meta) data = p.normalize(n, data) f = figure() #data[data.columns[:2]].plot() cs = data[data.major == 'cs'] scatter(cs.friends, cs.age, color='r') law = data[data.major == 'law'] scatter(law.friends, law.age, color='b') wx, wy, bias = m.best_weights[0] y1 = min(data[data.columns[1]]) y2 = max(data[data.columns[1]]) x1 = (-bias - y1 * wy) / wx x2 = (-bias - y2 * wy) / wx legend(('CS', 'Law')) plot([x1, x2], [y1, y2]) xlabel('Friends (normalized)') ylabel('Age (normalized)') title('Friends vs Age of CS and Law students') display(f) data, m.best_weights data, meta = loadarff('./ex_2.arff') data = DataFrame(data) m = p.Main(meta, .1) history = m.train(data) n = p.normalizers(data, meta) data = p.normalize(n, data) f = figure() #data[data.columns[:2]].plot() cs = data[data.hair_color == 'brown'] scatter(cs.cats, cs.age, color='r') law = data[data.hair_color == 'blond'] scatter(law.cats, law.age, color='b') wx, wy, bias = m.best_weights[0] x1 = min(data[data.columns[0]]) x2 = max(data[data.columns[0]]) y1 = (-bias - x1 * wx) / wy y2 = (-bias - x2 * wx) / wy legend(('Brown', 'Blond')) plot([x1, x2], [y1, y2]) xlabel('Number of Cats (normalized)') ylabel('Age (normalized)') title('Hair Color of Cat Owners') display(f) data, m.best_weights from base64 import encodestring import matplotlib from IPython.display import display, HTML from IPython.core.pylabtools import print_figure def png2x(fig): """render figure to 2x PNG via HTML""" x,y = matplotlib.rcParams['figure.figsize'] dpi = matplotlib.rcParams['savefig.dpi'] x2x = int(x * dpi / 2) y2x = int(y * dpi / 2) png = print_figure(fig, 'png') png64 = encodestring(png).decode('ascii') return u"" % (png64) ip = get_ipython() html_formatter = ip.display_formatter.formatters['text/html'] 
import matplotlib

# Route every matplotlib Figure through png2x when IPython asks for its HTML
# representation.
html_formatter.for_type(matplotlib.figure.Figure, png2x)

# Compact figure style: thin lines, small font, grey axes on a near-white
# background.  (A stray bare ``plot`` expression that did nothing was removed.)
grey = [.6] * 3
dgrey = [.3] * 3
for key, value in (
    ('lines.linewidth', .5),
    ('axes.linewidth', .5),
    ('patch.linewidth', .5),
    ('font.size', 8),
    ('figure.facecolor', 'white'),
    ('xtick.color', dgrey),
    ('ytick.color', dgrey),
    ('axes.edgecolor', grey),
    ('axes.facecolor', [.99] * 3),
):
    # Item assignment keeps matplotlib's per-key validation.
    matplotlib.rcParams[key] = value

iris_res = runthis('iris.arff')

# for trying out styles
# Plots element [1][2] of every history entry of the first iris trial
# (presumably the weights of the third perceptron over the epochs -- confirm).
f = figure()
plot([x[1][2] for x in iris_res[0]])
display(f)

vote_res = runthis('./votingMissingValuesReplaced.arff')