from ggplot import * import pandas as pd import numpy as np %pylab inline %load_ext rpy2.ipython %%R library(ggplot2) x = np.random.uniform(0,1,200) y = np.random.uniform(0,1,200) df = pd.DataFrame({'x':x, 'y':y}) %Rpush df %%R -w 1200 -o df ggplot() + geom_point(aes(x,y), data=df, size=5) + ggtitle("TSP points") df = pd.DataFrame({'x':x, 'y':y}) perm = np.random.permutation(np.arange(0,df.shape[0],1)) df['order'] = perm df_perm = df.sort('order') %Rpush df_perm %%R -w 1200 -o df_perm p = ggplot() + geom_path(aes(x,y), data=df_perm, size=1) p + geom_point(aes(x,y), data=df_perm, size=4) + ggtitle("TSP points") def distance(df): dx = df['x'] - df['x'].shift() dy = df['y'] - df['y'].shift() return sum(sqrt(dx**2+dy**2)) rand_scores = [] for i in range(1000): df['order'] = np.random.permutation(np.arange(0,df.shape[0],1)) df = df.sort('order') rand_scores.append(distance(df)) dist_df = pd.DataFrame({'dist': np.array(rand_scores)}) %Rpush dist_df %%R -w 1200 -o dist_df p = ggplot() + geom_histogram(aes(dist), data=dist_df, binwidth=0.5) p + ggtitle("Histogram of Random RSP Distances") dim = 6 df_perm = df df_perm['xcut'] = pd.cut(df_perm.x, dim, labels=range(dim)) df_perm['ycut'] = pd.cut(df_perm.y, dim, labels=range(dim)) df_perm = df_perm.sort(['xcut','ycut']) df_perm.order = np.arange(0, df_perm.shape[0], 1) df_perm = pd.concat([df_perm, df_perm.head(1)]) print(df_perm.head(15)) print(distance(df_perm)) %Rpush df_perm %%R -w 1200 -o df_perm p = ggplot() + geom_path(aes(x,y), data=df_perm, size=1) p + geom_point(aes(x,y), data=df_perm, size=4) + ggtitle("TSP points") def boxer1(dim): df_perm = df df_perm['xcut'] = pd.cut(df_perm.x, dim, labels=range(dim)) df_perm['ycut'] = pd.cut(df_perm.y, dim, labels=range(dim)) df_perm = df_perm.sort(['xcut','ycut']) df_perm.order = np.arange(0, df_perm.shape[0], 1) return(pd.concat([df_perm, df_perm.head(1)])) pltr = pd.DataFrame({'x':range(1,50), 'y':[distance(boxer1(i)) for i in range(1,50)]}) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_path(aes(x,y), data=pltr, size=1) p + ggtitle("TSP performance over box dimensions") pltr = boxer1(8) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_path(aes(x,y), data=pltr, size=1) p + geom_point(aes(x,y), data=pltr, size=4) + ggtitle("TSP points") pltr = boxer1(40) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_path(aes(x,y), data=pltr, size=1) p + geom_point(aes(x,y), data=pltr, size=4) + ggtitle("TSP points") x = np.random.uniform(0,1,2000) y = np.random.uniform(0,1,2000) df = pd.DataFrame({'x':x, 'y':y}) pltr = pd.DataFrame({'x':range(1,50), 'y':[distance(boxer1(i)) for i in range(1,50)]}) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_path(aes(x,y), data=pltr, size=1) p + ggtitle("TSP performance over box dimensions") pltr = boxer1(40) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_path(aes(x,y), data=pltr, size=1) p + geom_point(aes(x,y), data=pltr, size=3) + ggtitle("TSP points") def sign(x): if x % 2 == 0: return(-1) return(1) def boxer2(dim): df_perm = df df_perm['xcut'] = pd.cut(df_perm.x, dim, labels=range(dim)) df_perm['ycut'] = pd.cut(df_perm.y, dim, labels=range(dim)) df_perm['ycut'] = np.array([sign(x) for x in df_perm['xcut'] % 2])* df_perm['ycut'] df_perm = df_perm.sort(['xcut','ycut']) df_perm.order = np.arange(0, df_perm.shape[0], 1) return(pd.concat([df_perm, df_perm.head(1)])) pltr = boxer1(40) print(distance(pltr)) pltr = boxer2(40) print(distance(pltr)) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_path(aes(x,y), data=pltr, size=1) p + geom_point(aes(x,y), data=pltr, size=3) + ggtitle("TSP points") def dist(s): r = np.random.uniform(0,1.0/s,4) return(np.sqrt((r[0]-r[1])**2 + (r[2]-r[3])**2)) print mean([dist(1) for i in range(10000)]) print mean([dist(4) for i in range(10000)]) def analysis(d,p): res = 0.52140543/d*(p-1)/d/d res = res + np.sqrt((1.0/3/d)**2+(1.0/d)**2) return(res*d*d) pltr = pd.DataFrame({'x': range(1,60), 'true': [distance(boxer2(i)) for i in range(1,60)], 'pred': [analysis(i,2000) for i in range(1,60)]}) %Rpush pltr %%R -w 1200 -o pltr p = ggplot() + geom_line(aes(x,true), data=pltr, colour="red") p = p + geom_line(aes(x,pred), data=pltr, colour="blue") p + ggtitle("Heuristic Performance Prediction")