Vahid Moosavi

# Third Session: From Probability to Statistics¶

04 October 2016

### Topics to be discussed¶

• Probability Distributions
• Functions of random variables
• Law of Large Numbers
• Central Limit Theorem
• Important statistical notions
• expected value (average)
• variance
• median
• mode
• higher order statistics
• skewness and tails
• kurtosis
• moments
• covariance
• scatter matrix
• correlation
• Density Learning
In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
pd.__version__
import sys
from scipy import stats
%matplotlib inline
from bokeh.models import CustomJS, ColumnDataSource, Slider,TextInput
from bokeh.models import TapTool, CustomJS, ColumnDataSource
from bokeh.layouts import column
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.plotting import Figure, output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Slider
from bokeh.io import push_notebook
from bokeh.io import show
from bokeh.models import (
ColumnDataSource,
HoverTool,
LinearColorMapper,
BasicTicker,
PrintfTickFormatter,
ColorBar,
)
output_notebook()
from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider
from bokeh.plotting import Figure, output_file, show


# Some Experiments with random variables and known distributions¶

### Binomial distribution¶

In [2]:
# Imagine you play a game 10 times and you want to know your expected number of success
# Each sample is Binomial(10, p): the number of wins in one 10-round game.
N = 10000
p = .1

X = [np.random.binomial(10, p) for i in range(N)]

fig = plt.figure(figsize=(10, 10))
plt.subplot(2, 1, 1)
plt.plot(X, '.r');
S_avg = np.mean(X)
# Fixed for Python 3: the original used the Python 2 print statement.
# Expected value is 10 * p = 1.
print('average number of success in {} times play is {}'.format(N, S_avg))
plt.plot([0, N], [S_avg, S_avg], linewidth=4);

plt.subplot(2, 1, 2)
plt.hist(X, bins=100);

average number of success in 10000 times play is 1.0141


### Geometric Distribution¶

In [3]:
# Sample the Geometric(p) distribution: number of independent Bernoulli(p)
# trials up to and including the first success. Expected value is 1/p.
N  = 1000
p = .3

# Geometric distribution is when we stop the game after the first win. Therefore, it is a measure of number of games
X = np.random.geometric(p,size=N)

fig =plt.figure(figsize=(10,10))
plt.subplot(2,1,1)
# Raw samples (red dots) with the sample mean overlaid as a horizontal line.
plt.plot(X, '.r');
S_avg = np.mean(X)
plt.plot([0,N],[S_avg,S_avg],linewidth=4);

# Histogram of the samples; geometric decay is visible in the bar heights.
plt.subplot(2,1,2)
plt.hist(X,bins=100);


### Uniform Distribution¶

In [4]:
# Sample the continuous Uniform(l, h) distribution; expected value is (l + h) / 2.
N  = 1000
l= 0
h = 10

X = np.random.uniform(low=l, high=h, size=N)

fig =plt.figure(figsize=(10,10))
plt.subplot(2,1,1)
# Raw samples (red dots) with the sample mean overlaid as a horizontal line.
plt.plot(X, '.r');
S_avg = np.mean(X)
plt.plot([0,N],[S_avg,S_avg],linewidth=4);

# Histogram should look approximately flat over [l, h].
plt.subplot(2,1,2)
plt.hist(X,bins=100);


### Gaussian (Normal) distribution¶

In [5]:
# Sample the Normal(m, s^2) distribution; the sample mean should be near m.
m = 10
s = 1
N  = 10000
X = np.random.normal(loc=m, scale=s, size=N)

fig =plt.figure(figsize=(10,10))
plt.subplot(2,1,1)
# Raw samples (red dots) with the sample mean overlaid as a horizontal line.
plt.plot(X, '.r');
S_avg = np.mean(X)
plt.plot([0,N],[S_avg,S_avg],linewidth=4);

# Histogram should show the familiar bell shape centered at m.
plt.subplot(2,1,2)
plt.hist(X,bins=100);


## A note on Functions of Random numbers¶

• imagine we know the distribution of two variables and we want to know the distribution of a function of those variables. For example, imagine we know the quality of a product of a machine depends on the temperature and pressure of tool head: ## $$y = 2t^2 + 3p^3$$

### The arithmetic of random variables quickly gets complex, but there is an algebra of random variables: the symbolic manipulation of random variables. This goes in the direction of quantum theory¶

This is interesting, because similar to "deterministic systems", even with knowing the relations and the distributions of random variables it is not easy to "analytically" see the distribution of functions of random variables and the usual choice is to simulate

#### An example to the analytical approach:¶

In [6]:
# Analytical example: the sum of two independent Gaussians is itself Gaussian
# (means add, variances add). We verify this empirically with KDE plots.
N = 10000
# Column vectors (N, 1) so they can be concatenated side by side below.
x1= np.random.normal(loc=2,scale=5,size=N)[:,np.newaxis]
x2= np.random.normal(loc=0,scale=5,size=N)[:,np.newaxis]
y = x1+x2

Data= np.concatenate((x1,x2,y),axis=1)
DF = pd.DataFrame(data=Data,columns=['x1','x2','y'])

# One KDE panel per variable; the black square marks each sample mean.
ax1= plt.subplot(3,1,1)
DF['x1'].plot(ax=ax1,kind='kde',linewidth=3,color='red',style='-')
# plt.plot(DF['x1'].values[:].mean(),0,'sr',markersize=30)
plt.plot(DF['x1'].values[:].mean(),0.001,'sk',markersize=10);

ax2= plt.subplot(3,1,2)
DF['x2'].plot(ax=ax2,kind='kde',linewidth=3,color='blue',style='-')
# plt.plot(DF['x2'].values[:].mean(),0,'sb',markersize=30)
plt.plot(DF['x2'].values[:].mean(),0.001,'sk',markersize=10);

# y = x1 + x2: still bell-shaped, centered at 2 + 0 = 2.
ax3= plt.subplot(3,1,3)
DF['y'].plot(ax=ax3,kind='kde',linewidth=3,color='g',style='-')
plt.plot(DF['y'].values[:].mean(),0.001,'sk',markersize=10);


## Adding a bit of nonlinearity¶

In [7]:
# Same experiment, but with a nonlinear function of the inputs:
# y = 2*x1^2 + 3*x2^2. The resulting distribution is no longer Gaussian.
N = 1000

# change the center of random variables and look at the distribution of y
x1= np.random.normal(loc=5,scale=5,size=N)[:,np.newaxis]
x2= np.random.normal(loc=1,scale=5,size=N)[:,np.newaxis]

y = 2* x1*x1 +3*x2*x2

# y = -2*x1*x1 + np.random.normal(loc=1.0, scale=5.1, size=N)[:,np.newaxis]

# y = x1/x1[::-1]
Data= np.concatenate((x1,x2,y),axis=1)
DF = pd.DataFrame(data=Data,columns=['x1','x2','y'])

# One KDE panel per variable; the black square marks each sample mean.
ax1= plt.subplot(3,1,1)
DF['x1'].plot(ax=ax1,kind='kde',linewidth=3,color='red',style='-')
# plt.plot(DF['x1'].values[:].mean(),0,'sr',markersize=30)
plt.plot(DF['x1'].values[:].mean(),0.001,'sk',markersize=10);

ax2= plt.subplot(3,1,2)
DF['x2'].plot(ax=ax2,kind='kde',linewidth=3,color='blue',style='-')
# plt.plot(DF['x2'].values[:].mean(),0,'sb',markersize=30)
plt.plot(DF['x2'].values[:].mean(),0.001,'sk',markersize=10);

# y is skewed: squaring Gaussians produces a heavy right tail.
ax3= plt.subplot(3,1,3)
DF['y'].plot(ax=ax3,kind='kde',linewidth=3,color='g',style='-')
plt.plot(DF['y'].values[:].mean(),0.001,'sk',markersize=10);


## ¶

However, in Data Driven Modeling we are one step behind this.

There, we have another problem, where in addition to random variables, we have "uncertain relations".

### This means:¶

• We don't know the important variables toward our question.
• Their distributions are not known
• And the inter-relations are not known too.
• But we just have data

## This is the shift from probability theory to statistics and later to machine learning¶

#### Therefore, historically, there have been many theoretical works, in order to use probability theory, when we only have observations. Two of the most important works are:¶

• Law of Large Numbers
• Central Limit Theorem

# Law of Large Numbers¶

https://en.wikipedia.org/wiki/Law_of_large_numbers

# ¶

flipping a coin sequentially and independently

## A bit of history¶

From Wikipedia: The Italian mathematician Gerolamo Cardano (1501–1576) stated without proof that the accuracies of empirical statistics tend to improve with the number of trials. This was then formalized as a law of large numbers. A special form of the LLN (for a binary random variable) was first proved by Jacob Bernoulli. It took him over 20 years to develop a sufficiently rigorous mathematical proof which was published in his Ars Conjectandi (The Art of Conjecturing) in 1713. He named this his "Golden Theorem" but it became generally known as "Bernoulli's Theorem". This should not be confused with Bernoulli's principle, named after Jacob Bernoulli's nephew Daniel Bernoulli. In 1837, S.D. Poisson further described it under the name "la loi des grands nombres" ("The law of large numbers"). Thereafter, it was known under both names, but the "Law of large numbers" is most frequently used.

# ¶

### Flipping a coin simulation¶

In [8]:
#Here, probability is a matter of belief
def cointoss_LLN(n_times=50):
    """Simulate n_times coin tosses and return the running estimate of P(heads).

    Each toss is Bernoulli with a success probability freshly drawn from
    Uniform[0, 1) (see the commented-out line for a fixed-p variant), so the
    running average converges toward 0.5 by the law of large numbers.

    Returns a float array of length n_times: the cumulative fraction of
    heads after each toss.
    """
    p = .5  # kept for the fixed-probability variant below
    outcomes = [np.random.binomial(1, np.random.rand()) for i in range(n_times)]
    # outcomes = [np.random.binomial(1, p) for i in range(n_times)]
    # Divide the running head count by the number of tosses so far.
    avg_P_H = (np.cumsum(outcomes)).astype(float) / np.arange(1, n_times + 1)
    return avg_P_H

#Here we expect .5
#Change the n_times and see what happens
avg_P_H = cointoss_LLN(n_times=1000)

fig =plt.figure(figsize=(10,10))
# Top panel: running estimate of P(heads) versus toss number; it should
# settle near 0.5 as the number of tosses grows.
plt.subplot(2,1,1)
plt.plot(range(len(avg_P_H)),avg_P_H,'.-r',markersize=.2,linewidth=1.2);
plt.xlabel('iteration')
plt.ylabel('probability so far')
# Bottom panel: histogram of the running estimates.
plt.subplot(2,1,2)
plt.hist(avg_P_H,bins=200);


# Central Limit Theorem (CLT)¶

## A bit of history¶

From wikipedia: The central limit theorem has an interesting history. The first version of this theorem was postulated by the French-born mathematician Abraham de Moivre who, in a remarkable article published in 1733, used the normal distribution to approximate the distribution of the number of heads resulting from many tosses of a fair coin. This finding was far ahead of its time, and was nearly forgotten until the famous French mathematician Pierre-Simon Laplace rescued it from obscurity in his monumental work Théorie analytique des probabilités, which was published in 1812. Laplace expanded De Moivre's finding by approximating the binomial distribution with the normal distribution. But as with De Moivre, Laplace's finding received little attention in his own time. It was not until the nineteenth century was at an end that the importance of the central limit theorem was discerned, when, in 1901, Russian mathematician Aleksandr Lyapunov defined it in general terms and proved precisely how it worked mathematically. Nowadays, the central limit theorem is considered to be the unofficial sovereign of probability theory.

## Important point:¶

• CLT is about the distribution of mean values of any large sample
• CLT is the essence of the majority of statistical tests
In [9]:
def run_experiment_N_time(n_each_test=500, n_repeat_test=100, test='bernoulli'):
    """Repeat a sampling experiment and collect the per-experiment means.

    Each of the n_repeat_test experiments draws n_each_test samples from the
    chosen distribution and reduces them to a single mean, so the returned
    list is a sample of means — the quantity the CLT makes a statement about.

    Parameters:
        n_each_test: number of draws per experiment.
        n_repeat_test: number of experiments (length of the result).
        test: one of 'bernoulli', 'uniform', 'geometric', 'gaussian'.

    Returns a list of floats, or None for an unrecognized test name
    (unchanged from the original behavior).
    """
    if test == 'bernoulli':
        p = .5
        # binomial(n, p)/n is the fraction of successes, i.e. the mean of
        # n_each_test Bernoulli(p) draws.
        all_P_H = [np.random.binomial(n_each_test, p) / float(n_each_test)
                   for i in range(n_repeat_test)]
        return all_P_H

    if test == 'uniform':
        l = 0
        u = 10
        all_P_H = [np.random.uniform(low=l, high=u, size=n_each_test).mean()
                   for i in range(n_repeat_test)]
        return all_P_H

    if test == 'geometric':
        p = .1
        all_P_H = [np.random.geometric(p, size=n_each_test).mean()
                   for i in range(n_repeat_test)]
        return all_P_H

    if test == 'gaussian':
        m = 0
        s = 1
        all_P_H = [np.random.normal(loc=m, scale=s, size=n_each_test).mean()
                   for i in range(n_repeat_test)]
        return all_P_H

In [10]:
n_each_test = 400

#Increase the number of observations
n_repeat_test = 10000
# all_P_H = [cointoss(n_times=n_each_test) for i in range(n_repeat_test)]

# Sample of means — per the CLT its histogram should look Gaussian
# regardless of which underlying distribution is chosen below.
all_P_H = run_experiment_N_time(n_each_test=n_each_test,n_repeat_test=n_repeat_test,test='bernoulli')
# all_P_H = run_experiment_N_time(n_each_test=n_each_test,n_repeat_test=n_repeat_test,test='uniform')
# all_P_H = run_experiment_N_time(n_each_test=n_each_test,n_repeat_test=n_repeat_test,test='geometric')
# all_P_H = run_experiment_N_time(n_each_test=n_each_test,n_repeat_test=n_repeat_test,test='gaussian')
fig = plt.figure(figsize=(10,10));
plt.subplot(2,1,1);
plt.plot(all_P_H,'.');
plt.xlabel('Iterations');

plt.subplot(2,1,2);
# density=True replaces the deprecated (and later removed) normed=True.
plt.hist(all_P_H,bins=100,density=True);

# Overlay a kernel density estimate of the sample of means.
from scipy import stats
kernel = stats.gaussian_kde(all_P_H)
mn = np.min(all_P_H)
mx = np.max(all_P_H)
val_grids = np.linspace(mn,mx,num=1000).ravel()
Z = kernel(val_grids)

plt.plot(val_grids,Z,linewidth=5);


# Important terms in statistics (for single variable)¶

• expected value (average)
• variance
• median
• mode
• box plot
• histograms

• higher order statistics

• skewness and tails
• kurtosis
• moments
In [11]:
# Visualize mean, median, and mode on a KDE of a symmetric Gaussian sample,
# then overlay KDEs of shifted/wider Gaussians for comparison.
N  = 10000
X = np.random.normal(loc=0,scale=3,size=N)[:,np.newaxis]
# X = np.random.beta(1,5,size=N)[:,np.newaxis]
# X = np.random.uniform(low=0, high=4, size=N)[:,np.newaxis]

val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);
# density=True replaces the deprecated (and later removed) normed=True.
a = plt.hist(X,bins=20,density=True,color='gray',alpha=.2, edgecolor='gray');

# For a symmetric distribution the three vertical lines should coincide.
plt.plot([np.mean(X),np.mean(X)],[0,Z.max()],'r',label='mean',linewidth=5);
plt.plot([np.median(X),np.median(X)],[0,Z.max()],'g',label='median',linewidth=5);
# Mode estimated as the grid point where the KDE peaks.
plt.plot([val_grids[np.argmax(Z)],val_grids[np.argmax(Z)]],[0,Z.max()],'b',label='mode',linewidth=5);

X = np.random.normal(loc=0,scale=5,size=N)[:,np.newaxis]
val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);

X = np.random.normal(loc=4,scale=5,size=N)[:,np.newaxis]
val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);

X = np.random.normal(loc=-4,scale=5,size=N)[:,np.newaxis]
val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);

plt.legend(bbox_to_anchor=(1.3,1.));

In [12]:
# Same mean/median/mode visualization on a skewed Beta(1, 5) sample:
# here the three statistics separate visibly.
N  = 10000
X = np.random.normal(loc=0,scale=1,size=N)[:,np.newaxis]
# The beta draw below overwrites the normal draw above (kept as in the original).
X = np.random.beta(1,5,size=N)[:,np.newaxis]
# X = np.random.uniform(low=0, high=4, size=N)[:,np.newaxis]

val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);
# density=True replaces the deprecated (and later removed) normed=True.
a = plt.hist(X,bins=100,density=True,color='w');

plt.plot([np.mean(X),np.mean(X)],[0,Z.max()],'r',label='mean',linewidth=5);
plt.plot([np.median(X),np.median(X)],[0,Z.max()],'g',label='median',linewidth=5);
# Mode estimated as the grid point where the KDE peaks.
plt.plot([val_grids[np.argmax(Z)],val_grids[np.argmax(Z)]],[0,Z.max()],'b',label='mode',linewidth=5);
# plt.plot([a[1][np.argmax(a[0])],a[1][np.argmax(a[0])]],[0,Z.max()],'b',label='mode',linewidth=5);

plt.legend(bbox_to_anchor=(1.3,1.));


# $$m_k = \frac{1}{n} \sum_{i = 1}^n (x_i - \bar{x})^k$$¶

In [13]:
# Draw the sample used for the moment computations in the next cell
# and show its KDE and histogram.
N  = 10000

X = np.random.normal(loc=1,scale=.3,size=N)[:,np.newaxis]
# X = np.random.beta(1,5,size=N)[:,np.newaxis]
# X = np.random.uniform(low=0, high=4, size=N)[:,np.newaxis]

val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);
# a = plt.hist(X,bins=100,normed=True,color='w');
# density=True replaces the deprecated (and later removed) normed=True.
a = plt.hist(X,bins=20,density=True,color='gray',alpha=.2, edgecolor='gray');

plt.legend(bbox_to_anchor=(1.3,1.));

In [14]:
# Compute the first four central moments of X several equivalent ways.
# Fixed for Python 3 (the original used Python 2 print statements) and the
# "moement" typos in the messages are corrected.
print('mean value is {}'.format(X.mean()))

# The first central moment is zero by construction.
print('First moment {}'.format(stats.moment(X,moment=1)[0]))

print('Second moment: to show the level of variance to the first moment ')
print('variance is {}'.format(np.power(np.std(X),2)))
print('Variance is {}'.format(stats.moment(X,moment=2)[0]))
print('\n')

# Skewness = third central moment normalized by std^3.
print('Third moment: to measure the skewness (inequity) of the distribution')
print('skewness is {}'.format(stats.skew(X)[0]))
print('skewness is {}'.format(stats.moment(X,moment=3)[0]/np.power(np.std(X),3)))
print('skewness is {}'.format(stats.moment(X,moment=3)[0]/np.power(stats.moment(X,moment=2)[0],3/2.)))

print('\n')

# Excess kurtosis = fourth central moment / variance^2, minus 3 (Gaussian baseline).
print('Fourth moment: to show how compact (tailed) is the distribution')
print('Kurtosis is {}'.format(stats.kurtosis(X)[0]))
print('Kurtosis is {}'.format(stats.moment(X,moment=4)[0]/np.power(stats.moment(X,moment=2)[0],2)-3))

mean value is 0.997644698289
First moement 0.0
Second moement: to show the level of variance to the first moment
variance is 0.0898649110884
Variance is 0.0898649110884

Third moment: to measure the skewness (inequity) of the distribution
skewness is -0.0326342895173
skewness is -0.0326342895173
skewness is -0.0326342895173

Fourth moement: to show how compact (tailed) is the distribution
Kurtosis is -0.0315849298331
Kurtosis is -0.0315849298331


# $$m_k = \frac{1}{n} \sum_{i = 1}^n (x_i - \bar{x})^k$$¶

In [15]:
def dist_c(X, c):
    """Return the squared distance of each sample in X from candidate point c.

    X may be a scalar or a NumPy array; the result has the same shape as X.
    """
    return np.power((X - c), 2)
    # return np.abs(X - c)  # L1 alternative kept from the original

# For each candidate point c on a fine grid, compute the per-sample squared
# distances dist_c(X, c), then find the c that minimizes each of the first
# four moments of that distance distribution. Fixed for Python 3 (the
# original used Python 2 print statements).
val_grids = np.linspace(X.min(),X.max(),num=20000)
# X1  = dist_c(X,0)

Optimum_p = []    # optimal grid value per moment
Optimum_ind = []  # index of that grid value, for reuse in later cells
plt.subplot(3,2,1)
Dists = [dist_c(X,c) for c in val_grids]
#First moment
mean_dist = [np.mean(D) for D in Dists]
plt.plot(val_grids,mean_dist,'.')
# Minimizing the mean squared distance recovers the sample mean.
print(val_grids[np.argmin(mean_dist)])
Optimum_p.append(val_grids[np.argmin(np.abs(mean_dist))])
Optimum_ind.append(np.argmin(np.abs(mean_dist)))

plt.subplot(3,2,2)

second_m = [np.std(D) for D in Dists]
# second_m = [stats.moment(D,moment=2) for D in Dists]
plt.plot(val_grids,second_m,'.')
print(val_grids[np.argmin(np.abs(second_m))])
Optimum_p.append(val_grids[np.argmin(np.abs(second_m))])
Optimum_ind.append(np.argmin(np.abs(second_m)))

plt.subplot(3,2,3)
third_m = [stats.skew(D)[0] for D in Dists]
# third_m = [stats.moment(D,moment=3)[0]/np.power(np.std(D),3) for D in Dists]

plt.plot(val_grids,np.abs(third_m),'.')
print(val_grids[np.argmin(np.abs(third_m))])
Optimum_p.append(val_grids[np.argmin(np.abs(third_m))])
Optimum_ind.append(np.argmin(np.abs(third_m)))

plt.subplot(3,2,4)

fourth_m = [stats.kurtosis(D)[0] for D in Dists]
# fourth_m = [stats.moment(D,moment=4) for D in Dists]
plt.plot(val_grids,fourth_m,'.')
print(val_grids[np.argmin(np.abs(fourth_m))])

Optimum_p.append(val_grids[np.argmin(np.abs(fourth_m))])
Optimum_ind.append(np.argmin(np.abs(fourth_m)))

0.997636319191
0.992760840188
-0.122703402556
-0.122703402556

In [16]:
# Overlay the per-moment optimal points (dashed lines) on the KDE of X,
# together with the mean, median, and mode computed directly.
val_grids = np.linspace(X.min(),X.max(),num=200)
kernel = stats.gaussian_kde(X[:,0])
Z = kernel(val_grids)
plt.plot(val_grids,Z,linewidth=5);
# density=True replaces the deprecated (and later removed) normed=True.
a = plt.hist(X,bins=100,density=True,color='w');

plt.plot([np.mean(X),np.mean(X)],[0,Z.max()],'r',label='mean',linewidth=5);
plt.plot([np.median(X),np.median(X)],[0,Z.max()],'g',label='median',linewidth=5);
plt.plot([val_grids[np.argmax(Z)],val_grids[np.argmax(Z)]],[0,Z.max()],'b',label='mode',linewidth=5);
# plt.plot([a[1][np.argmax(a[0])],a[1][np.argmax(a[0])]],[0,Z.max()],'b',label='mode',linewidth=5);

# Optimum_p comes from the previous moment-optimization cell.
plt.plot([Optimum_p[0],Optimum_p[0]],[0,Z.max()],'--',label='first',linewidth=5);
plt.plot([Optimum_p[1],Optimum_p[1]],[0,Z.max()],'--',label='second',linewidth=5);
plt.plot([Optimum_p[2],Optimum_p[2]],[0,Z.max()],'--',label='third',linewidth=5);
plt.plot([Optimum_p[3],Optimum_p[3]],[0,Z.max()],'--',label='fourth',linewidth=5);

plt.legend(bbox_to_anchor=(1.3,1.));

In [17]:
# Now we want to see the distributions around the optimum values of each momentum as a function
# Fixed for Python 3 (print statements -> print()) and normed -> density.

fig = plt.figure(figsize=(10,10))

plt.subplot(2,2,1);
sample = Dists[Optimum_ind[0]]
plt.hist(sample,bins=100,density=True);
print('first momentum optimization: minimizing the average')
print('mean: {}, std: {}, skewness: {}, kurtosis: {}'.format(sample.mean(),np.std(sample),stats.skew(sample)[0],stats.kurtosis(sample)[0]))
plt.title('minimizing the average distance')

plt.subplot(2,2,2);
sample = Dists[Optimum_ind[1]]
plt.hist(sample,bins=100,density=True);
print('second momentum optimization: minimizing the variance')
print('mean: {}, std: {}, skewness: {}, kurtosis: {}'.format(sample.mean(),np.std(sample),stats.skew(sample)[0],stats.kurtosis(sample)[0]))
plt.title('minimizing the variance')

plt.subplot(2,2,3);
sample = Dists[Optimum_ind[2]]
plt.hist(sample,bins=100,density=True);
print('third momentum optimization: minimizing the inequity')
print('mean: {}, std: {}, skewness: {}, kurtosis: {}'.format(sample.mean(),np.std(sample),stats.skew(sample)[0],stats.kurtosis(sample)[0]))
plt.title('minimizing the inequity (skewness)')

plt.subplot(2,2,4);
sample = Dists[Optimum_ind[3]]
plt.hist(sample,bins=100,density=True);
print('fourth momentum optimization: maximizing the compactness')
print('mean: {}, std: {}, skewness: {}, kurtosis: {}'.format(sample.mean(),np.std(sample),stats.skew(sample)[0],stats.kurtosis(sample)[0]))
plt.title('maximizing the compactness');

first momentum optimization: minimizing the average
mean: 0.0898649111586, std: 0.126080547907, skewness: 2.70821458939, kurtosis: 10.3585507896
second momentum optimization: minimizing the variance
mean: 0.0898887631584, std: 0.126046552608, skewness: 2.70678236579, kurtosis: 10.3434293637
third momentum optimization: minimizing the inequity
mean: 1.34504477816, std: 0.680546147662, skewness: 0.736880777207, kurtosis: 0.682330921564
fourth momentum optimization: maximizing the compactness
mean: 1.34504477816, std: 0.680546147662, skewness: 0.736880777207, kurtosis: 0.682330921564


#### What do these measures imply, for example in the context of governance?¶

• socialism?
• capitalism?
• Income distribution for example

# ¶

## Interpreting the higher order statistics from the point of view of optimization¶

• What does sample mean in terms of optimization?
• what does each of the moments mean?
• Sample mean is the optimum value if we minimize the first order momentum of data!
• Equity objectives

### Another example with higher dimensional data¶

* Imagine the dots are the distribution of the houses in a city.
* And we are interested to locate one shop in the city.
In [31]:
def measure(n):
    """Measurement model: return two coupled measurements.

    Draws two independent Gaussian column vectors m1 ~ N(1, 1.5^2) and
    m2 ~ N(1, 1.0^2) of shape (n, 1) and returns the correlated pair
    (m1 + m2, m1 - m2).
    """
    m1 = np.random.normal(loc=1, scale=1.5, size=n)[:, np.newaxis]
    m2 = np.random.normal(loc=1, scale=1.0, size=n)[:, np.newaxis]
    return m1 + m2, m1 - m2

# Draw 20 "house" locations and scatter-plot them; the axis bounds are
# kept for later cells.
x, y = measure(20)
xmin = x.min()
xmax = x.max()
ymin = y.min()
ymax = y.max()

plt.plot(x,y,'ok',markersize=10);

In [32]:
# Without Bokeh
# def plot_dists(cx,cy):
#     fig, ax = plt.subplots();
#     ax.plot(x,y,'ok',markersize=10)
#     ax.plot(cx,cy,'or', markersize=15);
#     [ax.plot([cx,x[i]],[cy,y[i]],'--r') for i in range(len(x))];
#     print np.sum(cx-x + cy-y)

# print 'mean values: ', x.mean(), y.mean()
# from ipywidgets import interact, HTML, FloatSlider
# interact(plot_dists,cx=(x.mean()-3,x.mean()+3,.1),cy=(y.mean()-3,y.mean()+3,.1));

In [33]:
# Candidate shop location: every row starts at (1, 1); the sliders below
# move it interactively.
cx = np.asarray([1 for i in range(len(x))])[:,np.newaxis]
cy = np.asarray([1 for i in range(len(x))])[:,np.newaxis]
df = pd.DataFrame(data=np.concatenate((x,y,cx,cy),axis=1),columns=['x','y','cx','cy'])
source = ColumnDataSource(data=df)
p = figure(plot_width=400, plot_height=400,title="")
# Red: houses; blue: candidate location; segments connect each house to it.
p.circle('x', 'y', size=15,source=source, line_color="navy", fill_color="red", fill_alpha=0.99)
p.circle('cx', 'cy', size=15, source=source,line_color="navy", fill_color="blue", fill_alpha=0.99)
p.segment(x0='x', y0='y', x1='cx', y1='cy', color='blue', alpha=0.6, line_width=1, source=source, )

# NOTE(review): this sums signed coordinate differences, not Euclidean
# distances, despite the label — confirm intended.
p.title.text = "sum of distances: " +str(np.sum(cx-x + cy-y))

def callback1(source=source, p=p, window=None):
    # This body is transpiled to JavaScript by CustomJS.from_py_func (Bokeh);
    # cb_obj is injected by Bokeh at callback time and is not a Python name.
    data = source.data
    f = cb_obj.value
    x, y = data['x'], data['y']
    cx = data['cx']
    cy = data['cy']
    s = 0
    for i in range(len(x)):
        # NOTE(review): accumulates signed differences (cy-y)+(cx-x) using the
        # cx values from *before* this slider update — confirm this is the
        # intended "sum of distances".
        s = s + cy[i]-y[i] + cx[i]-x[i]
        cx[i] = f
    p.title.text = "sum of distances: " +str(s)
    source.change.emit()

# Slider moving the candidate point's x-coordinate.
slider1 = Slider(start=0.1, end=4, value=1, step=.1, title="cx",
                 callback=CustomJS.from_py_func(callback1))

def callback2(source=source, p=p, window=None):
    # Transpiled to JavaScript by CustomJS.from_py_func; cb_obj is injected
    # by Bokeh at callback time. Mirrors callback1 but updates cy.
    data = source.data
    f = cb_obj.value
    x, y = data['x'], data['y']
    cx = data['cx']
    cy = data['cy']
    s = 0
    for i in range(len(x)):
        # NOTE(review): signed differences, computed before updating cy —
        # confirm intended (see callback1).
        s = s + cy[i]-y[i] + cx[i]-x[i]
        cy[i] = f
    p.title.text = "sum of distances: " +str(s)
    source.change.emit()

# Slider moving the candidate point's y-coordinate.
slider2 = Slider(start=0.1, end=4, value=1, step=.1, title="cy",
                 callback=CustomJS.from_py_func(callback2))

# Stack the two sliders above the plot and render the interactive widget.
layout = column(slider1,slider2, p)

show(layout)

In [35]:
#Now let's generalize this approach for all the other HOS measures

def measure(n):
    """Measurement model: return two coupled measurements.

    Redefinition of the earlier `measure` with a tighter second component:
    m1 ~ N(1, 1.5^2), m2 ~ N(1, 0.8^2), each of shape (n, 1); returns the
    correlated pair (m1 + m2, m1 - m2).
    """
    m1 = np.random.normal(loc=1, scale=1.5, size=n)[:, np.newaxis]
    m2 = np.random.normal(loc=1, scale=0.8, size=n)[:, np.newaxis]
    return m1 + m2, m1 - m2

# Draw 600 points and build a 100x100 grid of candidate locations covering
# their bounding box, flattened to (10000, 2) for the distance computations.
m1, m2 = measure(600)
X = np.concatenate((m1,m2),axis=1)
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()

# 100j step means "100 points inclusive" in np.mgrid's complex-step syntax.
Xg, Yg = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
val_grids = np.vstack([Xg.ravel(), Yg.ravel()]).T
plt.plot(m1,m2,'.');

In [36]:
# Generalize the moment-minimization idea to 2-D: for every grid point,
# compute its distances to all 600 data points, then color the grid by the
# first four moments of that distance distribution and mark each minimizer.
# Fixed for Python 3 (the original used Python 2 print statements).
import scipy.spatial.distance as DIST
fig = plt.figure(figsize=(10,10))
Optimum_p = []
Optimum_ind = []
# Dists[i] holds the Euclidean distances from grid point i to every sample.
Dists = DIST.cdist(val_grids,X)

# Draw the raw data in all four panels first.
plt.subplot(2,2,1)
plt.plot(m1,m2,'.')
plt.subplot(2,2,2)
plt.plot(m1,m2,'.')
plt.subplot(2,2,3)
plt.plot(m1,m2,'.')
plt.subplot(2,2,4)
plt.plot(m1,m2,'.')

plt.subplot(2,2,1)
#First moment
mean_dist = np.abs([np.mean(D) for D in Dists])
# plt.plot(val_grids,mean_dist,'.')
mn = np.min(mean_dist)
mx = np.max(mean_dist)
rng = mx-mn
# Normalize to [0, 1] so the colormap spans the full range.
var = (mean_dist-mn)/float(rng)
plt.scatter(val_grids[:,0],val_grids[:,1],marker='o',s=5,edgecolor=plt.cm.RdYlBu_r(var) ,vmin=mn,vmax=mx,facecolor='None')
# plt.scatter(val_grids[:,0],val_grids[:,1],s=20, c=mean_dist,ma)
print(val_grids[np.argmin(mean_dist)])
Optimum_p.append(val_grids[np.argmin(np.abs(mean_dist))])
Optimum_ind.append(np.argmin(np.abs(mean_dist)))
plt.title('minimizing the average distance')

plt.subplot(2,2,2)

second_m = np.abs([np.std(D) for D in Dists])
# second_m = [stats.moment(D,moment=2) for D in Dists]

# plt.plot(val_grids,second_m,'.')

mn = np.min(second_m)
mx = np.max(second_m)
rng = mx-mn
var = (second_m-mn)/float(rng)
plt.scatter(val_grids[:,0],val_grids[:,1],marker='o',s=2,edgecolor=plt.cm.RdYlBu_r(var) ,vmin=mn,vmax=mx,facecolor='None')

print(val_grids[np.argmin(np.abs(second_m))])
Optimum_p.append(val_grids[np.argmin(np.abs(second_m))])
Optimum_ind.append(np.argmin(np.abs(second_m)))

plt.title('minimizing the variance')

plt.subplot(2,2,3)
third_m = np.abs([stats.skew(D) for D in Dists])
# third_m = [stats.moment(D,moment=3)[0]/np.power(np.std(D),3) for D in Dists]

mn = np.min(third_m)
mx = np.max(third_m)
rng = mx-mn
var = (third_m-mn)/float(rng)
plt.scatter(val_grids[:,0],val_grids[:,1],marker='o',s=5,edgecolor=plt.cm.RdYlBu_r(var) ,vmin=mn,vmax=mx,facecolor='None')

# plt.plot(val_grids,np.abs(third_m),'.')
print(val_grids[np.argmin(np.abs(third_m))])
Optimum_p.append(val_grids[np.argmin(np.abs(third_m))])
Optimum_ind.append(np.argmin(np.abs(third_m)))
plt.title('minimizing the inequity (skewness)')

plt.subplot(2,2,4)

fourth_m = np.abs([stats.kurtosis(D) for D in Dists])
# fourth_m = [stats.moment(D,moment=4) for D in Dists]
# plt.plot(val_grids,fourth_m,'.')
mn = np.min(fourth_m)
mx = np.max(fourth_m)
rng = mx-mn
var = (fourth_m-mn)/float(rng)
plt.scatter(val_grids[:,0],val_grids[:,1],marker='o',s=5,edgecolor=plt.cm.RdYlBu_r(var) ,vmin=mn,vmax=mx,facecolor='None')

plt.title('maximizing the compactness')

print(val_grids[np.argmin(np.abs(fourth_m))])

Optimum_p.append(val_grids[np.argmin(np.abs(fourth_m))])
Optimum_ind.append(np.argmin(np.abs(fourth_m)))
plt.tight_layout()

[ 2.14668261 -0.01525667]
[-2.69139217  5.36623937]
[ 7.30729572  4.70995936]
[-2.69139217 -4.2154487 ]