Vahid Moosavi

# Eighth Session¶

22 November 2016

### Topics to be discussed¶

• Markov Chains
In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
pd.__version__
import sys
from scipy import stats
import time
import  pysparse
from scipy.linalg import norm
%matplotlib inline


# We discuss Markov Chains from the following aspects:¶

• From the point of view of dynamical systems
• From the point of view of object representation
• Properties and applications
• Extensions to machine learning applications

# Random walk¶

In [83]:
# Simple 1-D random walk: at each step move -1 or +1 with equal probability.
# (The stray bare name `direction` that used to open this cell raised a
# NameError and has been removed.)
state = 0
all_states = [0]        # walk positions, starting at the origin
all_walks = []          # the individual +/-1 increments
n_time = 1000
for i in range(n_time):
    walk = np.random.choice([-1, 1], size=1)[0]
    state = state + walk
    all_states.append(state)
    all_walks.append(walk)
plt.plot(range(n_time + 1), all_states, 'o-', color='gray')

Out[83]:
[<matplotlib.lines.Line2D at 0x164b94690>]

# Usually we have a matrix, called transition matrix, M¶

### Where M has certain conditions:¶

• Matrix M has equal rows and columns: M is nxn
• n is the number of states in the Markov chain
• Row (column) Stochastic: Sum of rows (columns) are equal to one
• non-negative elements
• There is always a directed graph associated to M

### A very basic example, with two states¶

In [85]:
# state space: Rainy, sunny
# Row-stochastic transition matrix: each row sums to one.
M = np.asarray([[.9, .1], [.6, .4]])

print(np.matrix(M))
print()

# any arbitrary initial point (one-hot: start in the second state, "sunny")
x = np.asarray([0, 1])

print("next probable state:")
print(x.dot(M))   # one step of the chain: x' = x M

[[ 0.9  0.1]
[ 0.6  0.4]]

next probable state:
[ 0.6  0.4]

In [86]:
# Repeatedly apply x <- x M; the distribution converges to the stationary
# distribution of the chain (here [6/7, 1/7]).
print(x)
for i in range(20):
    x = x.dot(M)
    print(x)
print(x.shape)

[0 1]
[ 0.6  0.4]
[ 0.78  0.22]
[ 0.834  0.166]
[ 0.8502  0.1498]
[ 0.85506  0.14494]
[ 0.856518  0.143482]
[ 0.8569554  0.1430446]
[ 0.85708662  0.14291338]
[ 0.85712599  0.14287401]
[ 0.8571378  0.1428622]
[ 0.85714134  0.14285866]
[ 0.8571424  0.1428576]
[ 0.85714272  0.14285728]
[ 0.85714282  0.14285718]
[ 0.85714284  0.14285716]
[ 0.85714285  0.14285715]
[ 0.85714286  0.14285714]
[ 0.85714286  0.14285714]
[ 0.85714286  0.14285714]
[ 0.85714286  0.14285714]
(2,)


# Usual Assumptions:¶

• Time homogeneous transition matrix
• Finite states
• Discrete-time

# Now the data driven part:¶

## Of course, if we have the underlying rules (i.e. transition probabilities), we can use Markov chain properties, but...¶

The beauty of a Markov chain is that it is super easy to construct from data

# how to construct the Markov Chain?¶

In [77]:
def buildTM_from_sequential_data(data, states, irreducible=True):
    """Build a row-stochastic Markov transition matrix from observed sequences.

    Parameters
    ----------
    data : iterable of sequences
        Each element is one observed sequence of integer state indices
        (each row is a sequence of observations; values must index `states`).
    states : sequence
        The state space; only its length is used to size the matrix.
    irreducible : bool, optional
        How to treat states that are never left in the data (zero rows):
        if True, give them a tiny uniform outflow `eps` plus a dominant
        self loop and renormalise, keeping the chain irreducible;
        if False, make them absorbing (self-transition probability 1).

    Returns
    -------
    M : (n, n) ndarray
        Transition matrix with every row summing to one.
    """
    n = len(states)
    M = np.zeros((n, n))
    # Count observed transitions d[k-1] -> d[k] over every sequence.
    for d in data:
        for k in range(1, len(d)):
            i = d[k - 1]
            j = d[k]
            M[i, j] = M[i, j] + 1

    eps = .001
    for i in range(M.shape[0]):
        s = sum(M[i])
        if s == 0:
            if irreducible:
                # Never-left state: small uniform mass plus a strong self loop.
                M[i] = eps
                M[i, i] = 1.
                s = sum(M[i])
                M[i] = np.divide(M[i], s)
            else:
                # Absorbing state.
                M[i, i] = 1.
        else:
            M[i] = np.divide(M[i], s)
    return M

# Power iteration Method
def simulate_markov(TM, verbose='on'):
    """Approximate the stationary distribution of a row-stochastic matrix TM
    by power iteration from a random initial distribution.

    Iterates pi <- TM^T pi until successive iterates differ by less than
    1e-6 in L2 norm, or at most 1e4 iterations.

    Parameters
    ----------
    TM : (n, n) ndarray, row-stochastic transition matrix.
    verbose : str, prints iteration count and wall time when 'on'.

    Returns
    -------
    (pi, mixing_) : the estimated stationary distribution and the number of
    iterations performed (used elsewhere as a mixing-time proxy).
    """
    e1 = time.time()
    states_n = TM.shape[0]
    pi = np.ones(states_n); pi1 = np.zeros(states_n)
    # Random start, normalised to a probability distribution.
    pi = np.random.rand(states_n)
    pi = pi / pi.sum()
    n = norm(pi - pi1); i = 0
    diff = []   # convergence history (not returned; kept for inspection)
    while n > 1e-6 and i < 1 * 1e4:
        pi1 = TM.T.dot(pi).copy()
        n = norm(pi - pi1); i += 1
        diff.append(n)
        pi = pi1.copy()
    if verbose == 'on':
        print("Iterating {} times in {}".format(i, time.time() - e1))

    mixing_ = i
    return pi1, mixing_


## $$x = xM$$¶

In [89]:
sz = 4
n_state = 6
# One random sequence of length sz over n_state possible states.
data = np.random.randint(0, high=n_state, size=sz).reshape(1, sz)
states = np.arange(0, n_state)

# If it is irreducible, it will always have a steady state; otherwise it might oscillate
M = buildTM_from_sequential_data(data, states, irreducible=False)

print(np.matrix(M))
print()

# Arbitrary initial distribution, normalised to sum to one.
# (The previous dead assignment x = np.asarray([0,1]) was overwritten
# immediately and has been removed.)
x = np.random.rand(len(states))
x = x / x.sum()
all_x = []
all_x.append(x)
for i in range(20):
    x = x.dot(M)
    all_x.append(x)
print(x)

all_x = np.asarray(all_x)
# One curve per state: probability of being in that state over time.
for i in range(all_x.shape[1]):
    plt.plot(range(len(all_x)), all_x[:, i])

[[ 0.  0.  0.  0.  0.  1.]
[ 0.  1.  0.  0.  0.  0.]
[ 0.  0.  1.  0.  0.  0.]
[ 0.  0.  0.  1.  0.  0.]
[ 1.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.  0.]]

[ 0.20000934  0.18171897  0.17723441  0.30698769  0.08627169  0.04777791]


# Now examples with Dynamical systems¶

### Lorenz system¶

In [9]:
import numpy as np
from scipy import integrate

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import cnames
from matplotlib import animation
%matplotlib inline
N_trajectories = 1000

def lorentz_deriv(xyz, t0, sigma=10., beta=8./3, rho=28.0):
    """Compute the time-derivative of a Lorentz system.

    `xyz` is the state (x, y, z); the original tuple-parameter signature
    `def lorentz_deriv((x, y, z), t0, ...)` is Python-2-only syntax
    (removed by PEP 3113), so the tuple is unpacked inside the body.
    `t0` is required by `scipy.integrate.odeint` but unused (autonomous system).
    """
    x, y, z = xyz
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]

# Choose random starting points, uniformly distributed in [-10, 20)^3.
x0 = -10 + 30 * np.random.random((N_trajectories, 3))

# Integrate each trajectory over the same time grid.
t = np.linspace(0, 5, 500)
x_t = np.asarray([integrate.odeint(lorentz_deriv, x0i, t)
                  for x0i in x0])

# Plot only the first coordinate of the first 100 trajectories.
for i in range(100):
    plt.plot(x_t[i, :, 0])


## State Space explosion¶

### Some early tries I did:¶

Vahid, Moosavi, Computing With Contextual Numbers. arXiv preprint arXiv:1408.0889. (2014).

In [90]:
# Here, we assume one dimensional state space
# Coarse graining and discretizing the state space: truncate the first Lorenz
# coordinate to integers, shift to non-negative indices, keep every 5th step.
data = x_t[:, :, 0] / 1
data = data.astype(int)
print(np.unique(data).shape)

data = data - np.min(data)   # shift so state indices start at 0

data = data[:, range(0, data.shape[1], 5)]   # subsample in time
print(np.unique(data))
print(data.shape)
for i in range(100):
    plt.plot(data[i, :])

(50,)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]
(1000, 100)

In [91]:
# frequency of visited states
# `density=True` replaces `normed=True`, which was deprecated and then
# removed from matplotlib's hist API.
f_data = data.flatten()
plt.hist(f_data, density=True, bins=51);

In [93]:
# Now let's build a markov chain out of this data
# One state per distinct coarse-grained value observed in `data`.
n_state = np.unique(data).shape[0]
states = np.arange(0,n_state)
# irreducible=False: states never left in the data stay absorbing.
M = buildTM_from_sequential_data(data,states,irreducible=False)
print(np.matrix(M))

[[ 0.          0.          0.         ...,  0.          0.          0.        ]
[ 0.          0.          0.         ...,  0.          0.          0.        ]
[ 0.          0.03448276  0.10344828 ...,  0.          0.          0.        ]
...,
[ 0.          0.          0.         ...,  0.          0.03846154  0.        ]
[ 0.          0.          0.         ...,  0.          0.          0.        ]
[ 0.          0.          0.         ...,  0.          0.          0.        ]]

In [94]:
# The meaning of steady state distribution
# How to interpret this?
# pi approximates the long-run fraction of time the chain spends in each
# coarse-grained state; mixing_t is the number of power iterations needed.
pi, mixing_t = simulate_markov(M,verbose='on')
plt.plot(pi)

Iterating 379 times in 0.00762104988098

Out[94]:
[<matplotlib.lines.Line2D at 0x1649bbad0>]

# But now if we want to see the dynamics of a single trajectory¶

In [95]:
# Arbitrary initial point: a one-hot vector — a single randomly chosen state
# has probability 1 and all the others 0. (The earlier comment saying "the
# rest is one" was wrong, and the dead x = np.asarray([0,1]) line, which was
# overwritten immediately, has been removed.)
ind_initial = np.random.randint(0, n_state, size=1)
print(ind_initial)
x = np.zeros((1, n_state))
x[0, ind_initial] = 1

x = x / x.sum()   # already normalised; kept as a safety net
all_x = []
all_x.append(x)
for i in range(100):
    x = x.dot(M)      # propagate the whole distribution one step
    all_x.append(x)

all_x = np.asarray(all_x)

[15]


# Because of the probabilistic nature of Markov Chain, it calculates all the possible combinations together.¶

In [96]:
# Rows of state_pic are time steps, columns are states; the image shows how
# probability mass spreads from the one-hot start over all reachable paths.
state_pic = all_x[:,0,:].copy()
plt.imshow(state_pic.T[::-1]);


# Here, is the connecting point between Markov Chains and Markov Decision Process and Reinforcement Learning¶

In [99]:
# Sample one concrete trajectory from the chain: at each step draw the next
# state from the categorical distribution given by the current state's row.
all_inds = []
ind_initial = np.random.randint(0, n_state, size=1)

ind = ind_initial[0]
all_inds.append(ind)
state_ids = range(M.shape[0])
for _ in range(100):
    ind = np.random.choice(state_ids, size=1, p=M[ind])[0]
    all_inds.append(ind)

In [100]:
# Plot the sampled single trajectory of state indices over time.
plt.plot(all_inds)
plt.ylim((0,n_state))

Out[100]:
(0, 50)

# Important points:¶

## On the other hand, Markov chains have very interesting features if one is interested in the overall macroscopic properties of dynamical systems, for example in:¶

• Water flow
• Traffic Dynamics
• Economic networks
• WWW
• Electricity Network
• Flight dynamics

# Some of the very interesting properties of Markov chains:¶

• Clustering the state space
• Mixing times
• Recurrence time
• Mean first passage time
• Kemeny Constant
• Sensitivity analysis of the transition matrix

# First Example:¶

## Transportation Dynamics¶

Vahid, Moosavi and Ludger Hovestadt. “Modeling urban traffic dynamics in coexistence with urban data streams.” Proceedings of the 2nd ACM SIGKDD International Workshop on Urban Computing. ACM, 2013. https://vahidmoosavi.files.wordpress.com/2013/09/modeling-urban-traffic-dynamics-in-coexistence-with-urban-data-streams.pdf

### This is the only data we use for modeling the dynamics of traffic in a city¶

In [22]:
from IPython.display import YouTubeVideo

Out[22]:
In [23]:
from IPython.display import YouTubeVideo

Out[23]:
In [24]:
from IPython.display import YouTubeVideo

Out[24]:

# Second Example: Macro-Economic Network¶

## Relational (networked) economic models¶

### Robert Solow 1952 had talked about the relationships between Leontief input-output models and markov chains as their algebraic formalism¶

In [25]:
# Dimensions of the WIOD dataset: 41 economies (countries), 35 industries each.
economy_n = 41
industry_n = 35

In [26]:
# Collect the yearly World Input-Output Tables for 1995-2011.
# NOTE(review): `d` is not defined in this cell — the cell that loads each
# year's table (presumably a DataFrame read per year) appears to be missing
# from this export, so as written this raises a NameError. The captured
# output shows each yearly table had shape (1435, 1640).
WIO = list()
for i in range(1995,2012):
    WIO.append(d.values[:])
    print i, d.shape

1995 (1435, 1640)
1996 (1435, 1640)
1997 (1435, 1640)
1998 (1435, 1640)
1999 (1435, 1640)
2000 (1435, 1640)
2001 (1435, 1640)
2002 (1435, 1640)
2003 (1435, 1640)
2004 (1435, 1640)
2005 (1435, 1640)
2006 (1435, 1640)
2007 (1435, 1640)
2008 (1435, 1640)
2009 (1435, 1640)
2010 (1435, 1640)
2011 (1435, 1640)


### Using Power Iteration to calculate the steady state probabilities, Pi¶

In [27]:
def aggregate_economy_int_consumptions(WIOT, VA_y):
    """Aggregate each economy's final consumption and each industry's
    production costs from one yearly World Input-Output Table.

    Parameters
    ----------
    WIOT : ndarray of shape (1435, 1640)
        41 economies x 35 industries in the rows; the columns beyond the
        first 1435 hold final-demand categories, 5 per economy
        (presumably the WIOD final-demand breakdown — TODO confirm).
    VA_y : ndarray of length 1435
        Value added per industry row for that year.

    Returns
    -------
    economy_int_consumptions : (1435, 41) ndarray
        Final consumption of each economy (column) from each industry (row),
        clipped at zero.
    industry_produsction_costs : (1435,) ndarray
        Total output minus intermediate sales minus value added, rounded
        to 3 decimals.
    """
    economy_n = 41
    industry_n = 35
    ind = economy_n * industry_n   # index of the first final-demand column

    economy_int_consumptions = np.zeros((economy_n * industry_n, economy_n))
    for i in range(economy_n):
        # Sum the 5 final-demand categories of economy i into one column.
        col = i * 5 + ind
        economy_int_consumptions[:, i] = WIOT[:, col:col + 5].sum(axis=1)

    # Change in inventories (5th final-demand column) can make entries
    # negative; clip to zero so the downstream Markov chain stays non-negative.
    economy_int_consumptions[economy_int_consumptions < 0] = 0

    # cost = total output - intermediate sales - value added
    industry_produsction_costs = np.around(
        WIOT.sum(axis=1) - WIOT[:, :economy_n * industry_n].sum(axis=0) - VA_y,
        decimals=3)

    return economy_int_consumptions, industry_produsction_costs

def build_markov_chain_Dense(Q, WIOT, economy_n, industry_n, economy_int_consumptions, industry_produsction_costs):
    """Fill the pre-allocated square matrix Q with a row-stochastic Markov
    chain combining industry-industry flows, industry-to-economy payments and
    economy-to-industry consumption, then normalise every row.

    Parameters
    ----------
    Q : (s, s) ndarray, s = economy_n*industry_n + economy_n, typically zeros.
    WIOT : input-output table indexed [seller_row, buyer_col] for the first
        economy_n*industry_n rows/columns.
    economy_n, industry_n : grid dimensions of the state space.
    economy_int_consumptions : (economy_n*industry_n, economy_n) consumption.
    industry_produsction_costs : per-industry payments to its economy.

    Returns
    -------
    (TM, singular_ids) : the row-normalised matrix rounded to 10 decimals and
    the sorted unique ids of rows that had zero outflow.
    """
    e0 = time.time()
    eps = .001
    ind_economy_inTM = economy_n * industry_n   # first index of the economy states
    for i in range(economy_n * industry_n):
        # Industry -> industry flows (transposed WIOT: money moves buyer -> seller).
        for j in range(economy_n * industry_n):
            Q[i, j] = WIOT[j, i]

        # Payments of industry i to its own economy: the economy's income.
        # NOTE: `//` replaces the original `/`, which is integer division only
        # under Python 2 — under Python 3 it produced a float index.
        economy_ind_industry = i // industry_n
        Q[i, ind_economy_inTM + economy_ind_industry] = industry_produsction_costs[i]

        # Each economy's consumption spending flows back to the industries.
        for j in range(economy_n):
            Q[j + ind_economy_inTM, i] = economy_int_consumptions[i, j]

    singular_industries = []
    for i in range(Q.shape[0]):
        s = sum(Q[i])
        if s == 0:
            # Dangling row: tiny uniform outflow plus a dominant self loop.
            # NOTE(review): `i % economy_n` looks suspect as an industry id
            # (vs. i // industry_n) — confirm the intended labelling.
            singular_industries.append(i % economy_n)
            Q[i] = eps
            Q[i, i] = 1.
            s = sum(Q[i])
        Q[i] = np.divide(Q[i], s)
    print("Making the TM in {} second".format(time.time() - e0))
    return np.around(Q, decimals=10), list(np.unique(singular_industries))
def simulate_markov(TM, verbose='on'):
    """Power-iteration estimate of the stationary distribution of TM.

    NOTE(review): this redefines (and silently shadows) the identical
    `simulate_markov` defined earlier in the notebook — keep only one copy.

    Iterates pi <- TM^T pi from a random normalised start until successive
    iterates differ by less than 1e-6 (L2) or 1e4 iterations are reached.
    Returns (pi, n_iterations).
    """
    e1 = time.time()
    states_n = TM.shape[0]
    pi = np.ones(states_n); pi1 = np.zeros(states_n)
    pi = np.random.rand(states_n)   # random initial distribution
    pi = pi / pi.sum()

    n = norm(pi - pi1); i = 0
    diff = []   # convergence history, kept for inspection
    while n > 1e-6 and i < 1 * 1e4:
        pi1 = TM.T.dot(pi).copy()
        n = norm(pi - pi1); i += 1
        diff.append(n)
        pi = pi1.copy()
    if verbose == 'on':
        print("Iterating {} times in {}".format(i, time.time() - e1))

    mixing_ = i

    return pi1, mixing_


In [28]:
# Build one Markov chain per yearly WIOT and record its stationary
# distribution, mixing time and singular (zero-outflow) state ids.
Pi = []
TMs = []
Mixing_times = []
singular_ids = []
fig3 = plt.figure(figsize=(15, 7))
for i, WIOT in enumerate(WIO):
    # NOTE(review): value added is zeroed out here, so the removed dead
    # assignment VA_y = VA.values[i] never took effect; confirm whether VA
    # was meant to enter the production-cost computation.
    VA_y = np.zeros(VA.values[0].shape[0])
    economy_int_consumptions, industry_produsction_costs = aggregate_economy_int_consumptions(WIOT, VA_y)
    economy_n = 41
    industry_n = 35
    states_n = economy_n * industry_n + economy_n   # industry states + one per economy
    TM = np.zeros((states_n, states_n))
    TM, singular_id = build_markov_chain_Dense(TM, WIOT, economy_n, industry_n,
                                               economy_int_consumptions, industry_produsction_costs)
    TMs.append(TM)
    singular_ids.extend(singular_id)
    t, mixing_ = simulate_markov(TM)
    Pi.append(t)
    Mixing_times.append(mixing_)

singular_ids = np.unique(singular_ids)

Making the TM in 0.859048843384 second
Iterating 289 times in 0.165277957916
Making the TM in 0.868260860443 second
Iterating 257 times in 0.144299030304
Making the TM in 0.852555036545 second
Iterating 251 times in 0.139472007751
Making the TM in 0.857586860657 second
Iterating 259 times in 0.146198034286
Making the TM in 0.861138820648 second
Iterating 262 times in 0.148849010468
Making the TM in 0.867842912674 second
Iterating 240 times in 0.133607149124
Making the TM in 0.876348972321 second
Iterating 228 times in 0.131730079651
Making the TM in 0.848108053207 second
Iterating 229 times in 0.130686044693
Making the TM in 0.880218029022 second
Iterating 222 times in 0.123178958893
Making the TM in 0.846520900726 second
Iterating 208 times in 0.11580991745
Making the TM in 0.85072183609 second
Iterating 190 times in 0.115315914154
Making the TM in 0.870454072952 second
Iterating 172 times in 0.0955309867859
Making the TM in 0.870944023132 second
Iterating 166 times in 0.0922381877899
Making the TM in 0.849496126175 second
Iterating 157 times in 0.0873749256134
Making the TM in 0.855029821396 second
Iterating 191 times in 0.109220027924
Making the TM in 0.882319927216 second
Iterating 173 times in 0.097767829895
Making the TM in 0.84793305397 second
Iterating 158 times in 0.0903940200806

<matplotlib.figure.Figure at 0x11d619090>

### 1- Mixing times¶

#### As an index of globalization¶

In [29]:
#MIXING TIME: TIME OR ITERATIONS REQUIRED TO REACH TO STEADY STATE. THIS IS A DIRECT MEASURE TO SHOW HOW CONNECTED THE
#NETWORK IS. MORE MODULAR NETWORK MORE ITERATIONS REQUIRED TO REACH TO STEADY STATE

# Plot the power-iteration count per yearly chain (1995-2011); the downward
# trend in the output is read as increasing global connectedness.
fig = plt.figure(figsize=(10,7))
plt.plot(Mixing_times,"*-")
label = range(1995,2012)
# plt.title('Mixing Times')
plt.xticks(range(17),label,rotation=90)
plt.xlabel('Year')
plt.ylabel('Mixing Time of the Corresponding Markov Chain')
plt.grid()
#Why every year the TM reaches to steady state faster?


### If Kemeny constant is high this says we have less connected network, since there are less chances to go from one node in a community to another node in another community. Therefore, the average time will increase.¶

In [30]:
def Kemeny_constant(MC):
    """Return (Kemeny constant, stationary distribution) of transition matrix MC.

    The Kemeny constant is sum over eigenvalues lambda != 1 of 1/(1-lambda);
    the stationary distribution is the left eigenvector of the dominant
    eigenvalue, normalised to sum to one.
    NOTE(review): only real parts of the eigen-decomposition are kept — for
    matrices with complex eigenvalue pairs this alters the sum; confirm intended.
    """
    from scipy.linalg import eig
    eigval, vl, vr = eig(MC, left=True)
    eigval = np.real(eigval)
    vl = np.real(vl)

    dominant = np.argmax(np.abs(eigval))
    eigvec_1 = vl[:, dominant] / vl[:, dominant].sum()
    # Exclude the unit eigenvalue (rounded to absorb numerical noise).
    not_unit = np.around(eigval, decimals=8) != 1
    return np.divide(1, (1 - eigval[not_unit])).sum(), eigvec_1

In [31]:
# Kemeny constant of every yearly chain; print a running counter as progress.
# (enumerate replaces the original manual i = 0 / i += 1 counter.)
Kemenys = []
for i, TM in enumerate(TMs, start=1):
    K, pi = Kemeny_constant(TM)
    Kemenys.append(K)
    print(i)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

In [32]:
# Kemeny constant per year: higher values suggest a less connected (more
# modular) production network, per the discussion above.
fig = plt.figure(figsize=(10,7))
plt.plot(Kemenys,'o-')
label = range(1995,2012)
plt.xticks(range(17),label,rotation=90)
plt.grid()
plt.xlabel('Year')
plt.ylabel('Kemeny Constant of the Corresponding Markov Chain')

Out[32]:
<matplotlib.text.Text at 0x102b782d0>

## 2-Stationary distibutiotns, Pis¶

### What are the structurally strongest industries or who has the highest market share¶

In [33]:
# Human-readable state names: one "economy-industry" label per industry state,
# then one "economy-Government" label per economy state.
names = []
for i, eco in enumerate(Countries.Country):
    for j, ind in enumerate(industry_names):
        names.append(eco + "-" + ind)
for i, eco in enumerate(Countries.Country):
    names.append(eco + "-" + "Government")

print(len(names))
Data_all = np.asarray(Pi).T   # stationary distributions: states x years
what_to_plot = Data_all

# Industry states only, sorted by their 2011 stationary probability.
DF = pd.DataFrame(data=what_to_plot[:economy_n*industry_n], columns=range(1995,2012), index=names[:economy_n*industry_n])
DF = DF.sort_values([2011], ascending=False)

# Plot the 20 structurally strongest industries over the years.
ax = DF.T.plot(DF.T.index, DF.T.columns[:20], xticks=range(1995,2012), rot=45)
fig = ax.get_figure()

ax.legend(bbox_to_anchor=(1.77,1.021))
fig.set_size_inches(15,7)
plt.tight_layout()
font = {'size': 12}
plt.rc('font', **font)

1476


# Analysis at the country level¶

### World total production has been growing annually.¶

###### But what is interesting is to know the relative share of each economy in this growing pattern, rather than the isolated view used in GDP growth¶
In [34]:
# World total value added per year, used here as a proxy for world GDP.
plt.title('world total GDP')
# plt.plot(range(1995,2012),GDP_USD_Selected_Countries.ix[:,5:22].sum(axis=0),'o-r')
plt.plot(range(1995,2012),VA.sum(axis=1),'o-r')
plt.grid()


# GDP trends of Economies¶

In [35]:
# One subplot per economy: its GDP (sum of its industries' value added)
# over 1995-2011, on a 7x6 grid.
Data_all = np.asarray(Pi).T

fig = plt.figure(figsize=(15, 15))

GDP_shares = np.zeros((economy_n, 17))
Economies_shares = np.zeros((economy_n, 17))
years = range(1995, 2012)
for i in range(economy_n):
    industry_cols = range(i * industry_n, (i + 1) * industry_n)
    GDP_shares[i] = VA.values[:, industry_cols].sum(axis=1)
    plt.subplot(7, 6, i + 1)
    plt.title(Countries.Country[i])

    plt.plot(range(17), GDP_shares[i], '-r', linewidth=1.2, label='GDP of Economy')
    plt.xticks(range(17), years, rotation=90)

# NOTE(review): the export lost this cell's indentation, so whether the three
# calls below sat inside or after the loop is inferred — confirm against the
# original notebook (the captured output shows a single Legend object).
plt.tight_layout()
plt.grid()
plt.legend(bbox_to_anchor=(3.19, 1.022))

Out[35]:
<matplotlib.legend.Legend at 0x11e95a410>