import datetime
import pandas as pd
import numpy as np
from collections import defaultdict
import scipy.stats as ss
from scipy.optimize import fmin as scipyfmin
import operator
import re
import json

#if you are using ipython and want to see things inline. 
%pylab inline 

def load_files(folder):
    M = np.load(folder+'M.npy')
    user_dict = json.load(open(folder+'user_dict.json', 'r'))
    article_dict = json.load(open(folder+'article_dict.json', 'r')) 
    user_exogenous_ranks = json.load(open(folder+'user_exogenous_ranks.json', 'r'))
    article_exogenous_ranks = json.load(open(folder+'article_exogenous_ranks.json', 'r'))
    return {'M':M,
            'user_dict':user_dict,
            'article_dict':article_dict,
            'user_exogenous_ranks':user_exogenous_ranks, 
            'article_exogenous_ranks':article_exogenous_ranks}

feminist_data = load_files('savedata/Category:Feminist_writers/2014-02-18/2014-02-18/')

def make_bin_matrix(M):
    #this returns the a matrix with entry True where the original was nonzero, and zero otherwise.
    M[M>0] = 1.0
    return M

def M_test_triangular(M):
    user_edits_sum = M.sum(axis=1)
    article_edits_sum = M.sum(axis=0)
    
    user_edits_order = user_edits_sum.argsort()
    article_edits_order = article_edits_sum.argsort()
    
    M_sorted = M[user_edits_order,:]
    M_sorted_sorted = M_sorted[:,article_edits_order]
    
    M_bin = make_bin_matrix(M_sorted_sorted)
    
    plt.figure(figsize=(10,10))
    imshow(M_sorted_sorted, cmap=plt.cm.bone, interpolation='nearest')

bin_M = make_bin_matrix(feminist_data['M'])
M_test_triangular(bin_M)

def Gcp_denominateur(M, p, k_c, beta):
    M_p = M[:,p]
    k_c_beta = k_c ** (-1 * beta)
    return np.dot(M_p, k_c_beta)

def Gpc_denominateur(M, c, k_p, alpha):
    M_c = M[c,:]
    k_p_alpha = k_p ** (-1 * alpha)
    return np.dot(M_c, k_p_alpha)


def make_G_hat(M, alpha=1, beta=1):
    '''G hat is Markov chain of length 2
    Gcp is a matrix to go from  contries to product and then 
    Gpc is a matrix to go from products to ccountries'''
    
    k_c  = M.sum(axis=1) #aka k_c summing over the rows
    k_p = M.sum(axis=0) #aka k_p summering over the columns
    
    G_cp = np.zeros(shape=M.shape)
    #Gcp_beta
    for [c, p], val in np.ndenumerate(M):
        numerateur = (M[c,p]) * (k_c[c] ** ((-1) * beta))
        denominateur = Gcp_denominateur(M, p, k_c, beta)
        G_cp[c,p] = numerateur / float(denominateur)
    
    
    G_pc = np.zeros(shape=M.T.shape)
    #Gpc_alpha
    for [p, c], val in np.ndenumerate(M.T):
        numerateur = (M.T[p,c]) * (k_p[p] ** ((-1) * alpha))
        denominateur = Gpc_denominateur(M, c, k_p, alpha)
        G_pc[p,c] = numerateur / float(denominateur)
    
    
    return {'G_cp': G_cp, "G_pc" : G_pc}

def w_generator(M, alpha, beta):
    #this cannot return the zeroeth iteration
    
    G_hat = make_G_hat(M, alpha, beta)
    G_cp = G_hat['G_cp']
    G_pc = G_hat['G_pc']
    #

    fitness_0  = np.sum(M,1)
    ubiquity_0 = np.sum(M,0)
    
    fitness_next = fitness_0
    ubiquity_next = ubiquity_0
    i = 0
    
    while True:
        
        fitness_prev = fitness_next
        ubiquity_prev = ubiquity_next
        i += 1
        
        fitness_next = np.sum( G_cp*ubiquity_prev, axis=1 )
        ubiquity_next = np.sum( G_pc* fitness_prev, axis=1 )
        
        yield {'iteration':i, 'fitness': fitness_next, 'ubiquity': ubiquity_next}
        

def w_stream(M, i, alpha, beta):
    """gets the i'th iteration of reflections of M, 
    but in a memory safe way so we can calculate many generations"""
    if i < 0:
        raise ValueError
    for j in w_generator(M, alpha, beta):
        if j[0] == i:
            return {'fitness': j[1], 'ubiquity': j[2]}
            break
            
def find_convergence(M, alpha, beta, fit_or_ubiq, do_plot=False,):
    '''finds the convergence point (or gives up after 1000 iterations)'''
    if fit_or_ubiq == 'fitness':
        Mshape = M.shape[0]
    elif fit_or_ubiq == 'ubiquity':
        Mshape = M.shape[1]
    
    rankings = list()
    scores = list()
    
    prev_rankdata = np.zeros(Mshape)
    iteration = 0

    for stream_data in w_generator(M, alpha, beta):
        iteration = stream_data['iteration']
        
        data = stream_data[fit_or_ubiq]
        rankdata = data.argsort().argsort()
        
        #test for convergence
        if np.equal(rankdata,prev_rankdata).all():
            break
        if iteration == 1000:
            break
        else:
            rankings.append(rankdata)
            scores.append(data)
            prev_rankdata = rankdata
            
    if do_plot:
        plt.figure(figsize=(iteration/10, Mshape / 20))
        plt.xlabel('Iteration')
        plt.ylabel('Rank, higher is better')
        plt.title('Rank Evolution')
        p = semilogx(range(1,iteration), rankings, '-,', alpha=0.5)
    return {fit_or_ubiq:scores[-1], 'iteration':iteration}

def w_star_analytic(M, alpha, beta, w_star_type):
    k_c  = M.sum(axis=1) #aka k_c summing over the rows
    k_p = M.sum(axis=0) #aka k_p summering over the columns
    
    A = 1
    B = 1
    
    def Gcp_denominateur(M, p, k_c, beta):
        M_p = M[:,p]
        k_c_beta = k_c ** (-1 * beta)
        return np.dot(M_p, k_c_beta)
    
    def Gpc_denominateur(M, c, k_p, alpha):
        M_c = M[c,:]
        k_p_alpha = k_p ** (-1 * alpha)
        return np.dot(M_c, k_p_alpha)
    
    if w_star_type == 'w_star_c':
        w_star_c = np.zeros(shape=M.shape[0])

        for c in range(M.shape[0]):
            summand = Gpc_denominateur(M, c, k_p, alpha)
            k_beta = (k_c[c] ** (-1 * beta))
            w_star_c[c] = A * summand * k_beta

        return w_star_c
    
    elif w_star_type == 'w_star_p':
        w_star_p = np.zeros(shape=M.shape[1])
    
        for p in range(M.shape[1]):
            summand = Gcp_denominateur(M, p, k_c, beta)
            k_alpha = (k_p[p] ** (-1 * alpha))
            w_star_p[p] = B * summand * k_alpha
    
        return w_star_p

#purer python
#score
w_scores = w_star_analytic(M=feminist_data['M'], alpha=0.5, beta=0.5, w_star_type='w_star_c')
#identify
w_ranks = {name: w_scores[pos] for name, pos in feminist_data['user_dict'].iteritems() }
#sort
w_ranks_sorted = sorted(w_ranks.iteritems(), key=operator.itemgetter(1))

#or use pandas
w_scores_df = pd.DataFrame.from_dict(w_ranks, orient='index')
w_scores_df.columns = ['w_score']
w_scores_df.sort(columns=['w_score'], ascending=False).head()

convergence = find_convergence(M=feminist_data['M'], alpha=0.5, beta=0.5, fit_or_ubiq='fitness', do_plot=True)

convergence['iteration']

w_iter = convergence['fitness'] 
#rank
w_iter_ranks = {name: w_iter[pos] for name, pos in feminist_data['user_dict'].iteritems() }
#sort
w_ranks_sorted = sorted(w_iter_ranks.iteritems(), key=operator.itemgetter(1))

#or use pandas
w_iter_scores_df = pd.DataFrame.from_dict(w_iter_ranks, orient='index')
w_iter_scores_df.columns = ['w_score']
w_iter_scores_df.sort(columns=['w_score'], ascending=False).head()

'''I'm sure this can be done much more elegantly
but this was sort-of drink-a-lot-of-coffee-one-afternoon-and-get-it-done
cleaning this up is an exercise for the reader'''

def rank_comparison(a_ranks_sorted, b_ranks_sorted, do_plot=False):
    a_list = list()
    b_list = list()
    for atup in a_ranks_sorted:
        aiden = atup[0]
        apos = atup[1]
        #find this in our other list
        for btup in b_ranks_sorted:
            biden = btup[0]
            bpos = btup[1]
            if aiden == biden:
                a_list.append(apos)
                b_list.append(bpos)
    if do_plot:    
        plt.figure(figsize=(10,20))
        plot([1,2], [a_list, b_list], '-o')
        plt.show()
    
    return ss.spearmanr(a_list, b_list)

def calibrate_analytic(M, ua, exogenous_ranks_sorted, user_or_art_dict, index_function, title, do_plot=False):
    
    if ua == 'users':
        w_star_type = 'w_star_c'
    elif ua == 'articles':
        w_star_type = 'w_star_p'
    
    squarelen = range(0,50)
    
    alpha_range = map(index_function,squarelen)
    beta_range = map(index_function,squarelen)
    landscape = np.zeros(shape=(len(list(alpha_range)),len(list(beta_range))))

    top_spearman = {'spearman':None,'alpha':None, 'beta':None, 'ua':ua}

    for alpha_index, alpha in enumerate(alpha_range):
        for beta_index, beta in enumerate(beta_range):
            
            w_converged = w_star_analytic(M, alpha, beta, w_star_type)
            
            w_ranks = {name: w_converged[pos] for name, pos in user_or_art_dict.iteritems() }
            w_ranks_sorted = sorted(w_ranks.iteritems(), key=operator.itemgetter(1))
            
            spearman = rank_comparison(w_ranks_sorted, exogenous_ranks_sorted)

            if spearman[1] < 0.05:
                landscape[alpha_index][beta_index] = spearman[0]
                
                if (not top_spearman['spearman']) or (spearman[0] > top_spearman['spearman']):
                    top_spearman['spearman'] = spearman[0]
                    top_spearman['alpha'] = alpha
                    top_spearman['beta'] = beta
            else:
                landscape[alpha_index][beta_index] = np.nan

    if do_plot:
        plt.figure(figsize=(10,10))
        heatmap = imshow(landscape, interpolation='nearest', vmin=-1, vmax=1)
        #heatmap = plt.pcolor(landscape)
        colorbar = plt.colorbar(heatmap)
        plt.xlabel(r'$ \beta $')
        plt.xticks(squarelen, beta_range, rotation=90)
        plt.ylabel(r'$ \alpha $')
        plt.yticks(squarelen, alpha_range)
        plt.title(title)
        
        landscape_file = open(title+'_landscape.npy', 'w')
        np.save(landscape_file, landscape)
        plt.savefig(title+'_landscape.eps')

    return top_spearman

user_spearman = calibrate_analytic(M=make_bin_matrix(feminist_data['M']),
                                   ua='users',
                                   exogenous_ranks_sorted=feminist_data['user_exogenous_ranks'],
                                   user_or_art_dict=feminist_data['user_dict'],
                                   index_function=lambda x: (x-25)/12.5, 
                                   title='Grid Search for User of Feminist Writers',
                                   do_plot=True)

print('Optimizing points from gridsearch', 
      'rho:', user_spearman['spearman'], 
      'alpha', user_spearman['alpha'], 
      'beta', user_spearman['beta'] )