#!/usr/bin/env python
# coding: utf-8

# #International Political Tele-Conferencing Equilibrium System (IPTCES) algorithm
# The following is a public-facing walkthrough of the code used to for the predictive tool that powers [@WorldLeaderTips](https://twitter.com/worldleadertips). The styling of the out graph has been changed for this context.

# In[1]:


get_ipython().run_line_magic('pylab', 'inline')


# In[2]:


import json
import random
import datetime as dt
from collections import deque

import numpy as np
import pandas as pd
from pandas import *
from pandas.io.json import json_normalize

import matplotlib.pylab
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import statsmodels.api as sm
from statsmodels.tsa import *

from bigquery import get_client


# In[3]:


tomorrow = str(dt.date.today())
twoweeksago = str(dt.date.today() - dt.timedelta(days = 14))
lastweek = dt.datetime.today() - dt.timedelta(days = 7)
nextweek = dt.date.today() + dt.timedelta(days = 8)
day_index = dt.datetime.today().weekday()


# In[4]:


# Initialize Google BigQuery client:
with open('/PATH/TO/settings.json', 'rb') as json_data:
    settings = json.load(json_data)

client = get_client(settings['PROJECT_ID'], service_account=settings['SERVICE_ACCOUNT'], private_key=settings['KEY'], readonly=True)


# In[5]:


# Load the Goldstein suggstions table:
gsCodes = pd.read_csv('assets/goldstein_suggestions.tsv', index_col='code', sep='\t')


# In[6]:


leader_display_name = 'Pope Francis'
gdelt_search = "WHERE Actor1Name CONTAINS 'POPE' AND SQLDATE > 20130313"

date_start = '19790101'
date_end = ''.join(str(dt.date.today()).split('-'))

query_template = 'SELECT GLOBALEVENTID, SQLDATE, Actor1Name, Actor1CountryCode, GoldsteinScale, NumMentions FROM [gdelt-bq:full.events] {} AND SQLDATE>{} AND SQLDATE<{} IGNORE CASE;'
query = query_template.format(gdelt_search, date_start, date_end)

print 'Search for: {}'.format(leader_display_name)
print 'Query: {}'.format(query)


# In[7]:


filename = '{}.csv'.format(leader_display_name.replace(' ', '_'))


# In[8]:


try:
    job_id, results = client.query(query, timeout=2000)
    print '{} results'.format(str(len(results)))

    if len(results) == 0:
        print "NO RESULTS"

    df = json_normalize(results)
    df = df.sort(columns='SQLDATE')
    df.to_csv('{}'.format(filename), encoding='utf-8')
    print 'Saved: {}'.format(filename)

except:
    print 'ERROR: Timed out'


# In[9]:


df = pd.read_csv(filename, index_col='SQLDATE', parse_dates=True)
df.tail()


# In[10]:


# These steps incorporate the number of mentions a Goldstein score is associated with, reducing the impact of error in the event encoding, making the average better reflect the event's presence in the GDELT.
df['GoldMentions'] = df['GoldsteinScale'] * df['NumMentions']
goldstein = df.groupby([df.index.date]).agg({'GoldMentions': np.sum, 'NumMentions': np.sum})
goldstein['GoldAverage'] = goldstein['GoldMentions'] / goldstein['NumMentions']


# In[11]:


full_daterange = pd.date_range(start=min(df.index), end=max(df.index))


# In[12]:


# interpolate() takes care of days that do not have a entry / Goldstein score in GDELT:
goldstein = goldstein.reindex(full_daterange).interpolate(method='linear')


# In[13]:


# In case there is not enough historical data for the world leader:
if len(goldstein['GoldAverage']) < 230:
    day_difference = 230 - len(goldstein['GoldAverage']) + 1

    new_dates = [goldstein.index[0] - dt.timedelta(days=x) for x in range(1, day_difference)]
    new_dates = reversed(new_dates)

    columns = ['NumMentions', 'GoldMentions', 'GoldAverage']
    temp_data = {col: [None for x in range(1, day_difference)] for col in columns}
    missing = pd.DataFrame(temp_data, index=new_dates, columns=columns)

    temp_goldstein = missing.append(goldstein)
    goldstein = temp_goldstein.fillna(method='bfill')

    print 'Insufficient data, missing entries were backfilled.'
else:
    print 'Sufficient data, backfilling was avoided.'


# In[14]:


#Create two instances of the data, one to train the model...
training = pd.DataFrame(goldstein['GoldAverage'])
training.index = pd.to_datetime(training.index)
training.columns = ['Goldstein daily average']
print 'Test Sample {}\n'.format(training.tail())

# ... the other to create the plot:
plot_sample = pd.DataFrame(goldstein['GoldAverage'][-230:])
plot_sample.index = pd.to_datetime(plot_sample.index)
plot_sample.columns = ['Goldstein daily average']
print 'Plot Sample {}'.format(plot_sample.tail())


# In[15]:


# Creates the forcasting model using Autoregressive Moving Average (ARMA):
tries = 0
success = False
while tries < 6 and success is False:
    try:
        model = sm.tsa.ARMA(training,(12, tries)).fit()
        success = True
    except:
        tries += 1

if success is False:
    print "NOT SUCCESSFUL"
    
print model


# In[16]:


# Creates the prediction based on the proceeding two weeks through one week into the future:
prediction = model.predict(twoweeksago, str(nextweek), dynamic = False)

# The fitting of the prediction to the model:
prediction = prediction.shift(-1)


# In[17]:


def stir_the_pot(sample_range, st_dev):
    mean = sample_range.mean()
    added_range = mean + st_dev
    random_suggestion = None

    # Randomly suggest something either above or below the stagnant Goldstein range:
    if random.randint(0,1) == 1:
        random_suggestion = "{0:.1f}".format(random.uniform(float(added_range),10))
    else:
        random_suggestion = "{0:.1f}".format(random.uniform(-10,-float(added_range)))

    print 'new suggestion: {}'.format(random_suggestion)
    return float(random_suggestion)


# In[18]:


# Based on the Prediction, determine the Suggestion:
sample_window = training[-60:]
standard_deviation = float(sample_window.std())
suggestion = None

if standard_deviation < 2.1 and random.randint(1,5) > 2:
    # The leader has been stuck in a rut and is not honing in on Goldstein Score of '1', 3/5 chance of getting a new suggestion:
    print 'Your actions are too stagnant!'
    suggestion = stir_the_pot(sample_window, standard_deviation)

    predicts = round(prediction.ix[tomorrow:str(nextweek)].mean(), 1)
    
else:
    # Finds the average of the Goldstein scores for the coming week:
    predicts = round(prediction.ix[tomorrow:str(nextweek)].mean(), 1)
    suggestion = round(((predicts - 1) * -1), 1)

print 'suggestion: {}'.format(suggestion)


# In[19]:


# From the possible suggestions for the score, find one at random:
prediction_text = random.choice(gsCodes.loc[predicts].values[0].split(';')).strip()
gsDescription   = random.choice(gsCodes.loc[suggestion].values[0].split(';')).strip()


# In[20]:


print '=================================================='
print "{}'s Forecast: {} {} ".format(leader_display_name, predicts, prediction_text)
print "{}'s Suggested score: {} ".format(leader_display_name, suggestion)

if predicts == 1.0:
    gsDescription = "You're doing great. Stay the course."
else:
    print "Suggested actions for the coming week:\n{}".format(gsDescription)

print '=================================================='


# In[21]:


plot_sample.plot(kind='line',
                 title='{}: Goldstein Trend and Prediction'.format(leader_display_name),
                 ylim=(-10,10),
                 figsize=(16,8),
                 color='lightblue')
prediction.plot(kind='line', 
                label='prediction',
                legend=True,
                color='red')


# In[ ]: