import os
import datetime as dt
import time
import io

import numpy as np
import requests

# Remote directory that serves the daily GKG archives.
URL = "http://gdelt.utdallas.edu/data/gkg/"
# Local directory the archives are saved into; it must already exist,
# because opening a file for writing does not create parent directories.
PATH = "/Users/dmasad/Data/GDELT/GKG/"

# Specify the start and end date for your data
start_date = dt.datetime(2013, 10, 5)
end_date = dt.datetime.today()
date = start_date

# For each date in the range (both ends inclusive), download that day's file.
while date <= end_date:
    filename = date.strftime("%Y%m%d") + ".gkg.csv.zip"
    # stream=True defers the body so iter_content() really delivers it in
    # chunks; the timeout keeps a stalled connection from hanging the loop.
    req = requests.get(URL + filename, stream=True, timeout=60)
    try:
        if req.ok:
            # The with-block closes the file even if a write fails midway.
            with io.open(os.path.join(PATH, filename), "wb") as dl:
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        dl.write(chunk)
        else:
            # Don't save an HTTP error page under a ".zip" name (e.g. when
            # the file for this date is not available on the server).
            print("Skipping %s: HTTP %d" % (filename, req.status_code))
    finally:
        req.close()
    time.sleep(30)  # Be nice and don't overload the server.
    date += dt.timedelta(days=1)

# Peek at one daily file: print its header row, then every tab-separated
# field of the third line (the second line is skipped).
# NOTE(review): this reads an uncompressed "20131001.gkg.csv", while the
# download loop stores ".zip" archives -- presumably the file was extracted
# by hand beforehand; confirm.
with open(os.path.join(PATH, "20131001.gkg.csv")) as f:
    headers = f.readline()
    f.readline()  # Skip a row
    row = f.readline()

print(headers.split("\t"))

# `row` is reused by the field-by-field inspection further down.
row = row.split("\t")
for entry in row:
    print(entry)

def _print_nested(field):
    """Print each ';'-separated record of *field*, split on '#'."""
    for record in field.split(";"):
        print(record.split("#"))


# Columns 2 and 4 hold ';'-separated records with '#'-delimited parts.
# NOTE(review): presumably the GKG COUNTS and LOCATIONS columns -- confirm
# against the header row printed earlier.
_print_nested(row[2])

# Column 3 is a plain ';'-separated list.
print(row[3].split(";"))

_print_nested(row[4])

# PERSONS:
print(row[5].split(";"))

# ORGANIZATIONS
print(row[6].split(";"))

# Columns 7 and 8 are ','-separated lists.
print(row[7].split(","))

print(row[8].split(","))

# SOURCE:
print(row[9])

# SOURCEURL
print(row[10])

# Names of the leaders to search for, one per line, in their original casing.
# NOTE(review): "Mohammad Baqer NOBAKHT" is listed twice; harmless for the
# membership tests below, but confirm a different name was not intended.
LEADERS = [
    "Ali Hoseini-KHAMENEI",
    "Hasan Fereidun RUHANI",
    "Mohsen HAJI-MIRZAIE",
    "Mohammad NAHAVANDIAN",
    "Eshaq JAHANGIRI",
    "Mohammad SHARIATMADARI",
    "Elham AMINZADEH",
    "Mohammad Baqer NOBAKHT",
    "Majid ANSARI",
    "Mohammad Baqer NOBAKHT",
    "Sorena SATARI",
    "Shahindokht MOLAVERDI",
    "Ali Akbar SALEHI",
    "Mohammad Ali NAJAFI",
    "Masumeh EBTEKAR",
    "Mohammad Ali SHAHADI",
    "Mohammad HOJJATI",
    "Mahmud VAEZI-Jazai",
    "Ali JANATI",
    "Hosein DEHQAN",
    "Ali TAYEBNIA",
    "Ali Asqar FANI",
    "Hamid CHITCHIAN",
    "Mohammad Javad ZARIF-Khonsari",
    "Seyed Hasan QAZIZADEH-Hashemi",
    "Mohammad Reza NEMATZADEH",
    "Mahmud ALAVI, Hojjat ol-Eslam",
    "Abdolreza Rahmani-FAZLI",
    "Mostafa PUR-MOHAMMADI",
    "Ali RABIEI",
    "Bijan Namdar-ZANGANEH",
    "Abbas Ahmad AKHUNDI",
    "Reza FARAJI-DANA",
    "Valiollah SEIF",
    "Mohammad KHAZAI-Torshizi",
]
# Lower-case every name; later comparisons are done against this form.
LEADERS = [leader.lower() for leader in LEADERS]

# Collect the PERSONS list (column 5) of every row, in every CSV file under
# PATH, that mentions at least one of the LEADERS.
leader_names = set(LEADERS)  # built once: constant-time membership tests

entries = []
for path in os.listdir(PATH):
    if not path.endswith("csv"):
        continue
    # The with-block closes each file before the next one is opened.
    with open(os.path.join(PATH, path)) as f:
        for row in f:
            fields = row.split("\t")
            if len(fields) < 6:
                # Blank or truncated line: there is no PERSONS column.
                continue
            actors = fields[5].split(";")
            # Keep the whole co-mention list once, however many leaders match.
            if any(actor in leader_names for actor in actors):
                entries.append(actors)
print(len(entries))

import itertools
from collections import defaultdict

# Count how often each unordered pair of names shares an entry. A pair is
# stored under whichever ordering was seen first, so (a, b) and (b, a)
# accumulate into the same counter.
dyads = defaultdict(int)
for co_mentioned in entries:
    for pair in itertools.combinations(co_mentioned, 2):
        flipped = pair[::-1]
        key = flipped if flipped in dyads else pair
        dyads[key] += 1

# `import matplotlib.pyplot as plt` binds only the name `plt`; the top-level
# package has to be imported explicitly before `matplotlib.rcParams` is used.
import matplotlib
import matplotlib.pyplot as plt
# Some initial styling, to make our graphs look good:
matplotlib.rcParams['axes.facecolor'] = "#eeeeee"
matplotlib.rcParams['axes.grid'] = True
matplotlib.rcParams['xtick.labelsize'] = 14
matplotlib.rcParams['ytick.labelsize'] = 14

# Histogram of the dyad co-occurrence counts on a logarithmic y axis, using
# the 26 evenly spaced bin edges between 1 and 250.
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)
ax.set_yscale('log')
bin_edges = np.linspace(1, 250, 26)
h = ax.hist(list(dyads.values()), bins=bin_edges)

# Fraction of dyads that co-occur more than once.
counts = np.array(list(dyads.values()))
repeated = counts[counts > 1]
print(len(repeated) / float(len(counts)))

import networkx as nx

# Build an undirected graph with one edge per dyad seen more than once,
# weighted by its co-occurrence count, and save it as GraphML.
G = nx.Graph()
for (person_a, person_b), weight in dyads.items():
    if weight > 1:
        G.add_edge(person_a, person_b, weight=weight)

nx.write_graphml(G, "iran.graphml")

# Draw the network with a spring layout: light edges, node names as labels,
# and the axes hidden.
fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(1, 1, 1)
pos = nx.spring_layout(G, k=0.2, iterations=25)
shared_options = {"pos": pos, "ax": ax}
nx.draw_networkx_edges(G, edge_color='#eeeeee', **shared_options)
nx.draw_networkx_labels(G, font_size=16, **shared_options)
_ = ax.axis('off')

# Per-node centrality scores; both are dictionaries keyed by node name.
eigen_centralities = nx.eigenvector_centrality(G)
between_centralities = nx.betweenness_centrality(G)

# Place every name at (eigenvector centrality, betweenness centrality).
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)

for name, eigen_score in eigen_centralities.items():
    between_score = between_centralities[name]
    ax.text(eigen_score, between_score, name, fontdict={"size": 16})

ax.set_xlabel("Eigenvector Centrality", size=20)
ax.set_ylabel("Betweenness Centrality", size=20)

# Zoomed view of the centrality plot: only names with non-zero betweenness
# that fall inside the visible window are labelled.
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(111)

# One pair of bounds drives both the label filter and the axis limits.
# Text labels are not clipped to the axes by default, so a filter wider than
# the limits would draw names outside the plotting area.
x_max, y_max = 0.2, 0.16

for name in eigen_centralities.keys():
    x = eigen_centralities[name]
    y = between_centralities[name]
    if x < x_max and 0 < y < y_max:
        ax.text(x, y, name, fontdict={"size": 16})
ax.set_xlim(0, x_max)
ax.set_ylim(0, y_max)
ax.set_xlabel("Eigenvector Centrality", size=20)
ax.set_ylabel("Betweenness Centrality", size=20)

# Tightest zoom of the centrality plot: label only names with eigenvector
# centrality below 0.05 and betweenness strictly between 0 and 0.02.
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)

for name, x in eigen_centralities.items():
    y = between_centralities[name]
    if not (x < 0.05 and 0 < y < 0.02):
        continue
    ax.text(x, y, name, fontdict={"size": 16})

ax.set_xlim(0, 0.05)
ax.set_ylim(0, 0.02)
ax.set_xlabel("Eigenvector Centrality", size=20)
ax.set_ylabel("Betweenness Centrality", size=20)

# Style the code using CSS shamelessly lifted from Bayesian Methods for Hackers
# https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers
# HTML is imported from the public IPython.display module rather than the
# internal IPython.core.display location.
from IPython.display import HTML

# Read the stylesheet inside a with-block so the file handle is closed.
with open("Style.css") as css_file:
    styles = css_file.read()

# Left as a bare trailing expression, as in the original.
HTML(styles)