## Setting up tools
import csv
import codecs
import pandas as pd
import numpy as np
import os
from pandas import DataFrame, Series
from itertools import islice
from bs4 import BeautifulSoup

dtype = {'FIPS':np.object}

## Extracting Literacy rates, and College degree rates
f1 = codecs.open(os.path.abspath("data/LiteracyCollegeDegreeData.csv"), encoding='iso-8859-1')
df1 = pd.read_csv(f1, dtype=dtype)

## Extracting Crime, motor vehicle mortality, average income and smoking rates
f2 = codecs.open(os.path.abspath("data/CrimeRate.csv"), encoding='iso-8859-1')
header = list()
data = list()
n = 0
for row in islice(f2, None):
    if n == 0:
        pass
    elif n == 1:
        header.append(row.split(','))
    else:
        data.append(row.split(','))
    n = n + 1
        
df2 = DataFrame(data, columns = header[0])
df2 = df2[['FIPS', 'County', 'State', 'Violent Crime Rate', '% AFGR', 'MV Mortality Rate', '% Smokers']]

f2 = codecs.open(os.path.abspath("data/PopIncome.csv"), encoding='iso-8859-1')
header1 = list()
data1 = list()
n = 0
for row in islice(f2, None):
    if n == 0:
        pass
    elif n == 1:
        header1.append(row.split(','))
    else:
        data1.append(row.split(','))
    n = n + 1
        
df3 = DataFrame(data1, columns = header1[0])
df3 = df3[['FIPS', 'County', 'State', 'Population', 'Household Income']]

## Extracting county health rankings

dtype = {'FIPS':np.object}
ff = codecs.open(os.path.abspath("data/CountyHealthRankings.csv"), encoding='iso-8859-1')
header = list()
data = list()
n = 0
for row in islice(ff, None):
    if n == 0:
        pass
    elif n == 1:
        header.append(row.split(','))
    else:
        data.append(row.split(','))
    n = n + 1
health = DataFrame(data, columns = header[0])

dataframe = pd.merge(df2, df3)
df_final = pd.merge(df1, dataframe, on = 'FIPS')

# Deleting redundant or unnecessary columns, then cleaning up the dataframe by renaming the columns
for col in ('State_y', 'County_y', 'Population_x', '95%CI-Low(College)', '95%CI-Low(Illiterate)', '95%CI-High(Illiterate)', 'Quartile'):
    del df_final[col]
    
df_final = df_final.rename(columns={'Population_y': 'Population', 'State_x': 'State', 'County_x': 'County'})
df_final['# of Ranked Counties'] = health['# of Ranked Counties']
df_final['Health Outcome Rank'] = health['Health Outcome Rank'] 

def get_state(state):
    '''
    Returns subset of df_final that is only the counties in the state
    state should be a state's FIP code, e.g., '06' for California
    '''
    if len(state) > 2:
        fip = state[:2]
    else:
        fip = state
    return df_final[df_final['FIPS'].map(lambda x: x[:2] == fip)]

# False is for 'good' metrics and True is for 'bad' metrics.
ascending = {'NumberWentToSomeCollege': False, 'PercentWithCollegeDegree': False, '95%CI-High(College)': False, 'Illiterate': True, '% Smokers':True, 'MV Mortality Rate': True, '% AFGR': False, 'Violent Crime Rate': True, 'Population': False, 'Household Income':False, '# of Ranked Counties': False, 'Health Outcome Rank': False}


def getStateCountyMetricRankings(stateFips):
  '''
  Returns a dataframe with all the metrics ranked for all the counties in this state.
  stateFips should be a FIPS code for a state with just the first two characters that are relevant.
  '''
  if len(stateFips) > 2:
      stateFips = stateFips[:2]
  finalDataFrame = None
  for metric in ascending:
    rankedColumn = get_state(stateFips)[metric].rank(ascending=ascending[metric])
    if finalDataFrame is None:
        finalDataFrame = DataFrame(index = rankedColumn.index)
        finalDataFrame[metric] = rankedColumn
    else:
        finalDataFrame[metric] = rankedColumn  
  return finalDataFrame

translationMap = {"Crime Rate": "Violent Crime Rate", "HS Grad Rate": "% AFGR", \
"Healthcare Cost": "Health Outcome Rank", "College Degree Holders": "PercentWithCollegeDegree", "Population": "Population", \
"Income": "Household Income", "Literacy Rate": "Illiterate", "MV Mortality Rate": "MV Mortality Rate", \
"Percent of Smokers": "% Smokers", "Number attended College": "NumberWentToSomeCollege"}

ranks = [rank1, rank2, rank3, rank4, rank5, rank6, rank7, rank8, rank9, rank10]

weights = {}

# Populate the weights of the metrics.
for i,rank in enumerate(ranks):
    translatedDFMetric = translationMap[rank]
    weights[translatedDFMetric] = i + 1
    
print weights

stateDF = get_state(state)
stateCountyMetrics = getStateCountyMetricRankings(state)

# Key is County FIPS, Value is the score
rankScores = {}

for index in stateDF.index:
    countyFip = stateDF.ix[index].ix['FIPS']
    for metric in weights:
        weight = weights[metric]
        metricValue = stateCountyMetrics.ix[index].ix[metric]
        if countyFip not in rankScores:
            rankScores[countyFip] = 0
        rankScores[countyFip] += weight*metricValue


num_counties = len(stateDF)

def rgb_to_hex(rgb):
    return '#%02x%02x%02x' % rgb

def generate_rgb(val, maxval):
    f = float(val) / (maxval)
    r, g, b = int((1-f)*255), int(f*255), 0
    return rgb_to_hex((g, r, b))

colors = []
for val in xrange(0, num_counties+1):
    colors.append(generate_rgb(val, num_counties))

from collections import defaultdict

counties_rating = rankScores

counties_sorted = sorted(counties_rating, key=counties_rating.get, reverse=False)
counties_colors = defaultdict(int)

i = 0
while (i < num_counties):
    try:
        counties_colors[counties_sorted[i]] = colors[i]
        i = i + 1
    except:
        i = i + 1
        continue

from IPython.display import SVG
SVG(filename="counties.svg")

svg = codecs.open(os.path.abspath("data/counties.svg"), encoding='iso-8859-1').read()

soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview'])
paths = soup.findAll('path')
svgs = soup.findAll('svg')
viewbox = "5 90 650 300"
style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1; stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt; marker-start:none;stroke-linejoin:bevel;fill:'
blank = 'fill:#FFFFFF';
path_onmouseover_open = "displayName('"
path_onmouseover_close = "')"

legends = []
countyNames = defaultdict(int)

rank = 0
for p in paths:
    if p['id'][0:2] == state[:2] and p['id'] not in ["State_Lines", "separator"]:
        try:
            color = counties_colors[p['id']]
            p['style'] = style + color
            rank = counties_sorted.index(p['id'])
            countyNames[p['id']] = p['inkscape:label']
            p['inkscape:label'] = p['inkscape:label'] + '. Rank: ' + str(rank+1)
            name = p['inkscape:label']
            legends.append(name)
            p['onmouseover'] = path_onmouseover_open + name + path_onmouseover_close
            count += 1
        except:
            continue
    else:
        p['style'] = blank
        
for s in svgs:
    s['viewBox']= viewbox
    
with open("output.svg", "wb") as file:
    file.write(soup.prettify(formatter=None))

for fips in counties_sorted[:10]:
    print "Rank" + str(counties_sorted.index(fips) + 1) + " " + str(countyNames[fips])

from IPython.display import SVG
SVG(filename="output.svg")