import json
import requests
from pandas import DataFrame, Series
import pandas as pd
import numpy as np

# read population in from JSON-formatted data derived from Wikipedia
pop_json_url = ("https://gist.github.com/rdhyee/8511607/"
                "raw/f16257434352916574473e63612fcea55a0c1b1c/population_of_countries.json")
pop_list = requests.get(pop_json_url).json()

df = DataFrame(pop_list)
df[:5]

df.dtypes

s1 = Series(np.arange(-1, 4))
s1

from census import Census
from us import states
import settings

c = Census(settings.CENSUS_KEY)
# total population (P0010001) for California from the 2010 Census SF1
c.sf1.get(('NAME', 'P0010001'), {'for': 'state:%s' % states.CA.fips})

# get the total population for every state
r = c.sf1.get(('NAME', 'P0010001'), {'for': 'state:*'})
df1 = DataFrame(r)
df1.head()

len(df1)

# P0010001 comes back as strings, so summing concatenates rather than adds;
# cast to int first to get a numeric total
print(df1.P0010001.sum())
print()
print(df1.P0010001.astype(int).sum())

# ~ negates a boolean Series elementwise
~Series([True, True, False, True])

# compare the total over all rows with the total over the states proper
pop1 = df1['P0010001'].astype('int').sum()
pop2 = df1[np.in1d(df1.state, [s.fips for s in states.STATES])]['P0010001'].astype('int').sum()
pop1 - pop2   # the difference comes from rows not in us.states.STATES (e.g., Puerto Rico)

import census
import us
from itertools import islice

def places(variables="NAME"):
    """generate all Census places, state by state"""
    for state in us.states.STATES:
        geo = {'for': 'place:*',
               'in': 'state:{s_fips}'.format(s_fips=state.fips)}
        for place in c.sf1.get(variables, geo=geo):
            yield place

# islice(..., None) runs the generator to exhaustion
r = list(islice(places("NAME,P0010001"), None))
places_df = DataFrame(r)
places_df.P0010001 = places_df.P0010001.astype('int')

print("number of places", len(places_df))
print("total pop", places_df.P0010001.sum())
places_df.head()

# places in California (state FIPS '06'), largest population first
places_df[places_df.state == '06'].sort_values(by='P0010001', ascending=False).head()

# for example, using lower- and uppercase English letters
import string

lower = Series(list(string.ascii_lowercase), name='lower')
upper = Series(list(string.ascii_uppercase), name='upper')
df2 = pd.concat((lower, upper), axis=1)
df2['ord'] = df2['lower'].apply(ord)
df2.head()

'b'.upper()

words = ['Berkeley', 'I', 'School']
for (i, word) in islice(enumerate(words), 1):
    print(i, word)

def g2():
    words = ['Berkeley', 'I', 'School']
    for word in words:
        if word != 'I':
            for letter in list(word):
                yield letter

my_g2 = g2()

def g3():
    # note: this yields the entire words list once per word (three times),
    # not the individual words
    words = ['Berkeley', 'I', 'School']
    for word in words:
        yield words

def states(variables='NAME'):
    """generate the state rows from the 2010 Census SF1"""
    geo = {'for': 'state:*'}
    states_fips = set(state.fips for state in us.states.STATES)
    # need to filter out non-states
    for r in c.sf1.get(variables, geo=geo, year=2010):
        if r['state'] in states_fips:
            yield r

# make a dataframe from the total populations of states in the 2010 Census
df = DataFrame(states('NAME,P0010001'))
df.P0010001 = df.P0010001.astype('int')
df['first_letter'] = df.NAME.apply(lambda s: s[0])
df.head()

print(list(df.NAME))

def normalize(s):
    """take a Series and divide each item by the sum so that the new Series adds up to 1.0"""
    total = np.sum(s)
    return s.astype('float') / total

def entropy(series):
    """Normalized Shannon index: 1.0 when all entries are equal,
    0.0 when there are fewer than two nonzero entries"""
    # eliminate 0s, which contribute nothing to the entropy
    series1 = series[series != 0]
    if len(series1) > 1:
        # maximum possible entropy for the given length of the input series
        max_s = -np.log(1.0 / len(series))
        p = series1.astype('float') / series1.sum()
        return np.sum(-p * np.log(p)) / max_s
    else:
        return 0.0
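# A quick sanity check of entropy() (an added illustration, not from the
# original notebook): a Series whose entries are all equal should have a
# normalized entropy of exactly 1.0, while a highly concentrated Series
# should score much lower.
print(entropy(Series([10, 10, 10, 10])))   # 1.0: uniform distribution
print(entropy(Series([100, 1, 1, 1])))     # well below 1.0: concentrated
print(entropy(Series([7])))                # 0.0: fewer than two nonzero entries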
def gini_simpson(s):
    """Gini-Simpson diversity index:
    https://en.wikipedia.org/wiki/Diversity_index#Gini.E2.80.93Simpson_index"""
    s1 = normalize(s)
    return 1 - np.sum(s1 * s1)
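# Hedged usage sketch (assumes the df of 2010 state populations built
# above, with its first_letter column): compare the two diversity
# measures on state population totals grouped by first letter.
by_letter = df.groupby('first_letter')['P0010001'].sum()
print(entropy(by_letter))        # normalized Shannon index, in [0, 1]
print(gini_simpson(by_letter))   # Gini-Simpson index, in [0, 1 - 1/n]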