%pylab --no-import-all inline

# Shows the version of pandas that we are using
!pip show pandas

#  import useful classes of pandas
import numpy as np
import pandas as pd
from pandas import Series, DataFrame, Index

import settings

# This cell should run successfully if you have a string set up to represent your census key

try:
    import settings
    # basestring is the common base class of str and unicode in Python 2,
    # so a single isinstance check accepts either kind of string (and any
    # subclass of them) without comparing exact types.
    assert isinstance(settings.CENSUS_KEY, basestring)
except Exception as e:
    # Report the problem instead of aborting, so the reader sees what is
    # wrong with the settings module or the key it should define.
    print "error in importing settings to get at settings.CENSUS_KEY", e

# let's figure out a bit about the us module, in particular, us.states
# https://github.com/unitedstates/python-us

from us import states

for (i, state) in enumerate(states.STATES):
    print i, state.name, state.fips

import requests

# get the total population of all states
# The API key travels in the query string, so request the endpoint over
# HTTPS rather than plain HTTP to avoid sending the key in clear text.
url = "https://api.census.gov/data/2010/sf1?key={key}&get=P0010001,NAME&for=state:*".format(key=settings.CENSUS_KEY)

# note the structure of the response
r = requests.get(url)

# FILL IN
# drop the header record
from itertools import islice
# total population including PR is 312471327


# FILL IN
# exclude PR:  308745538


# let's now create a DataFrame from r.json()

# Each call to r.json() parses the response body again, so decode it once.
# The first record is used as the column names; the remaining records are
# the data rows.
records = r.json()
df = DataFrame(records[1:], columns=records[0])
df.head()

# FILL IN
# calculate the total population using df


# FILL IN -- now calculate the total population excluding Puerto Rico


from settings import CENSUS_KEY
import census

c = census.Census(settings.CENSUS_KEY)

# Ask for the name and total population (P0010001) of California once and
# keep the result, rather than sending the identical request a second time
# just to format the number below.
ca_population = c.sf1.get(('NAME', 'P0010001'), geo={'for': 'state:%s' % states.CA.fips})
ca_population

# The query yields a list of records; the first one carries the population.
"population of California: {0}".format(int(ca_population[0]['P0010001']))

# Fetch the name and total population (P0010001) of every county in California.
ca_counties = c.sf1.get(('NAME', 'P0010001'), geo={'for': 'county:*', 'in': 'state:%s' % states.CA.fips})

# create a DataFrame, convert the 'P0010001' column
# show by descending population
df = DataFrame(ca_counties)
# Cast to int so sorting and the histogram operate on numbers
# (the raw values are presumably strings as returned by the API -- confirm).
df['P0010001'] = df['P0010001'].astype('int')
# sort_index(by=...) is deprecated since pandas 0.17 and removed in 1.0;
# sort_values is the supported way to order rows by a column.  It returns a
# new, sorted frame for display and leaves df itself in its original order.
df.sort_values(by='P0010001', ascending=False)

# Bin-edge approach taken from http://stackoverflow.com/a/13130357/7782:
# let NumPy compute the histogram first, then pass the bin edges it chose
# to the pandas plot so both use the same bins.
county_population = df['P0010001']
count, division = np.histogram(county_population)
county_population.hist(bins=division)