import os
import sys
import urllib
import urllib.parse

import numpy as np
import pandas as pd
# Pull county-level water-use data for one state from the USGS NWIS
# water-use service, tidy it, and write per-category totals to <state>.csv.

# Set the URL path and query values
state = 'la'  # Louisiana
path = 'https://waterdata.usgs.gov/{}/nwis/water_use?'.format(state)
values = {'format': 'rdb',                       # tab-delimited RDB output
          'rdb_compression': 'value',
          'wu_area': 'County',
          'wu_year': '2010',                     # e.g. '2005,2010' for multiple years
          'wu_county': 'ALL',
          'wu_county_nms': '--ALL+Counties--',
          'wu_category_nms': '--ALL+Categories--'
          }
# Build the full request URL (Python 3: urlencode lives in urllib.parse,
# not urllib — urllib.urlencode raises AttributeError on Python 3)
url = path + urllib.parse.urlencode(values)
# Pull data in using the URL: '#' lines are comments, '-' marks missing
# values, and the file carries two header rows (names + RDB format codes).
# pd.read_table is deprecated; read_csv with sep='\t' is equivalent.
dfRaw = pd.read_csv(url, sep='\t', comment='#', header=[0, 1], na_values='-')
# Drop the 2nd header row (the RDB column-format codes, e.g. '5s', '16s')
dfRaw.columns = dfRaw.columns.droplevel(level=1)
# Read locally, for debugging, and drop the 2nd row of headers
#dfRaw = pd.read_csv('../../Data/Proprietary/LA.txt', sep='\t', comment='#', header=[0,1], na_values='-')
#dfRaw.columns = dfRaw.columns.droplevel(level=1)
# Tidy the data: melt the category columns into long Group/MGal rows
rowHeadings = ['county_cd', 'county_nm', 'state_cd', 'state_name', 'year']
dfTidy = pd.melt(dfRaw, id_vars=rowHeadings, value_name='MGal', var_name='Group')
# Remove rows that don't have volume data (only categories whose name
# contains 'Mgal' report volumes). .copy() avoids SettingWithCopyWarning
# on the column assignments below.
dfTidy = dfTidy[dfTidy['Group'].str.contains('Mgal')].copy()
# Change the type of the MGal column to float
# (np.float was removed in NumPy 1.24; the builtin float is the replacement)
dfTidy['MGal'] = dfTidy.MGal.astype(float)
# Fill in Destination column: tag Public Supply rows
dfTidy.loc[dfTidy['Group'].str.startswith('Public Supply'), 'Destination'] = 'PS'
# Summarize: total MGal per category, written to <state>.csv
dfState = dfTidy.groupby(['Group'])['MGal'].sum()
dfState.to_csv('{}.csv'.format(state))