import os
import sys
import urllib
import urllib.parse

import numpy as np
import pandas as pd
# Pull county-level water-use data for one state from the USGS NWIS
# water-use service, tidy it, and write per-category totals to <state>.csv.

# Set the URL path and query values
state = 'la'  # Louisiana
path = 'https://waterdata.usgs.gov/{}/nwis/water_use?'.format(state)
values = {'format': 'rdb',                       # tab-delimited RDB output
          'rdb_compression': 'value',
          'wu_area': 'County',
          'wu_year': '2010',                     # e.g. '2005,2010' for multiple years
          'wu_county': 'ALL',
          'wu_county_nms': '--ALL+Counties--',
          'wu_category_nms': '--ALL+Categories--'
          }
# Build the full request URL (Python 3: urlencode lives in urllib.parse,
# not urllib — urllib.urlencode raises AttributeError on Python 3)
url = path + urllib.parse.urlencode(values)
# Pull data in using the URL: '#' lines are comments, '-' marks missing
# values, and the file carries two header rows (names + RDB format codes).
# pd.read_table is deprecated; read_csv with sep='\t' is equivalent.
dfRaw = pd.read_csv(url, sep='\t', comment='#', header=[0, 1], na_values='-')
# Drop the 2nd header row (the RDB column-format codes, e.g. '5s', '16s')
dfRaw.columns = dfRaw.columns.droplevel(level=1)
# Read locally, for debugging, and drop the 2nd row of headers
#dfRaw = pd.read_csv('../../Data/Proprietary/LA.txt', sep='\t', comment='#', header=[0,1], na_values='-')
#dfRaw.columns = dfRaw.columns.droplevel(level=1)
# Tidy the data: melt the category columns into long Group/MGal rows
rowHeadings = ['county_cd', 'county_nm', 'state_cd', 'state_name', 'year']
dfTidy = pd.melt(dfRaw, id_vars=rowHeadings, value_name='MGal', var_name='Group')
# Remove rows that don't have volume data (only categories whose name
# contains 'Mgal' report volumes). .copy() avoids SettingWithCopyWarning
# on the column assignments below.
dfTidy = dfTidy[dfTidy['Group'].str.contains('Mgal')].copy()
# Change the type of the MGal column to float
# (np.float was removed in NumPy 1.24; the builtin float is the replacement)
dfTidy['MGal'] = dfTidy.MGal.astype(float)
# Fill in Destination column: tag Public Supply rows
dfTidy.loc[dfTidy['Group'].str.startswith('Public Supply'), 'Destination'] = 'PS'
# Summarize: total MGal per category, written to <state>.csv
dfState = dfTidy.groupby(['Group'])['MGal'].sum()
dfState.to_csv('{}.csv'.format(state))