import pandas as pd
import hvplot.pandas
To read from parquet with pandas, make sure pyarrow or fastparquet is installed, e.g.: conda install pyarrow
# Load the parquet dataset at this S3 URL into a DataFrame
# (requires network access and a parquet engine such as pyarrow or fastparquet).
df = pd.read_parquet('https://s3.amazonaws.com/esipfed-scratch/wns')
# Preview the first rows; the displayed columns are state, county and year.
df.head()
 | state | county | year |
---|---|---|---|
0 | Minnesota | St. Louis | 2015 |
1 | Minnesota | Lake | 2015 |
2 | Washington | King | 2015 |
3 | Minnesota | Becker | 2015 |
4 | Wisconsin | Douglas | 2015 |
# Group the rows by (state, year) and count the non-null values in each
# remaining column (only `county` appears in the displayed result).
# The result is indexed by a (state, year) MultiIndex.
a = df.groupby(['state','year']).count()
# Show the first 10 (state, year) groups.
a.head(10)
state | year | county |
---|---|---|
Alabama | 2011 | 2 |
Alabama | 2013 | 2 |
Alabama | 2014 | 3 |
Alabama | 2015 | 3 |
Alabama | 2016 | 4 |
Arkansas | 2011 | 1 |
Arkansas | 2013 | 2 |
Arkansas | 2014 | 8 |
Arkansas | 2015 | 3 |
Arkansas | 2016 | 1 |
# Pivot index level 0 (state) into columns: one row per year, one column per
# state, with 0 filled in for (state, year) pairs that have no records.
a['county'].unstack(level=0, fill_value=0)
state | Alabama | Arkansas | Connecticut | Delaware | Georgia | Illinois | Indiana | Iowa | Kansas | Kentucky | ... | South Carolina | South Dakota | Tennessee | Texas | Vermont | Virginia | Washington | West Virginia | Wisconsin | Wyoming |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
year | |||||||||||||||||||||
2006 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2007 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 |
2008 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 3 | 5 | 0 | 1 | 0 | 0 |
2009 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 2 | 0 | 1 | 4 | 0 | 6 | 0 | 0 |
2010 | 0 | 0 | 2 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | ... | 0 | 0 | 1 | 0 | 0 | 9 | 0 | 4 | 0 | 0 |
2011 | 2 | 1 | 0 | 1 | 0 | 0 | 6 | 0 | 0 | 2 | ... | 0 | 0 | 8 | 0 | 0 | 1 | 0 | 3 | 0 | 0 |
2012 | 0 | 0 | 0 | 0 | 3 | 4 | 1 | 0 | 0 | 13 | ... | 1 | 0 | 21 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
2013 | 2 | 2 | 0 | 0 | 5 | 0 | 1 | 0 | 0 | 7 | ... | 2 | 0 | 15 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
2014 | 3 | 8 | 0 | 0 | 1 | 8 | 0 | 2 | 0 | 1 | ... | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 5 | 0 |
2015 | 3 | 3 | 0 | 0 | 2 | 2 | 0 | 5 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 6 | 0 |
2016 | 4 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | ... | 4 | 0 | 0 | 6 | 0 | 0 | 1 | 0 | 12 | 0 |
2017 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | ... | 3 | 2 | 0 | 4 | 0 | 0 | 0 | 0 | 1 | 1 |
12 rows × 43 columns
# Year-by-state count table (missing pairs filled with 0), accumulated down
# the rows so each state's column holds its running total over the years.
foo = a['county'].unstack(level=0, fill_value=0).cumsum()
# Stacked area chart of the running totals, one layer per state column;
# the legend is placed to the right of the plot and shifted by the given offset.
p = foo.hvplot.area(stacked=True, height=500, width=800).options(legend_position='right', legend_offset=(10,-100))
p
# Overlay the 'Ohio' layer a second time, recoloured white
# (presumably to highlight Ohio within the stack; confirm intent).
p * p['Ohio'].options(color='white')