A simple demonstration of pivot_table. Reshaping and Pivot Tables — pandas 0.13.1 documentation
import pandas as pd
from pandas import DataFrame, Series
df = DataFrame([{
'year':1880,
'name':'John',
'sex': 'M',
'births': 13
},
{'year':1880,
'name':'Pat',
'sex': 'M',
'births': 13
},
{'year':1880,
'name':'Pat',
'sex': 'F',
'births': 13
},
{
'year':1880,
'name':'Jane',
'sex': 'F',
'births': 20
},
{
'year':1881,
'name':'John',
'sex': 'M',
'births': 90
},
{
'year':1881,
'name':'Jane',
'sex': 'F',
'births': 21
},])
df
births | name | sex | year | |
---|---|---|---|---|
0 | 13 | John | M | 1880 |
1 | 13 | Pat | M | 1880 |
2 | 13 | Pat | F | 1880 |
3 | 20 | Jane | F | 1880 |
4 | 90 | John | M | 1881 |
5 | 21 | Jane | F | 1881 |
6 rows × 4 columns
pt = df.pivot_table(rows='year', cols=['name','sex'])['births']
pt
name | Jane | John | Pat | |
---|---|---|---|---|
sex | F | M | F | M |
year | ||||
1880 | 20 | 13 | 13 | 13 |
1881 | 21 | 90 | NaN | NaN |
2 rows × 4 columns
# let's make a new table in which there is M/F subindex for all names
names = set(pt.columns.get_level_values(level=0))
sexes = set(pt.columns.get_level_values(level=1))
names, sexes
({'Jane', 'John', 'Pat'}, {'F', 'M'})
# http://pandas.pydata.org/pandas-docs/stable/indexing.html#creating-a-multiindex-hierarchical-index-object
new_index = pd.MultiIndex.from_product([list(names), list(sexes)],
names=['name','sex'])
new_index
MultiIndex(levels=[[u'Jane', u'John', u'Pat'], [u'F', u'M']], labels=[[0, 0, 1, 1, 2, 2], [1, 0, 1, 0, 1, 0]], names=[u'name', u'sex'])
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html
pt.T.reindex(new_index).T
name | Jane | John | Pat | |||
---|---|---|---|---|---|---|
sex | M | F | M | F | M | F |
year | ||||||
1880 | NaN | 20 | 13 | NaN | 13 | 13 |
1881 | NaN | 21 | 90 | NaN | NaN | NaN |
2 rows × 6 columns
pt.T.reindex(new_index).T.fillna(0)
name | Jane | John | Pat | |||
---|---|---|---|---|---|---|
sex | M | F | M | F | M | F |
year | ||||||
1880 | 0 | 20 | 13 | 0 | 13 | 13 |
1881 | 0 | 21 | 90 | 0 | 0 | 0 |
2 rows × 6 columns
df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum'))
births | year | |||
---|---|---|---|---|
year | name | sex | ||
1880 | Jane | F | 20 | 1880 |
John | M | 13 | 1880 | |
Pat | F | 13 | 1880 | |
M | 13 | 1880 | ||
1881 | Jane | F | 21 | 1881 |
John | M | 90 | 1881 |
6 rows × 2 columns
df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum')).unstack()
births | year | ||||
---|---|---|---|---|---|
sex | F | M | F | M | |
year | name | ||||
1880 | Jane | 20 | NaN | 1880 | NaN |
John | NaN | 13 | NaN | 1880 | |
Pat | 13 | 13 | 1880 | 1880 | |
1881 | Jane | 21 | NaN | 1881 | NaN |
John | NaN | 90 | NaN | 1881 |
5 rows × 4 columns