In [6]:
# Shell escape: runs the system `date` command and shows the current date/time
# (the output changes on every run).
!date
Thu Jun 26 10:11:57 PDT 2014
In [7]:
import pandas as pd
In [8]:
# Toy frame used by the cells below: 'A' is a two-valued key (four 0s, two 1s),
# 'B' and 'C' are consecutive integer runs (1..6 and 8..13).
df = pd.DataFrame({
    'A': [0] * 4 + [1] * 2,
    'B': list(range(1, 7)),
    'C': list(range(8, 14)),
})
In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of df.
df.describe()
Out[9]:
A B C
count 6.000000 6.000000 6.000000
mean 0.333333 3.500000 10.500000
std 0.516398 1.870829 1.870829
min 0.000000 1.000000 8.000000
25% 0.000000 2.250000 9.250000
50% 0.000000 3.500000 10.500000
75% 0.750000 4.750000 11.750000
max 1.000000 6.000000 13.000000
In [10]:
# The same summary statistics, computed separately for each distinct value of 'A'.
# In the output recorded here the statistic names form an inner level of the row index.
# NOTE(review): this layout looks pandas-version dependent (newer releases appear to
# place the statistics in the columns instead) — confirm against the installed version.
df.groupby('A').describe()
Out[10]:
B C
A
0 count 4.000000 4.000000
mean 2.500000 9.500000
std 1.290994 1.290994
min 1.000000 8.000000
25% 1.750000 8.750000
50% 2.500000 9.500000
75% 3.250000 10.250000
max 4.000000 11.000000
1 count 2.000000 2.000000
mean 5.500000 12.500000
std 0.707107 0.707107
min 5.000000 12.000000
25% 5.250000 12.250000
50% 5.500000 12.500000
75% 5.750000 12.750000
max 6.000000 13.000000
In [11]:
# Pivot the innermost row-index level (the statistic name) into the columns, giving
# one row per value of 'A' and a two-level (column, statistic) header.
# NOTE(review): presumes describe() returned the statistics in the row index, as in
# the output recorded above; on newer pandas the result of .unstack() may differ — confirm.
df.groupby('A').describe().unstack()
Out[11]:
B C
count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max
A
0 4 2.5 1.290994 1 1.75 2.5 3.25 4 4 9.5 1.290994 8 8.75 9.5 10.25 11
1 2 5.5 0.707107 5 5.25 5.5 5.75 6 2 12.5 0.707107 12 12.25 12.5 12.75 13
In [12]:
# Keep only the 'count' and 'mean' statistics for every described column.
#
# Older pandas returns groupby().describe() with the statistic names as an inner
# row-index level; pandas >= 0.20 already returns them as a column level. On the
# newer layout an unconditional .unstack() collapses the frame into a Series and
# the two-axis .loc below fails, so pivot only when the statistics are still in
# the row index.
group_stats = df.groupby('A').describe()
if group_stats.index.nlevels > 1:
    group_stats = group_stats.unstack()

# pd.IndexSlice[:, [...]] is the same key as (slice(None), [...]): every
# first-level column label, restricted to the listed second-level statistics.
group_stats.loc[:, pd.IndexSlice[:, ['count', 'mean']]]
Out[12]:
B C
count mean count mean
A
0 4 2.5 4 9.5
1 2 5.5 2 12.5
In [ ]: