# Set up data: N rows of random measurements plus two label columns.
N = 10000

# Label vocabularies; the integer codes drawn below are indices into these.
city = 'Bangalore Chennai Delhi Mumbai'.split()
cars = 'Mercedes Premier Jaguar BMW Maruti Audi Porsche MM'.split()

# NOTE(review): numpy.random.randint excludes its upper bound, so only the
# first 3 cities and the first 6 cars are ever drawn -- confirm this is intended.
data = pd.DataFrame({
    'Weight': 200 * numpy.random.rand(N),
    'Length': 40 * numpy.random.rand(N),
    'Count': numpy.random.randint(0, 100, N),
    'Year': numpy.random.randint(1995, 2012, N),
    'Width': 100 * numpy.random.rand(N),
    'Height': 30 * numpy.random.rand(N),
    'City': numpy.random.randint(0, 3, N),
    'Cars': numpy.random.randint(0, 6, N),
})

# Replace the integer codes with their names.
data['City'] = data['City'].map(lambda code: city[code])
data['Cars'] = data['Cars'].map(lambda code: cars[code])

# Second copy of the weights, kept under its own column name.
data['Weight Series'] = data['Weight']
# Arguments

# Derived columns. Each callable is given an object indexable by column name
# and returns the value(s) for the new column.
calc = {
    'Density': lambda v: v['Weight'] / (v['Length'] * v['Width'] * v['Height']),
    'Height %': lambda v: v['Height'],
    'Aspect ratio': lambda v: 1,
}

# Cell formatters keyed by column name; each returns the text for one value.
# Two of them take a second parameter (`data` or `row`).
# NOTE(review): presumably the consumer decides what to pass from the
# parameter name -- confirm against the caller.
# NOTE: this name shadows the built-in format(); it is kept because the rest
# of the file refers to the dict by this name.
format = {
    'City': lambda x: '{:s}'.format(str(x)),
    'Count': lambda x: '{:,.0f}'.format(float(x)),
    'Year': lambda x: '{:d}'.format(int(x)),
    'Weight': lambda x: '{:,.1f} kg'.format(x),
    'Length': lambda x: '{:,.1f} cm'.format(x),
    'Width': lambda x: '{:,.1f} cm'.format(x),
    'Height': lambda x: '{:,.1f} cm'.format(x),
    'Height %': lambda x, data: '{:.0f}%'.format(100. * x / data['Height'].max()),
    'Aspect ratio': lambda x, row: '{:,.1f}'.format(row['Length'] / row['Width']),
    # A scalar is shown as text; anything else is handed (first 20 items
    # only) to SVG, which is defined outside this section.
    'Weight Series': lambda x: (
        '{:.1f} kg'.format(x) if numpy.isscalar(x)
        else SVG('sparkline.svg', width=100, height=30, data=x[:20])
    ),
}

# Display names for column headers.
headers = {
    'Count': '#',
    'Weight': 'Weight (kg)',
}

# How each column is aggregated within a group.
agg = {
    # First value in the group followed by an ellipsis. Uses .iloc[0]
    # because Series.iget() has been removed from pandas.
    'City': lambda v: v.iloc[0] + ', ...',
    'Count': numpy.sum,
    'Cars': 'count',
    'Year': numpy.mean,
    'Weight': numpy.mean,
    'Length': numpy.mean,
    'Width': numpy.mean,
    'Height': numpy.mean,
    # Keep every value of the group as a tuple.
    'Weight Series': lambda v: tuple(v),
}
# Bundle the raw frame and the argument dicts under their keyword names.
# 'data' here is the un-aggregated frame: the name `data` is rebound to the
# aggregated frame further down, but this dict keeps the original object.
kwargs = {'data': data, 'format': format, 'agg': agg, 'headers': headers, 'classes': True}

# Specific configuration
groupby = ['City', 'Cars']
groups = data.groupby(groupby)
# Aggregators for every column that is not itself a grouping key.
# dict.items() is used instead of iteritems() so this runs on Python 2 and 3.
by = {k: v for k, v in agg.items() if k not in groupby}

# Second grouping on all but the last grouping column.
total = len(groupby) - 1
subgroupby = groupby[:total]
subgroups = data.groupby(subgroupby)
subby = {k: v for k, v in agg.items() if k not in subgroupby}

# With no aggregators left, fall back to a plain sum of each group.
subdata = subgroups.sum() if not subby else subgroups.agg(subby)
data = groups.sum() if not by else groups.agg(by)

# Add the calculated columns to both aggregated frames.
if calc is not None:
    for new_column, fn in calc.items():
        data[new_column] = fn(data)
        if subdata is not None:
            subdata[new_column] = fn(subdata)

# Index levels to use, selected by a fixed key of None, i.e. `groupby`.
# NOTE(review): the False/None/True table looks like the mapping of an
# `index` option that is hard-wired to None here -- confirm.
indices = {False: [], None: groupby, True: data.index.names}.get(None, None)
# Position of each index level by name, then restricted to the chosen levels.
index_col = {col: i for i, col in enumerate(data.index.names)}
index_pos = {col: index_col[col] for col in indices}