#!/usr/bin/env python # coding: utf-8 # # Using agate in a Jupyter notebook # # First we import agate. Then we create an agate Table by loading data from a CSV file. # In[2]: import agate table = agate.Table.from_csv('examples/realdata/ks_1033_data.csv') print(table) # ## Question 1: What was the total cost to Kansas City area counties? # # To answer this question, we first must filter the table to only those rows which refer to a Kansas City area `county`. # In[3]: kansas_city = table.where(lambda r: r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE')) print(len(table.rows)) print(len(kansas_city.rows)) # We can then print the Sum of the costs of all those rows. (The cost column is named `total_cost`.) # In[4]: print('$%d' % kansas_city.columns['total_cost'].aggregate(agate.Sum())) # ## Question 2: Which counties spent the most? # # This question is more complicated. First we group the data by `county`, which gives us a TableSet named `counties`. A TableSet is a group of tables with the same columns. # In[5]: # Group by county counties = table.group_by('county') print(counties.keys()) # We then use the `aggregate` function to sum the `total_cost` column for each table in the group. The resulting values are collapsed into a new table, `totals`, which has a row for each county and a column named `total_cost_sum` containing the new total. # In[6]: # Aggregate totals for all counties totals = counties.aggregate([ ('total_cost', agate.Sum(), 'total_cost_sum') ]) print(totals.column_names) # Finally, we sort the counties by their total cost, limit the results to the top 10 and then print the results as a text bar chart. # In[8]: totals.order_by('total_cost_sum', reverse=True).limit(20).print_bars('county', 'total_cost_sum', width=100)