#!/usr/bin/env python
# coding: utf-8

# # Using agate in a Jupyter notebook
# 
# First we import agate. Then we create an agate Table by loading data from a CSV file.

# In[2]:


import agate

table = agate.Table.from_csv('examples/realdata/ks_1033_data.csv')

print(table)


# ## Question 1: What was the total cost to Kansas City area counties?
# 
# To answer this question, we first must filter the table to only those rows which refer to a Kansas City area `county`.

# In[3]:


kansas_city = table.where(lambda r: r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE'))

print(len(table.rows))
print(len(kansas_city.rows))


# We can then print the Sum of the costs of all those rows. (The cost column is named `total_cost`.)

# In[4]:


print('$%d' % kansas_city.columns['total_cost'].aggregate(agate.Sum()))


# ## Question 2: Which counties spent the most?
# 
# This question is more complicated. First we group the data by `county`, which gives us a TableSet named `counties`. A TableSet is a group of tables with the same columns.

# In[5]:


# Group by county
counties = table.group_by('county')

print(counties.keys())


# We then use the `aggregate` function to sum the `total_cost` column for each table in the group. The resulting values are collapsed into a new table, `totals`, which has a row for each county and a column named `total_cost_sum` containing the new total.

# In[6]:


# Aggregate totals for all counties
totals = counties.aggregate([
    ('total_cost', agate.Sum(), 'total_cost_sum')
])

print(totals.column_names)


# Finally, we sort the counties by their total cost, limit the results to the top 10 and then print the results as a text bar chart.

# In[8]:


totals.order_by('total_cost_sum', reverse=True).limit(20).print_bars('county', 'total_cost_sum', width=100)