#!/usr/bin/env python
# coding: utf-8

# # Using data APIs with Python
# ## The Very Hungry Caterpillar example

# In[121]:

# NOTE: `get_ipython` is not defined in this file; it is supplied by the
# IPython/Jupyter session this notebook export is meant to run in.
get_ipython().run_line_magic('matplotlib', 'inline')
from IPython.display import HTML
import matplotlib.pyplot as plt
import requests
import pandas as pd
import numpy as np
import seaborn as sns
sns.set_style('white')
sns.set_context('talk', font_scale=1.2)


# The four cells below display the tweets that motivated this notebook.
# NOTE(review): the tweet-embed markup appears to have been stripped from this
# export, leaving only the tweet text. The strings are triple-quoted because
# they span several lines and contain an apostrophe.

# In[101]:

HTML('''

Hmm, I don't know about this caterpillar rearing manual. I thought P.rapae had an obligate association w/ Brassica. pic.twitter.com/M10dqbOYlN

— Christie Bahlai (@cbahlai) מאי 10, 2015
''')


# In[102]:

HTML('''

This is a terrible dataset about caterpillar diet. How did it got published? pic.twitter.com/XkAq51HxEP

— Timothée Poisot (@tpoi) אפריל 23, 2015
''')


# In[99]:

HTML('''

@tpoi @kara_woo @cbahlai pic.twitter.com/5lj9EzuKjW

— Yoav Ram (@yoavram) May 10, 2015
''')


# In[103]:

HTML('''

[blog] How hungry are caterpillars anyway? http://t.co/SvImkHYHhR #opendata

— Timothée Poisot (@tpoi) May 10, 2015
''')


# We will learn how to use the Global Biotic Interactions (globi) API with Python to check **[How hungry are caterpillars anyway?](http://timotheepoisot.fr/2015/05/10/hungry-caterpillars/)** (sort of).
#
# First, have a look at the [API](http://www.globalbioticinteractions.org/) and the [API docs](https://github.com/jhpoelen/eol-globi-data/wiki/API). It is a RESTful API that returns responses in JSON format over HTTP.
#
# - HTTP: protocol for transferring text files on the internet
# - JSON: file format, very similar to Python's `dict`.
# - REST: a common convention for designing web applications that allow querying and retrieving (and sometimes creating, changing and deleting) data.
#
# Let's try it, following [Poisot's lead on _The Very Hungry Caterpillar_](http://timotheepoisot.fr/2015/05/10/hungry-caterpillars/).
#
# ![caterpillar](http://1.bp.blogspot.com/-gZhDuc_eD_U/U8To75wL9UI/AAAAAAAAOD4/Q7LDDM7B3H4/s1600/the-very-hungry-caterpillar-2.jpg)
#
# We will use [_requests_](http://docs.python-requests.org/en/latest/) - a Python HTTP library _for humans_.

# In[109]:

# Passing the query as `params` lets requests build and encode the query
# string; the timeout keeps the cell from hanging forever on a dead server.
response = requests.get(
    "http://api.globalbioticinteractions.org/interaction",
    params={"sourceTaxon": "Pieris", "interactionType": "eats"},
    timeout=30,
)
print("OK:", response.ok)
# Fail here with a clear HTTP error rather than later while parsing the body.
response.raise_for_status()


# The response payload is in JSON format. Calling the `json` method will return the payload as a `dict`:

# In[110]:

payload = response.json()
print(len(payload))
print(payload.keys())


# The response has two fields, `columns` and `data`, corresponding to the data frame's column names and rows. That's great because we can push it right into a `pandas.DataFrame`:

# In[74]:

print(payload['columns'])


# In[75]:

print(payload['data'][0])


# In[112]:

# The parsed JSON lives in `payload`; the original cell referenced an
# undefined name `data` here and raised NameError.
df = pd.DataFrame(payload['data'], columns=payload['columns'])
print(df.shape)
df.head()


# Let's see what each caterpillar eats.
# We got the `eats` interactions, so let's just leave the source and target taxa:

# In[113]:

# Every column except the two taxon-name columns gets dropped below.
cols = df.columns.tolist()
cols.remove('source_taxon_name')
cols.remove('target_taxon_name')
print(cols)


# In[114]:

df.drop(labels=cols, axis=1, inplace=True)
df.head()


# Next, we count how many target taxa occur for each source taxon. For that, we group by source and aggregate by length (I made sure before that each source-target pair appears only once. How??).
#
# The `groupby` made `source_taxon_name` become an index rather than a column and that's why we call `reset_index`.

# In[115]:

table = df.groupby(by='source_taxon_name').aggregate(len).reset_index()
table.head()


# Finally we rename the columns to make them more meaningful and we sort the table by the number of target taxa. Then we print and plot:

# In[116]:

table = table.rename(columns={
    'source_taxon_name': 'Pieris species',
    'target_taxon_name': 'Number of known items in diet',
})
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
table = table.sort_values('Number of known items in diet', ascending=False)
table


# In[124]:

table.plot(x="Pieris species", y="Number of known items in diet", kind="barh", legend=False)
# In a horizontal bar chart the counts run along the x-axis and the species
# names along the y-axis, so each label goes on its matching axis.
plt.xlabel('Number of known items in diet')
plt.ylabel('Pieris species')
plt.grid(False)
sns.despine()


# Pieris rapae
# ![Pieris rapae](http://upload.wikimedia.org/wikipedia/commons/b/b4/Pieris.rapae.caterpillar.jpg)
#
# Pieris brassicae
# ![Pieris brassicae](http://upload.wikimedia.org/wikipedia/commons/5/52/Pieris.brassicae.caterpillar.jpg)