Use Python and pandas with CrisisNET

In [57]:
# import required modules
import os

import pandas as pd
# NOTE: `re` below is an alias for requests, not the standard-library regex module
import requests as re
In [58]:
# Let pandas display up to 30 columns before it truncates a wide dataframe
pd.options.display.max_columns = 30
In [59]:
# Create a variable with your CrisisNET API key.
# The key is read from the CRISISNET_API_KEY environment variable so that a
# personal key never has to be saved inside the notebook; when the variable
# is unset or empty, fall back to the key published with this tutorial.
api_key = os.environ.get('CRISISNET_API_KEY') or '532d8dc4ed3329652f114b73'

# Setup the request header (the key is sent as a bearer token)
headers = {'Authorization': 'Bearer ' + api_key}
In [60]:
# Template for the request's URL: one named placeholder per query parameter
url = 'http://api.crisis.net/item?%(tags)s&%(before)s&%(after)s&%(text)s&%(location)s&%(radius)s&%(limit)s&%(sources)s&%(licenses)s&%(offset)s'

# Create a dictionary of filters. Each value is a complete 'name=value'
# pair; a bare 'name=' sends an empty value for that filter.
filters = dict(
    tags='tags=',  # a list of keywords separated by commas
    after='after=',  # a timestamp in ISO 8601 format
    before='before=',  # a timestamp in ISO 8601 format
    text='text=',  # words separated by a '+'
    location='location=',  # latitude and longitude
    radius='radius=',  # a number in meters
    limit='limit=',  # the maximum number of records to return
    sources='sources=reliefweb',  # a list of sources
    licenses='licenses=',  # the types of licenses
    offset='offset=',  # for multiple requests
)

# Fill the placeholders of the template to get the formatted request URL
formattedURL = url % filters
In [61]:
# Request data from CrisisNET. The timeout (in seconds) keeps the cell from
# waiting forever if the server never answers.
r = re.get(formattedURL, headers=headers, timeout=30)

# Check to make sure the pull was successful: raise an HTTPError for a
# 4xx/5xx status instead of carrying an error response into the next cells
r.raise_for_status()

# If successful, we will see "Response 200"
print(r)
<Response [200]>
In [62]:
# Create a dataframe from the request's json format
# (r.json() decodes the JSON body of the response before pandas sees it)
request_df = pd.DataFrame(r.json())
In [63]:
# View the first row of the request dataframe
request_df.head(1)
Out[63]:
data total
0 {'language': {'code': 'en'}, 'id': 'AcVAF46mQC... 458

1 rows × 2 columns

In [64]:
# Create a dataframe from the request's data. Each cell of the 'data' column
# holds one record as a dict, so build the frame from the list of dicts in a
# single call instead of converting row by row with apply(pd.Series), which
# creates one Series per record.
df = pd.DataFrame(request_df['data'].tolist(), index=request_df.index)
In [65]:
# View the first row of the dataframe
df.head(1)
Out[65]:
content createdAt entities geo id language license lifespan publishedAt remoteID source summary tags updatedAt
0 As at 31 Mar 2014, 11 provinces (nine in north... 2014-05-09T19:07:06.931101 [Afghanistan, Badakhshan, Faryab] {'coords': [72, 36.75], 'addressComponents': {... AcVAF46mQC6X_qpvC2SQpA {'code': 'en'} unknown temporary 2014-04-25T00:00:00.000Z 14539 reliefweb Afghanistan: Flash Floods and Landslides - Apr... [{'confidence': 1, 'name': 'flood'}, {'confide... 2014-05-28T22:03:57.208119

1 rows × 14 columns

In [66]:
# Check the length of the dataframe (its number of rows)
len(df)
Out[66]:
25
In [67]:
# Parse the 'updatedAt' strings into timestamps, then make the time the
# report was updated the row index (the column itself is kept as well)
df['updatedAt'] = pd.to_datetime(df['updatedAt'])
df = df.set_index('updatedAt', drop=False)
In [68]:
# Expand the geo column into a full dataframe
geo_df = df['geo'].apply(pd.Series)
# head(100) asks for up to 100 rows, so a dataframe with fewer rows is shown in full
geo_df.head(100)
Out[68]:
addressComponents coords
updatedAt
2014-05-28 22:03:57.208119 {'formattedAddress': 'Afghanistan', 'adminArea... [72, 36.75]
2014-05-28 22:03:57.034668 {'adminArea5': 'Ulytauskiy rayon', 'formattedA... [67.17916870117188, 48.14600372314453]
2014-05-28 22:05:39.977156 {'adminArea5': 'Paiwas', 'formattedAddress': '... [-85.04560089111328, 12.92113971710205]
2014-05-28 22:05:31.228032 {'formattedAddress': '3580000 Linares, Maule, ... [-71.67467498779297, -35.78622817993164]
2014-05-28 22:05:37.263112 {'formattedAddress': '75 S Broadway Ave, Peru,... [-86.06803245842457, 40.752463191747665]
2014-05-28 22:05:37.355127 {'adminArea4': 'Ambato', 'adminArea5': 'Pilagu... [-78.83322143554688, -1.3420000076293945]
2014-05-28 22:05:32.931298 {'formattedAddress': 'Sierra Leone', 'adminAre... [-11.843890190124512, 8.521441459655762]
2014-05-28 22:05:37.733648 {'formattedAddress': 'Guinea', 'adminArea1': '... [-10.98954963684082, 10.429302215576172]
2014-05-28 22:05:38.475624 {'adminArea5': 'Biankouma', 'formattedAddress'... [-7.5975751876831055, 7.765665054321289]
2014-05-28 22:05:38.652440 {'formattedAddress': 'Mali', 'adminArea1': 'Ma... [-3.5273818969726562, 17.35776710510254]
2014-05-28 22:05:38.829726 {'formattedAddress': 'Liberia', 'adminArea1': ... [-9.323492050170898, 6.411512851715088]
2014-05-28 22:05:38.741804 {'formattedAddress': 'Senegal', 'adminArea1': ... [-14.531643867492676, 14.36251163482666]
2014-05-28 22:05:40.146017 {'formattedAddress': 'Fiji West', 'adminArea3'... [178.1472625732422, -17.658161163330078]
2014-05-28 22:05:31.136630 {'formattedAddress': 'Vanuatu', 'adminArea3': ... [168.13926696777344, -16.623371124267578]
2014-05-28 22:05:31.317778 {'formattedAddress': 'Fiji', 'adminArea1': 'Fi... [178.0944061279297, -17.045461654663086]
2014-05-28 22:05:37.081416 {'formattedAddress': 'Solomon Islands', 'admin... [160.01930236816406, -9.548112869262695]
2014-05-28 22:05:40.644162 {'formattedAddress': 'Seychelles', 'adminArea1... [55.47166061401367, -4.669795036315918]
2014-05-28 22:03:56.091954 {'adminArea5': 'Dili', 'formattedAddress': 'Hi... [125.570556640625, -8.569040298461914]
2014-05-28 22:05:31.732318 {'adminArea5': 'Tabang', 'formattedAddress': '... [119.51933288574219, -2.9553489685058594]
2014-05-28 22:05:34.035228 {'adminArea5': 'Villavicencio', 'formattedAddr... [-73.62193298339844, 4.132339954376221]
2014-05-28 22:05:37.632689 {'adminArea5': 'Verapaz', 'formattedAddress': ... [-88.87079620361328, 13.642239570617676]
2014-05-28 22:05:33.787201 {'formattedAddress': 'Saint Lucia', 'adminArea... [-60.9665641784668, 13.86330509185791]
2014-05-28 22:05:33.933798 {'adminArea5': 'Bequia', 'formattedAddress': '... [-61.229007720947266, 13.02966022491455]
2014-05-28 22:05:34.186057 {'formattedAddress': 'Dominica', 'adminArea1':... [-61.33945846557617, 15.3991060256958]
2014-05-28 22:05:35.341267 {'postalCode': '78663-000', 'adminArea5': 'São... [-52.788028717041016, -10.81116008758545]

25 rows × 2 columns

In [69]:
# Expand the address components column into its own dataframe
geo_admin_df = geo_df['addressComponents'].apply(pd.Series)
# Show only the first row
geo_admin_df.head(1)
Out[69]:
adminArea1 adminArea3 adminArea4 adminArea5 formattedAddress postalCode streetAddress
updatedAt
2014-05-28 22:03:57.208119 Afghanistan NaN NaN NaN Afghanistan NaN NaN

1 rows × 7 columns

In [70]:
# Join the two geo dataframes to the primary dataframe, column-wise
frames = [df, geo_admin_df, geo_df]
df = pd.concat(frames, axis=1)
In [71]:
# Extract the latitude and longitude coordinates into their own columns:
# element 1 of each 'coords' entry is the latitude, element 0 the longitude
coords = df['coords']
df['latitude'] = coords.str.get(1)
df['longitude'] = coords.str.get(0)
In [72]:
# Expand the tags column into its own dataframe
# (each element of a row's tags value gets a column of its own)
tags_df = df['tags'].apply(pd.Series)
In [73]:
# Keep only the first two tag columns and drop every column after them.
# DataFrame.ix was removed in pandas 1.0; iloc[:, :2] selects the same two
# leading columns by position.
tags_df = tags_df.iloc[:, :2]
In [74]:
# Add titles to the columns (the dataframe must have exactly two columns here)
tags_df.columns = ['tag1', 'tag2']
# Display the column index to confirm the new names
tags_df.columns
Out[74]:
Index(['tag1', 'tag2'], dtype='object')
In [75]:
# View the first row of the tags dataframe
tags_df.head(1)
Out[75]:
tag1 tag2
updatedAt
2014-05-28 22:03:57.208119 {'confidence': 1, 'name': 'flood'} {'confidence': 1, 'name': 'flash-flood'}

1 rows × 2 columns

In [76]:
# Create a tag extractor function that leaves missing observations (which are floats) alone, but converts other values into dicts, then extracts the name value.

def tag_extractor(x):
    """Return the name of a single tag, passing missing values through.

    Parameters
    ----------
    x : dict, float or None
        One cell of the tags dataframe: a tag such as
        ``{'confidence': 1, 'name': 'flood'}``, or a float (NaN) where a
        record has no tag in that position.

    Returns
    -------
    object
        ``x`` itself when ``x`` is a float; the value stored under
        ``'name'`` when ``x`` is a non-empty tag; ``None`` for any other
        falsy value (for example ``None`` or an empty dict).

    Raises
    ------
    KeyError
        If a non-empty tag has no ``'name'`` key.
    """
    # Missing observations are floats: return them untouched. isinstance
    # (rather than an exact type check) also covers float subclasses such as
    # numpy.float64, which would otherwise fall through to dict() and fail.
    if isinstance(x, float):
        return x
    # Nothing to extract from an empty or None value
    if not x:
        return None
    # Convert x to a dict and return the value from the name key
    return dict(x)['name']
In [77]:
# Apply the function to every cell in the dataframe.
# DataFrame.applymap is deprecated in recent pandas releases, so map each
# column with Series.map instead, which works on old and new versions alike.
tags_df = tags_df.apply(lambda column: column.map(tag_extractor))
In [78]:
# Append the tag name columns to the primary dataframe, column-wise
df = pd.concat([df, tags_df], axis=1)
In [79]:
# Expand the language column into its own dataframe and return the language code column to the original dataframe
lang_df = df['language'].apply(pd.Series)
df['lang'] = lang_df['code']
In [80]:
# Clean up the dataframe by dropping the raw nested columns that were
# expanded above, together with the identifier columns
extra_columns = ['geo', 'updatedAt', 'addressComponents', 'language',
                 'tags', 'coords', 'id', 'remoteID']
df = df.drop(extra_columns, axis=1)
In [81]:
# View the first five rows of the final dataframe
df.head()
Out[81]:
content createdAt entities license lifespan publishedAt source summary adminArea1 adminArea3 adminArea4 adminArea5 formattedAddress postalCode streetAddress latitude longitude tag1 tag2 lang
updatedAt
2014-05-28 22:03:57.208119 As at 31 Mar 2014, 11 provinces (nine in north... 2014-05-09T19:07:06.931101 [Afghanistan, Badakhshan, Faryab] unknown temporary 2014-04-25T00:00:00.000Z reliefweb Afghanistan: Flash Floods and Landslides - Apr... Afghanistan NaN NaN NaN Afghanistan NaN NaN 36.750000 72.000000 flood flash-flood en
2014-05-28 22:03:57.034668 A flash flood triggered by a dam burst on 31 M... 2014-05-09T17:08:52.744816 [Kokpekty, Kazakhstan, Ulytauskiy rayon, Qarag... unknown temporary 2014-04-11T00:00:00.000Z reliefweb Kazakhstan: Flash Floods - Apr 2014 Kazakhstan Qaraghandy NaN Ulytauskiy rayon Ulytauskiy rayon, Kazakhstan NaN NaN 48.146004 67.179169 flash-flood flash-flood en
2014-05-28 22:05:39.977156 On 10 Apr 2014, a shallow 6.2-magnitude earthq... 2014-04-29T22:34:14.876123 [Managua, Lago, Nicaragua, Len, Paiwas, Región... unknown temporary 2014-04-11T00:00:00.000Z reliefweb Nicaragua: Earthquake - Apr 2014 Nicaragua Región Autónoma Atlántico Sur NaN Paiwas NIC-13, Paiwas, Nicaragua NaN NIC-13 12.921140 -85.045601 earthquake armed-conflict en
2014-05-28 22:05:31.228032 On 1 Apr 2014, a 8.2-magnitude earthquake occu... 2014-04-29T22:34:09.003365 [Honduras, Chile, Iquique, Panama, Gestin, Gua... unknown temporary 2014-04-01T00:00:00.000Z reliefweb Chile: Earthquake - Apr 2014 Chile Maule Linares Linares 3580000 Linares, Maule, Chile 3580000 3580000 Linares -35.786228 -71.674675 earthquake tsunami en
2014-05-28 22:05:37.263112 On 1 Apr 2014, a 8.2-magnitude earthquake occu... 2014-04-29T22:34:18.449345 [Honduras, Chile, Iquique, Panama, Gestin, Gua... unknown temporary 2014-04-01T00:00:00.000Z reliefweb Chile: Earthquake - Apr 2014 United States IN Miami Co. Peru 75 S Broadway Ave, Peru, IN 46970 46970 75 S Broadway Ave 40.752463 -86.068032 earthquake tsunami en

5 rows × 20 columns