# import required modules
import requests as re
import pandas as pd
# Let pandas display up to 30 columns when printing a dataframe
pd.set_option('display.max_columns', 30)
# CrisisNET API key, sent with the request as a bearer token
api_key = '532d8dc4ed3329652f114b73'
# HTTP headers carrying the authorization token
headers = dict(Authorization='Bearer ' + api_key)
# URL template: one named %-placeholder per query filter
url = (
    'http://api.crisis.net/item?'
    '%(tags)s&%(before)s&%(after)s&%(text)s&%(location)s'
    '&%(radius)s&%(limit)s&%(sources)s&%(licenses)s&%(offset)s'
)
# Map each URL placeholder name to its "name=value" query fragment.
# A fragment with nothing after "=" carries no value for that filter.
filters = dict(
    tags='tags=',  # a list of keywords separated by commas
    after='after=',  # a timestamp in ISO 8601 format
    before='before=',  # a timestamp in ISO 8601 format
    text='text=',  # words separated by a '+'
    location='location=',  # latitude and longitude
    radius='radius=',  # a number in meters
    limit='limit=',  # the maximum number of records to return
    sources='sources=reliefweb',  # a list of sources
    licenses='licenses=',  # the types of licenses
    offset='offset=',  # for multiple requests
)
# Fill every placeholder in the URL template with its filter fragment
formattedURL = url % filters
# Request data from CrisisNET. Note that `re` is the requests library here
# (it is imported under that alias), not the standard-library regex module.
# The explicit timeout keeps a stalled connection from hanging the script:
# requests waits indefinitely when no timeout is given.
r = re.get(formattedURL, headers=headers, timeout=30)
# Check to make sure the pull was successful
# If successful, we will see "<Response [200]>"
print(r)
<Response [200]>
# Create a dataframe from the JSON body of the response
request_df = pd.DataFrame(r.json())
# View the first row of the request dataframe
request_df.head(1)
data | total | |
---|---|---|
0 | {'language': {'code': 'en'}, 'id': 'AcVAF46mQC... | 458 |
1 rows × 2 columns
# Create a dataframe from the request's data by expanding each entry of the
# 'data' column into its own set of columns
df = request_df['data'].apply(pd.Series)
# View the first row of the dataframe
df.head(1)
content | createdAt | entities | geo | id | language | license | lifespan | publishedAt | remoteID | source | summary | tags | updatedAt | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | As at 31 Mar 2014, 11 provinces (nine in north... | 2014-05-09T19:07:06.931101 | [Afghanistan, Badakhshan, Faryab] | {'coords': [72, 36.75], 'addressComponents': {... | AcVAF46mQC6X_qpvC2SQpA | {'code': 'en'} | unknown | temporary | 2014-04-25T00:00:00.000Z | 14539 | reliefweb | Afghanistan: Flash Floods and Landslides - Apr... | [{'confidence': 1, 'name': 'flood'}, {'confide... | 2014-05-28T22:03:57.208119 |
1 rows × 14 columns
# Check the length of the dataframe
len(df)
25
# Set the row index of the dataframe to be the time the report was updated.
# The column is parsed to datetimes first, so the index holds timestamps.
df["updatedAt"] = pd.to_datetime(df["updatedAt"])
df.index = df['updatedAt']
# Expand the geo column into a full dataframe (one column per key)
geo_df = df['geo'].apply(pd.Series)
# View up to the first 100 rows
geo_df.head(100)
addressComponents | coords | |
---|---|---|
updatedAt | ||
2014-05-28 22:03:57.208119 | {'formattedAddress': 'Afghanistan', 'adminArea... | [72, 36.75] |
2014-05-28 22:03:57.034668 | {'adminArea5': 'Ulytauskiy rayon', 'formattedA... | [67.17916870117188, 48.14600372314453] |
2014-05-28 22:05:39.977156 | {'adminArea5': 'Paiwas', 'formattedAddress': '... | [-85.04560089111328, 12.92113971710205] |
2014-05-28 22:05:31.228032 | {'formattedAddress': '3580000 Linares, Maule, ... | [-71.67467498779297, -35.78622817993164] |
2014-05-28 22:05:37.263112 | {'formattedAddress': '75 S Broadway Ave, Peru,... | [-86.06803245842457, 40.752463191747665] |
2014-05-28 22:05:37.355127 | {'adminArea4': 'Ambato', 'adminArea5': 'Pilagu... | [-78.83322143554688, -1.3420000076293945] |
2014-05-28 22:05:32.931298 | {'formattedAddress': 'Sierra Leone', 'adminAre... | [-11.843890190124512, 8.521441459655762] |
2014-05-28 22:05:37.733648 | {'formattedAddress': 'Guinea', 'adminArea1': '... | [-10.98954963684082, 10.429302215576172] |
2014-05-28 22:05:38.475624 | {'adminArea5': 'Biankouma', 'formattedAddress'... | [-7.5975751876831055, 7.765665054321289] |
2014-05-28 22:05:38.652440 | {'formattedAddress': 'Mali', 'adminArea1': 'Ma... | [-3.5273818969726562, 17.35776710510254] |
2014-05-28 22:05:38.829726 | {'formattedAddress': 'Liberia', 'adminArea1': ... | [-9.323492050170898, 6.411512851715088] |
2014-05-28 22:05:38.741804 | {'formattedAddress': 'Senegal', 'adminArea1': ... | [-14.531643867492676, 14.36251163482666] |
2014-05-28 22:05:40.146017 | {'formattedAddress': 'Fiji West', 'adminArea3'... | [178.1472625732422, -17.658161163330078] |
2014-05-28 22:05:31.136630 | {'formattedAddress': 'Vanuatu', 'adminArea3': ... | [168.13926696777344, -16.623371124267578] |
2014-05-28 22:05:31.317778 | {'formattedAddress': 'Fiji', 'adminArea1': 'Fi... | [178.0944061279297, -17.045461654663086] |
2014-05-28 22:05:37.081416 | {'formattedAddress': 'Solomon Islands', 'admin... | [160.01930236816406, -9.548112869262695] |
2014-05-28 22:05:40.644162 | {'formattedAddress': 'Seychelles', 'adminArea1... | [55.47166061401367, -4.669795036315918] |
2014-05-28 22:03:56.091954 | {'adminArea5': 'Dili', 'formattedAddress': 'Hi... | [125.570556640625, -8.569040298461914] |
2014-05-28 22:05:31.732318 | {'adminArea5': 'Tabang', 'formattedAddress': '... | [119.51933288574219, -2.9553489685058594] |
2014-05-28 22:05:34.035228 | {'adminArea5': 'Villavicencio', 'formattedAddr... | [-73.62193298339844, 4.132339954376221] |
2014-05-28 22:05:37.632689 | {'adminArea5': 'Verapaz', 'formattedAddress': ... | [-88.87079620361328, 13.642239570617676] |
2014-05-28 22:05:33.787201 | {'formattedAddress': 'Saint Lucia', 'adminArea... | [-60.9665641784668, 13.86330509185791] |
2014-05-28 22:05:33.933798 | {'adminArea5': 'Bequia', 'formattedAddress': '... | [-61.229007720947266, 13.02966022491455] |
2014-05-28 22:05:34.186057 | {'formattedAddress': 'Dominica', 'adminArea1':... | [-61.33945846557617, 15.3991060256958] |
2014-05-28 22:05:35.341267 | {'postalCode': '78663-000', 'adminArea5': 'São... | [-52.788028717041016, -10.81116008758545] |
25 rows × 2 columns
# Expand the address components column into its own dataframe
geo_admin_df = geo_df['addressComponents'].apply(pd.Series)
# View the first row
geo_admin_df.head(1)
adminArea1 | adminArea3 | adminArea4 | adminArea5 | formattedAddress | postalCode | streetAddress | |
---|---|---|---|---|---|---|---|
updatedAt | |||||||
2014-05-28 22:03:57.208119 | Afghanistan | NaN | NaN | NaN | Afghanistan | NaN | NaN |
1 rows × 7 columns
# Join the two geo dataframes to the primary dataframe, column-wise
df = pd.concat([df, geo_admin_df, geo_df], axis=1)
# Extract the latitude and longitude coordinates into their own columns:
# element 1 of each coords entry is the latitude, element 0 the longitude
df['latitude'] = df['coords'].str.get(1)
df['longitude'] = df['coords'].str.get(0)
# Expand the tags column into its own dataframe (one column per tag position)
tags_df = df['tags'].apply(pd.Series)
# Keep only the first two tag columns and drop the rest. This uses positional
# .iloc because the old .ix indexer no longer exists in current pandas.
tags_df = tags_df.iloc[:, :2]
# Add titles to the columns
tags_df.columns = ['tag1', 'tag2']
# Show the resulting column index
tags_df.columns
Index(['tag1', 'tag2'], dtype='object')
# View the first row of the tags dataframe
tags_df.head(1)
tag1 | tag2 | |
---|---|---|
updatedAt | ||
2014-05-28 22:03:57.208119 | {'confidence': 1, 'name': 'flood'} | {'confidence': 1, 'name': 'flash-flood'} |
1 rows × 2 columns
# Create a tag extractor function that leaves missing observations (which are floats) alone, but converts other values into dicts, then extracts the name value.
# Create a function called tag_extractor,
def tag_extractor(x):
    """Return the tag name held in a single cell of the tags dataframe.

    Args:
        x: The cell value. Either a float (missing observations are NaN
            floats), something ``dict()`` can convert that has a ``'name'``
            key, such as ``{'confidence': 1, 'name': 'flood'}``, or any
            falsy value.

    Returns:
        ``x`` unchanged when it is a float; the value stored under
        ``'name'`` when ``x`` is truthy; ``None`` for any other falsy value
        (for example ``None`` or an empty dict).

    Raises:
        KeyError: If a truthy ``x`` converts to a dict with no ``'name'`` key.
    """
    # Missing observations are floats: leave them untouched. isinstance also
    # matches float subclasses (e.g. numpy.float64), which an exact type
    # comparison would let fall through to dict() and fail.
    if isinstance(x, float):
        return x
    # Anything truthy is treated as a tag record: convert it and pull the name
    if x:
        return dict(x)['name']
    # Everything else (None, empty containers) has no tag name
    return None
# Replace every cell of the tags dataframe with its extracted tag name
tags_df = tags_df.applymap(tag_extractor)
# Append the tag columns to the primary dataframe, column-wise
df = pd.concat([df, tags_df], axis=1)
# Expand the language column into its own dataframe, then copy its language
# code column back onto the primary dataframe as 'lang'
lang_df = df['language'].apply(pd.Series)
df['lang'] = lang_df['code']
# Drop some extra columns to clean up the dataframe
df = df.drop(
    [
        'geo',
        'updatedAt',
        'addressComponents',
        'language',
        'tags',
        'coords',
        'id',
        'remoteID',
    ],
    axis=1,
)
# View the final dataframe
df.head()
content | createdAt | entities | license | lifespan | publishedAt | source | summary | adminArea1 | adminArea3 | adminArea4 | adminArea5 | formattedAddress | postalCode | streetAddress | latitude | longitude | tag1 | tag2 | lang | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
updatedAt | ||||||||||||||||||||
2014-05-28 22:03:57.208119 | As at 31 Mar 2014, 11 provinces (nine in north... | 2014-05-09T19:07:06.931101 | [Afghanistan, Badakhshan, Faryab] | unknown | temporary | 2014-04-25T00:00:00.000Z | reliefweb | Afghanistan: Flash Floods and Landslides - Apr... | Afghanistan | NaN | NaN | NaN | Afghanistan | NaN | NaN | 36.750000 | 72.000000 | flood | flash-flood | en |
2014-05-28 22:03:57.034668 | A flash flood triggered by a dam burst on 31 M... | 2014-05-09T17:08:52.744816 | [Kokpekty, Kazakhstan, Ulytauskiy rayon, Qarag... | unknown | temporary | 2014-04-11T00:00:00.000Z | reliefweb | Kazakhstan: Flash Floods - Apr 2014 | Kazakhstan | Qaraghandy | NaN | Ulytauskiy rayon | Ulytauskiy rayon, Kazakhstan | NaN | NaN | 48.146004 | 67.179169 | flash-flood | flash-flood | en |
2014-05-28 22:05:39.977156 | On 10 Apr 2014, a shallow 6.2-magnitude earthq... | 2014-04-29T22:34:14.876123 | [Managua, Lago, Nicaragua, Len, Paiwas, Región... | unknown | temporary | 2014-04-11T00:00:00.000Z | reliefweb | Nicaragua: Earthquake - Apr 2014 | Nicaragua | Región Autónoma Atlántico Sur | NaN | Paiwas | NIC-13, Paiwas, Nicaragua | NaN | NIC-13 | 12.921140 | -85.045601 | earthquake | armed-conflict | en |
2014-05-28 22:05:31.228032 | On 1 Apr 2014, a 8.2-magnitude earthquake occu... | 2014-04-29T22:34:09.003365 | [Honduras, Chile, Iquique, Panama, Gestin, Gua... | unknown | temporary | 2014-04-01T00:00:00.000Z | reliefweb | Chile: Earthquake - Apr 2014 | Chile | Maule | Linares | Linares | 3580000 Linares, Maule, Chile | 3580000 | 3580000 Linares | -35.786228 | -71.674675 | earthquake | tsunami | en |
2014-05-28 22:05:37.263112 | On 1 Apr 2014, a 8.2-magnitude earthquake occu... | 2014-04-29T22:34:18.449345 | [Honduras, Chile, Iquique, Panama, Gestin, Gua... | unknown | temporary | 2014-04-01T00:00:00.000Z | reliefweb | Chile: Earthquake - Apr 2014 | United States | IN | Miami Co. | Peru | 75 S Broadway Ave, Peru, IN 46970 | 46970 | 75 S Broadway Ave | 40.752463 | -86.068032 | earthquake | tsunami | en |
5 rows × 20 columns