# Import required modules
import requests
import pandas as pd

# Set pandas to display a maximum of 30 columns
pd.set_option('display.max_columns', 30)

# Create a variable with your CrisisNET API key
api_key = '532d8dc4ed3329652f114b73'

# Set up the request header
headers = {'Authorization': 'Bearer ' + api_key}

# Set up the request's URL
url = 'http://api.crisis.net/item?%(tags)s&%(before)s&%(after)s&%(text)s&%(location)s&%(radius)s&%(limit)s&%(sources)s&%(licenses)s&%(offset)s'

# Create a dict of filters
filters = {'tags': 'tags=',                 # a list of keywords separated by commas
           'after': 'after=',               # a timestamp in ISO 8601 format
           'before': 'before=',             # a timestamp in ISO 8601 format
           'text': 'text=',                 # words separated by a '+'
           'location': 'location=',         # latitude and longitude
           'radius': 'radius=',             # a number in meters
           'limit': 'limit=',               # the maximum number of records to return
           'sources': 'sources=reliefweb',  # a list of sources
           'licenses': 'licenses=',         # the types of licenses
           'offset': 'offset='              # for paging through multiple requests
           }

# Create the formatted request URL
formattedURL = url % filters

# Request data from CrisisNET
r = requests.get(formattedURL, headers=headers)

# Check that the pull was successful; a successful request prints "<Response [200]>"
print(r)

# Create a dataframe from the request's JSON
request_df = pd.DataFrame(r.json())

# View the first row of the request dataframe
request_df.head(1)

# Create a dataframe from the request's data
df = request_df['data'].apply(pd.Series)

# View the first row of the dataframe
df.head(1)

# Check the length of the dataframe
len(df)

# Set the row index of the dataframe to the time the report was updated
df['updatedAt'] = pd.to_datetime(df['updatedAt'])
df.index = df['updatedAt']

# Expand the geo column into its own dataframe
geo_df = df['geo'].apply(pd.Series)
geo_df.head(100)

# Expand the address components column into its own dataframe
geo_admin_df = geo_df['addressComponents'].apply(pd.Series)
geo_admin_df.head(1)

# Join the two geo dataframes to the primary dataframe
df = pd.concat([df, geo_admin_df, geo_df], axis=1)

# Extract the latitude and longitude coordinates into their own columns;
# GeoJSON-style coords are ordered [longitude, latitude]
df['latitude'] = df['coords'].str[1]
df['longitude'] = df['coords'].str[0]

# Expand the tags column into its own dataframe
tags_df = df['tags'].apply(pd.Series)

# Keep only the first two tag columns; the rest are dropped
tags_df = tags_df.iloc[:, 0:2]

# Add titles to the columns
tags_df.columns = ['tag1', 'tag2']

# Confirm the new column names
tags_df.columns

# View the first row of the tags dataframe
tags_df.head(1)
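# Aside: the .apply(pd.Series) pattern used throughout this walkthrough expands
# a column of dicts into its own dataframe, one key per column. A minimal,
# self-contained sketch of the idea (the toy 'geo' records below are
# hypothetical, not CrisisNET output):
toy = pd.DataFrame({'geo': [{'coords': [36.8, -1.3], 'country': 'Kenya'},
                            {'coords': [30.1, -1.9], 'country': 'Rwanda'}]})

# Each dict key ('coords', 'country') becomes a column in the expanded dataframe
toy['geo'].apply(pd.Series)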
# Create a tag extractor function that leaves missing observations (which come
# through as floats, i.e. NaN) alone, but converts other values into dicts and
# extracts the name value.
def tag_extractor(x):
    # if x is a float, it is a missing observation (NaN), so return it untouched
    if type(x) is float:
        return x
    # otherwise, convert x to a dict and return the value of the name key
    elif x:
        x = dict(x)
        return x['name']
    # and leave everything else as missing
    else:
        return

# Apply the function to every cell in the tags dataframe
tags_df = tags_df.applymap(tag_extractor)

# Join the tags dataframe with the primary dataframe
df = pd.concat([df, tags_df], axis=1)

# Expand the language column into its own dataframe and return the language
# code column to the original dataframe
lang_df = df['language'].apply(pd.Series)
df['lang'] = lang_df['code']

# Drop some extra columns to clean up the dataframe
df = df.drop(['geo', 'updatedAt', 'addressComponents', 'language', 'tags',
              'coords', 'id', 'remoteID'], axis=1)

# View the final dataframe
df.head()
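# Aside: instead of hand-building the query string with % formatting, the same
# request could be made with requests' params argument, which URL-encodes each
# value automatically. A minimal sketch under the same filters (only 'sources'
# matches the walkthrough above; the 'limit' value here is an assumption):
params = {'sources': 'reliefweb', 'limit': 100}
r2 = requests.get('http://api.crisis.net/item', params=params, headers=headers)

# As before, a 200 status code indicates a successful pull
r2.status_code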