# Import required modules
import requests
import pandas as pd

# Set pandas to display a maximum of 30 columns
pd.set_option('display.max_columns', 30)

# Create a variable with your CrisisNET API key
api_key = '532d8dc4ed3329652f114b73'

# Set up the request header
headers = {'Authorization': 'Bearer ' + api_key}

# Set up the request's URL
url = 'http://api.crisis.net/item?%(tags)s&%(before)s&%(after)s&%(text)s&%(location)s&%(radius)s&%(limit)s&%(sources)s&%(licenses)s&%(offset)s'

# Create a dict of filters
filters = {'tags': 'tags=',                 # a list of keywords separated by commas
           'after': 'after=',               # a timestamp in ISO 8601 format
           'before': 'before=',             # a timestamp in ISO 8601 format
           'text': 'text=',                 # words separated by a '+'
           'location': 'location=',         # latitude and longitude
           'radius': 'radius=',             # a number in meters
           'limit': 'limit=',               # the maximum number of records to return
           'sources': 'sources=reliefweb',  # a list of sources
           'licenses': 'licenses=',         # the types of licenses
           'offset': 'offset='              # for paging through multiple requests
           }

# Create the formatted request URL
formattedURL = url % filters

# Request data from CrisisNET
r = requests.get(formattedURL, headers=headers)

# Check that the pull was successful; a successful request prints "<Response [200]>"
print(r)

# Create a dataframe from the request's JSON
request_df = pd.DataFrame(r.json())

# View the first row of the request dataframe
request_df.head(1)

# Create a dataframe from the request's data
df = request_df['data'].apply(pd.Series)

# View the first row of the dataframe
df.head(1)

# Check the length of the dataframe
len(df)

# Set the row index of the dataframe to the time the report was updated
df['updatedAt'] = pd.to_datetime(df['updatedAt'])
df.index = df['updatedAt']

# Expand the geo column into its own dataframe
geo_df = df['geo'].apply(pd.Series)
geo_df.head(100)

# Expand the address components column into its own dataframe
geo_admin_df = geo_df['addressComponents'].apply(pd.Series)
geo_admin_df.head(1)

# Join the two geo dataframes to the primary dataframe
df = pd.concat([df, geo_admin_df, geo_df], axis=1)

# Extract the latitude and longitude coordinates into their own columns;
# GeoJSON-style coords are ordered [longitude, latitude]
df['latitude'] = df['coords'].str[1]
df['longitude'] = df['coords'].str[0]

# Expand the tags column into its own dataframe
tags_df = df['tags'].apply(pd.Series)

# Keep only the first two tag columns; the rest are dropped
tags_df = tags_df.iloc[:, 0:2]

# Add titles to the columns
tags_df.columns = ['tag1', 'tag2']

# Confirm the new column names
tags_df.columns

# View the first row of the tags dataframe
tags_df.head(1)
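# Aside: the .apply(pd.Series) pattern used throughout this walkthrough expands
# a column of dicts into its own dataframe, one key per column. A minimal,
# self-contained sketch of the idea (the toy 'geo' records below are
# hypothetical, not CrisisNET output):
toy = pd.DataFrame({'geo': [{'coords': [36.8, -1.3], 'country': 'Kenya'},
                            {'coords': [30.1, -1.9], 'country': 'Rwanda'}]})

# Each dict key ('coords', 'country') becomes a column in the expanded dataframe
toy['geo'].apply(pd.Series)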
# Create a tag extractor function that leaves missing observations (which come
# through as floats, i.e. NaN) alone, but converts other values into dicts and
# extracts the name value.
def tag_extractor(x):
    # if x is a float, it is a missing observation (NaN), so return it untouched
    if type(x) is float:
        return x
    # otherwise, convert x to a dict and return the value of the name key
    elif x:
        x = dict(x)
        return x['name']
    # and leave everything else as missing
    else:
        return

# Apply the function to every cell in the tags dataframe
tags_df = tags_df.applymap(tag_extractor)

# Join the tags dataframe with the primary dataframe
df = pd.concat([df, tags_df], axis=1)

# Expand the language column into its own dataframe and return the language
# code column to the original dataframe
lang_df = df['language'].apply(pd.Series)
df['lang'] = lang_df['code']

# Drop some extra columns to clean up the dataframe
df = df.drop(['geo', 'updatedAt', 'addressComponents', 'language', 'tags',
              'coords', 'id', 'remoteID'], axis=1)

# View the final dataframe
df.head()
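# Aside: instead of hand-building the query string with % formatting, the same
# request could be made with requests' params argument, which URL-encodes each
# value automatically. A minimal sketch under the same filters (only 'sources'
# matches the walkthrough above; the 'limit' value here is an assumption):
params = {'sources': 'reliefweb', 'limit': 100}
r2 = requests.get('http://api.crisis.net/item', params=params, headers=headers)

# As before, a 200 status code indicates a successful pull
r2.status_code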