#!/usr/bin/env python
# coding: utf-8

# # Analyzing Data
# ## Prison Helicopter Escapes

# We begin by importing some helper functions

# In[11]:


from helper import *


# ## Get the Data

# Now, let's get the data from the [List of helicopter prison escapes](https://en.wikipedia.org/wiki/List_of_helicopter_prison_escapes) Wikipedia article.

# In[12]:


url = 'https://en.wikipedia.org/wiki/List_of_helicopter_prison_escapes'
data = data_from_url(url)


# Let's print the first three rows

# In[13]:


# Print each of the first three rows (not the whole dataset once per row).
for row in data[:3]:
    print(row)


# ## Removing the Details

# We use `enumerate` to get an `index` alongside each `row`. This helps us track which row we're modifying: each row is replaced by a copy of itself without its last entry.

# In[14]:


for index, row in enumerate(data):
    data[index] = row[:-1]


# In[15]:


print(data[:3])


# ## Extracting the Year

# We iterate over `data` using the iterable variable `row`. `row[0]` refers to the first entry of row: the `date`. With `fetch_year(row[0])`, we're extracting the year out of the date in `row[0]`. We then replace the value of `row[0]` with the year that we just extracted.

# In[16]:


for row in data:
    row[0] = fetch_year(row[0])


# In[17]:


print(data[:3])


# ## Attempts per Year

# First, we need to find the earliest and latest years in `data`

# In[18]:


# NOTE(review): `range` below needs integers, so this presumes `fetch_year`
# returns an int -- confirm against the helper module.
min_year = min(row[0] for row in data)
max_year = max(row[0] for row in data)


# In[19]:


print(min_year)
print(max_year)


# Now we'll create a list of all the years from `min_year` to `max_year`. To determine how many prison break attempts there were for each year, we need to make sure we capture all years in the range, including the years in which there were no prison breaks.

# In[20]:


years = list(range(min_year, max_year + 1))
print(years)


# Next, we create a list where each element looks like `[<year>, 0]`

# In[22]:


attempts_per_year = [[year, 0] for year in years]
print(attempts_per_year)


# To determine how many attempts there were in each year, we will create a loop within a loop. We will iterate over `data` and then over all the rows in `attempts_per_year`. When the year in iteration over `data` matches the year in iteration over `attempts_per_year`, we will increment the second entry of `attempts_per_year` by 1

# In the code cell below, we iterate over `data` using the iterable variable `row`. Then, we iterate over `attempts_per_year` using the iterable variable `year_attempt`.

# In[23]:


for row in data:
    for year_attempt in attempts_per_year:
        if row[0] == year_attempt[0]:
            year_attempt[1] += 1
            # Each year appears exactly once in `attempts_per_year`, so
            # there is nothing left to match for this row.
            break

print(attempts_per_year)


# In[24]:


# `get_ipython` only exists inside an IPython/Jupyter session; skip the
# inline-plotting magic when this file is run as a plain Python script.
try:
    ipython_shell = get_ipython()
except NameError:
    ipython_shell = None
if ipython_shell is not None:
    ipython_shell.run_line_magic('matplotlib', 'inline')
barplot(attempts_per_year)


# The most helicopter prison break attempts occurred in the following years: 1986, 2001, 2007 and 2009, with a total of three attempts each.

# ## Attempts by Country

# In[25]:


# NOTE(review): `df` is not defined anywhere in this file; presumably it is
# provided by `from helper import *` -- confirm against the helper module.
countries_frequency = df["Country"].value_counts()


# In[26]:


print_pretty_table(countries_frequency)


# France is the country with the most helicopter prison escape attempts.

# In[ ]: