#!/usr/bin/env python
# coding: utf-8

# # Tidy FAA survey
#
# In response to a public records request made by The Times, the FAA provided a
# decade of its annual estimates about the activity of America's most common
# helicopter models. This data is not published online. The records provided to
# The Times were manually input into a spreadsheet.
#
# This notebook filters the released data to population counts and flight-hour
# estimates for models that have appeared in every year of survey data.

import os

import pandas as pd

# Restore `input_dir` / `output_dir` previously saved with IPython's %store
# magic in an upstream notebook.
get_ipython().run_line_magic('store', '-r')

# Read in the raw survey data.
survey = pd.read_csv(os.path.join(input_dir, "faa_general_aviation_survey.csv"))

# Identify all the models that have been surveyed by the FAA, counting how many
# annual surveys each make/model combination appears in.
model_counts = survey.groupby([
    'latimes_make',
    'latimes_model'
]).size().rename("survey_count").reset_index()

model_counts

# Filter down to models that appear in every year of survey data. The number of
# survey years is computed from the data itself (ten in the released records)
# rather than hard-coded, so the filter stays correct if more years are added.
total_survey_years = survey['year'].nunique()
qualified_models = model_counts[model_counts.survey_count == total_survey_years][
    ['latimes_make', 'latimes_model']
]

# List them
qualified_models

# Filter down the survey data to only include those models that have qualified
# for analysis. An inner join keeps only rows matching a qualified make/model.
qualified_survey = survey.merge(
    qualified_models,
    on=["latimes_make", "latimes_model"],
    how="inner"
)

# Trim down the columns. `.copy()` makes this an independent frame so the
# column assignment below does not hit pandas' SettingWithCopyWarning.
trimmed_qualified_survey = qualified_survey[[
    'year',
    'latimes_make',
    'latimes_model',
    'population_count',
    'active_count',
    'total_hours'
]].copy()

# Sort for tidy, stable output: one model's full time series at a time.
cleaned_qualified_survey = trimmed_qualified_survey.sort_values([
    'latimes_make',
    'latimes_model',
    'year'
])

# Create a combined make and model column.
cleaned_qualified_survey['latimes_make_and_model'] = (
    cleaned_qualified_survey.latimes_make + " " + cleaned_qualified_survey.latimes_model
)

# Output the result.
cleaned_qualified_survey.to_csv(os.path.join(output_dir, "tidy-survey.csv"), index=False)