#!/usr/bin/env python
# coding: utf-8

# # Tidy FAA survey
#
# In response to a public records request made by The Times, the FAA provided a
# decade of its annual estimates about the activity of America's most common
# helicopter models. This data is not published online. The records provided to
# The Times were manually input into a spreadsheet.
#
# This notebook filters the released data to population counts and flight-hour
# estimates for models that have appeared in every year of survey data.

import os

import pandas as pd

# Restore `input_dir` / `output_dir` previously saved with IPython's %store
# magic in an upstream notebook.
get_ipython().run_line_magic('store', '-r')

# Read in the raw survey data.
survey = pd.read_csv(os.path.join(input_dir, "faa_general_aviation_survey.csv"))

# Identify all the models that have been surveyed by the FAA, counting how many
# annual surveys each make/model combination appears in.
model_counts = survey.groupby([
    'latimes_make',
    'latimes_model'
]).size().rename("survey_count").reset_index()

model_counts

# Filter down to models that appear in every year of survey data. The number of
# survey years is computed from the data itself (ten in the released records)
# rather than hard-coded, so the filter stays correct if more years are added.
total_survey_years = survey['year'].nunique()
qualified_models = model_counts[model_counts.survey_count == total_survey_years][
    ['latimes_make', 'latimes_model']
]

# List them
qualified_models

# Filter down the survey data to only include those models that have qualified
# for analysis. An inner join keeps only rows matching a qualified make/model.
qualified_survey = survey.merge(
    qualified_models,
    on=["latimes_make", "latimes_model"],
    how="inner"
)

# Trim down the columns. `.copy()` makes this an independent frame so the
# column assignment below does not hit pandas' SettingWithCopyWarning.
trimmed_qualified_survey = qualified_survey[[
    'year',
    'latimes_make',
    'latimes_model',
    'population_count',
    'active_count',
    'total_hours'
]].copy()

# Sort for tidy, stable output: one model's full time series at a time.
cleaned_qualified_survey = trimmed_qualified_survey.sort_values([
    'latimes_make',
    'latimes_model',
    'year'
])

# Create a combined make and model column.
cleaned_qualified_survey['latimes_make_and_model'] = (
    cleaned_qualified_survey.latimes_make + " " + cleaned_qualified_survey.latimes_model
)

# Output the result.
cleaned_qualified_survey.to_csv(os.path.join(output_dir, "tidy-survey.csv"), index=False)