#!/usr/bin/env python
# coding: utf-8

# # Download NTSB Aviation Accidents Database
#
# A machine-readable copy of the database is
# [published online](https://www.ntsb.gov/_layouts/ntsb.aviation/index.aspx)
# in Microsoft Access format. Let's download it.
#
# NOTE: this script is a notebook export. It relies on `get_ipython()` and
# must therefore be run under IPython/Jupyter, not the plain interpreter.

# In[5]:

import os
import pypyodbc

# Configure the data directories.
# `%store -r` restores variables saved by an earlier notebook; the rest of
# this script expects it to define `input_dir`.

# In[6]:

get_ipython().run_line_magic('store', '-r')

# Remove the zip file if it already exists.

# In[8]:

zip_path = os.path.join(input_dir, "avall.zip")

# In[10]:

# Paths are double-quoted in every shell command so that directories
# containing spaces survive word splitting.
get_ipython().system('rm -f "$zip_path"')

# Download the file.

# In[4]:

get_ipython().system('wget -O "$zip_path" -nc https://app.ntsb.gov/avdata/Access/avall.zip')

# Unzip the file.

# In[5]:

# The glob is backslash-escaped so that unzip, not the shell, expands it.
get_ipython().system('unzip -n "$input_dir"/\\*.zip -d "$input_dir/"')

# Use the [mdbtools](https://github.com/brianb/mdbtools) command-line tool
# to get a list of all the tables in the database.

# In[13]:

mdb_path = os.path.join(input_dir, 'avall.mdb')

# In[14]:

# `-1` prints one table per line, so names containing spaces stay intact.
tables = get_ipython().getoutput('mdb-tables -1 "$mdb_path"')

# Turn the output lines into a plain Python list, dropping blank lines.
# An empty result (e.g. mdb-tables failed) yields an empty list rather than
# an IndexError.

# In[15]:

tables = [line.strip() for line in tables if line.strip()]

# Extract the tables from the Microsoft Access database and write them out
# as CSVs.

# In[16]:


def export_table(name: str) -> None:
    """Export one table of the Access database to a CSV file.

    The CSV is written to ``input_dir`` and named after the lower-cased
    table name with a ``.csv`` suffix. An existing file is overwritten.

    Args:
        name: Table name as reported by ``mdb-tables``.
    """
    print(f"Exporting {name}")
    export_path = os.path.join(input_dir, f"{name.lower()}.csv")
    # IPython expands $mdb_path, $name and $export_path from this function's
    # local and global namespaces before handing the command to the shell.
    get_ipython().system('mdb-export "$mdb_path" "$name" > "$export_path"')


# In[17]:

for t in tables:
    export_table(t)