# Load the raw aviation records. Each line of the file is one record whose
# fields are separated by " | ".
with open('AviationData.txt', 'r') as f:
    # readlines() keeps the trailing newline on every line, so the last
    # field of each record still ends with "\n".
    aviation_data = f.readlines()

# Split every raw line (header line included) into its list of fields.
aviation_list = [line.split(" | ") for line in aviation_data]
# Linear-time search: visit every record once and keep each one that has a
# field exactly equal to "LAX94LA336".
lax_code = [fields for fields in aviation_list if "LAX94LA336" in fields]
# Logarithmic-time search: sort the records once by the field at index 2
# (the field the names below treat as the accident number), then
# binary-search the sorted keys with the bisect module.
import bisect

# Sorting costs O(n log n); it only pays off when several lookups follow.
sorted_aviation_list = sorted(aviation_list, key=lambda row: row[2])
# Extracting the keys is a single O(n) pass.
sorted_accident_numbers = [row[2] for row in sorted_aviation_list]
# The binary search itself is the O(log n) step.
lax_index = bisect.bisect_left(sorted_accident_numbers, "LAX94LA336")
# bisect_left returns an insertion point even when the key is absent, so
# confirm that the row at that position really carries the requested key.
if (lax_index < len(sorted_accident_numbers)
        and sorted_accident_numbers[lax_index] == "LAX94LA336"):
    lax_binary_match = sorted_aviation_list[lax_index]
else:
    lax_binary_match = None
# Turn every data line into a dict keyed by the column names, which are
# taken from the first line of the file.
header = aviation_data[0].split(" | ")
aviation_dict_list = [
    dict(zip(header, record.split(" | ")))
    for record in aviation_data[1:]
]
# Linear search over the dict records: keep every record in which some
# column value equals "LAX94LA336". Each record is inspected once, so the
# scan is O(n) in the number of records.
lax_dict = [
    record for record in aviation_dict_list
    if "LAX94LA336" in record.values()
]
# Count accidents per US state. The state is the text after the last comma
# of 'Location'; it is not stripped, so it keeps any leading space.
import collections

states_acc = []
for record in aviation_dict_list:
    state = record['Location'].split(',')[-1]
    is_us_accident = (
        record['Investigation Type'] == 'Accident'
        and record['Country'] == 'United States'
    )
    if is_us_accident:
        states_acc.append(state)

state_accidents = collections.Counter(states_acc)
# Most frequent state together with its count. The result is neither
# assigned nor printed here.
state_accidents.most_common(1)
# Output: [(' CA', 7877)]
# CA has the most accidents.
# Total the fatal and serious injuries of all records per calendar month.
# The month is the text before the first '/' of 'Event Date'.
monthly_injuries = {}
for record in aviation_dict_list:
    # Blank injury counts are treated as zero. The zero is written back into
    # the record, as before, so later readers of these columns see the same
    # values.
    for column in ('Total Fatal Injuries', 'Total Serious Injuries'):
        if record[column] == '':
            record[column] = 0
    # Despite the name, this is the sum of fatal and serious injuries.
    fatal_injury = (int(record['Total Fatal Injuries'])
                    + int(record['Total Serious Injuries']))
    month = record['Event Date'].split('/')[0]
    monthly_injuries[month] = monthly_injuries.get(month, 0) + fatal_injury

# Records with a blank event date end up under the key ''; file them under
# '00' so they sort ahead of the real months. Guarded because pop('') raises
# KeyError when no record has a blank date, and added to any existing '00'
# total so that nothing is overwritten.
if '' in monthly_injuries:
    monthly_injuries['00'] = (monthly_injuries.get('00', 0)
                              + monthly_injuries.pop(''))

# Order the totals by month key (plain string comparison).
monthly_injuries_order = dict(
    sorted(monthly_injuries.items(), key=lambda item: item[0])
)
# Scatter-plot the per-month injury totals: month keys on the x axis,
# totals on the y axis.
import matplotlib.pyplot as plt

fatal_injury_month_list = [*monthly_injuries_order]
fatal_injury_list = [*monthly_injuries_order.values()]
plt.scatter(fatal_injury_month_list, fatal_injury_list)
# Output: <matplotlib.collections.PathCollection at 0x7fe672f13450>
# Observation: more injuries occur in the summer months.