import os
from urllib.request import urlretrieve

import pandas as pd

# Download the data file only when no local copy exists, so re-running the
# script does not hit the network again.  urlretrieve stands in for the
# notebook shell escape (`!wget ...`), which is not valid Python outside
# IPython and depends on the wget binary being installed.
if not os.path.exists('deaths2008.csv'):
    urlretrieve(
        'https://raw.githubusercontent.com/AllenDowney/ElementsOfDataScience/master/data/deaths2008.csv',
        'deaths2008.csv',
    )

# Load the records into a DataFrame and preview the first rows.
df = pd.read_csv('deaths2008.csv')
df.head()
| | Date | City | Cause of Death |
---|---|---|---|
0 | 08/13/2008 | Logar - Afghanistan | Terrorist Action |
1 | 06/21/2008 | Sanmartin And Catamarca - Salta - Argentina | Veh. Accident-Auto |
2 | 07/16/2008 | Rodriguez - Argentina | Drowning |
3 | 01/07/2008 | 23klm Minnie Bridge - Great Northern Highway ... | Veh. Accid-Auto |
4 | 02/11/2008 | Bendigo - Victoria - Australia | Other Accident |
# Alternative source: fetch a Google Sheet through its CSV export URL and
# rebind `df` to the result.
sheet_id = '1knrtYbzETmcK91jO9TBgejPNOcQN8gbocoDGkdnXwXQ'
url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv'
df = pd.read_csv(filepath_or_buffer=url)
df.head()
| | Date | City | Cause of Death |
---|---|---|---|
0 | 08/13/2008 | Logar - Afghanistan | Terrorist Action |
1 | 06/21/2008 | Sanmartin And Catamarca - Salta - Argentina | Veh. Accident-Auto |
2 | 07/16/2008 | Rodriguez - Argentina | Drowning |
3 | 01/07/2008 | 23klm Minnie Bridge - Great Northern Highway ... | Veh. Accid-Auto |
4 | 02/11/2008 | Bendigo - Victoria - Australia | Other Accident |
# The raw 'City' field holds a whole location such as
# 'Bendigo - Victoria - Australia': parts separated by ' - ', country last.
# Splitting on whitespace and taking tokens 0 and 2 is only right for
# one-word, two-part locations (for the example above it reports 'Victoria'
# as the country), so split on the separator and take the first and last
# parts instead.
# NOTE: outputs recorded from a run of the earlier whitespace-based split
# (sample rows and per-country counts) will differ once this is re-run.
city_country = df['City'].str.split(' - ')
df['Country'] = city_country.str[-1].str.strip()
df['City'] = city_country.str[0].str.strip()
df.head()
| | Date | City | Cause of Death | Country |
---|---|---|---|---|
0 | 08/13/2008 | Logar | Terrorist Action | Afghanistan |
1 | 06/21/2008 | Sanmartin | Veh. Accident-Auto | Catamarca |
2 | 07/16/2008 | Rodriguez | Drowning | Argentina |
3 | 01/07/2008 | 23klm | Veh. Accid-Auto | Bridge |
4 | 02/11/2008 | Bendigo | Other Accident | Victoria |
# Break each 'Cause of Death' entry at its hyphens.  expand=True spreads the
# pieces over integer-labelled columns: 0 is the main cause, 1 the sub-cause
# (None where the entry has no hyphen).
cause_subcause = df['Cause of Death'].str.split(pat='-', expand=True)
cause_subcause.head()
| | 0 | 1 |
---|---|---|
0 | Terrorist Action | None |
1 | Veh. Accident | Auto |
2 | Drowning | None |
3 | Veh. Accid | Auto |
4 | Other Accident | None |
# Attach the two pieces of the split as named columns on the main frame.
for column_name, part_index in (('Cause', 0), ('Subcause', 1)):
    df[column_name] = cause_subcause[part_index]
df.head()
| | Date | City | Cause of Death | Country | Cause | Subcause |
---|---|---|---|---|---|---|
0 | 08/13/2008 | Logar | Terrorist Action | Afghanistan | Terrorist Action | None |
1 | 06/21/2008 | Sanmartin | Veh. Accident-Auto | Catamarca | Veh. Accident | Auto |
2 | 07/16/2008 | Rodriguez | Drowning | Argentina | Drowning | None |
3 | 01/07/2008 | 23klm | Veh. Accid-Auto | Bridge | Veh. Accid | Auto |
4 | 02/11/2008 | Bendigo | Other Accident | Victoria | Other Accident | None |
# Boolean mask of the rows whose Country is exactly 'Mexico'; summing the
# mask counts the True entries.
mexico = df['Country'].eq('Mexico')
mexico.sum()
23
# Show only the rows selected by the Mexico mask.
df.loc[mexico]
| | Date | City | Cause of Death | Country | Cause | Subcause |
---|---|---|---|---|---|---|
344 | 01/06/2008 | Jalisco | Homicide | Mexico | Homicide | None |
349 | 01/26/2008 | Morelia | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
355 | 02/04/2008 | Jalisco | Suicide | Mexico | Suicide | None |
361 | 02/16/2008 | Cuernavaca | Other Accident | Mexico | Other Accident | None |
364 | 02/23/2008 | Jalisco | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
377 | 03/22/2008 | Puebla | Suicide | Mexico | Suicide | None |
380 | 03/25/2008 | Tlalnepantla | Other Accident | Mexico | Other Accident | None |
386 | 04/05/2008 | Oaxaca | Drowning | Mexico | Drowning | None |
400 | 05/11/2008 | Tamaulipas | Homicide | Mexico | Homicide | None |
413 | 05/31/2008 | Yucatan | Suicide | Mexico | Suicide | None |
422 | 06/19/2008 | Tamaulipas | Suicide | Mexico | Suicide | None |
428 | 06/24/2008 | Merida | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
433 | 07/02/2008 | Yucatan | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
435 | 07/06/2008 | Coahuila | Air Accident | Mexico | Air Accident | None |
441 | 07/17/2008 | Jalisco | Drowning | Mexico | Drowning | None |
444 | 07/21/2008 | Chihuahua | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
501 | 09/27/2008 | Jalisco | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
526 | 11/05/2008 | Sonora | Veh. Accid-Other | Mexico | Veh. Accid | Other |
532 | 11/10/2008 | Sonora | Veh. Accid-Motorcy. | Mexico | Veh. Accid | Motorcy. |
545 | 11/28/2008 | Guanajuato | Homicide | Mexico | Homicide | None |
547 | 12/03/2008 | Nayarit | Veh. Accid-Motorcy. | Mexico | Veh. Accid | Motorcy. |
552 | 12/14/2008 | Sinaloa | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
553 | 12/14/2008 | Sinaloa | Veh. Accid-Auto | Mexico | Veh. Accid | Auto |
# Print one "<cause> <count>" line per cause among the Mexico rows.
# The loop body must be indented under the `for`; size() yields the group
# lengths directly, so no per-group sub-frame has to be built.
for cause, count in df[mexico].groupby('Cause').size().items():
    print(cause, count)
Air Accident 1
Drowning 2
Homicide 3
Other Accident 2
Suicide 4
Veh. Accid 11