#!/usr/bin/env python
# coding: utf-8
"""Report how often drug and conspiracy offenses appear in a commutations CSV.

The CSV is read with ``csv.DictReader`` and must provide a ``key`` column and
an ``info`` column.  An individual's entry starts on a row whose ``key`` is
empty (the ``info`` cell of that row is used as the name) and is followed by
labelled rows such as ``Offense:``.  The script prints a dict with the
fraction of parsed individuals whose offense text mentions a drug-related
keyword and the fraction whose offense text mentions "conspiracy".
"""

import csv
from datetime import datetime

from dateutil.relativedelta import relativedelta

# NOTE: datetime and relativedelta are not used by the code below; the imports
# are kept in case other code relies on them.

# CSV file for one president.
CSV_PATH = 'obama_commutations.csv'

# Columns every input file must provide.
REQUIRED_COLUMNS = ('key', 'info')

# Label found in the ``key`` column of the row that starts an offense text.
OFFENSE_KEY = 'Offense:'

# Substrings that mark an offense as drug related.  Matching is done against
# lower-cased offense text, so every entry here must be lower case as well.
DRUG_WORDS = (
    "cocaine",
    "marijuana",
    "controlled substance",
    "drug",
    "distribute",
    "distribution",
    "heroin",
    "lsd",
    "manufacture",
)


def load_rows(path):
    """Read *path* as CSV and return its rows as a list of dicts.

    Raises:
        ValueError: if the header lacks one of ``REQUIRED_COLUMNS``.
    """
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(path, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        rows = list(reader)
        fieldnames = reader.fieldnames or []

    missing = [column for column in REQUIRED_COLUMNS if column not in fieldnames]
    if missing:
        raise ValueError(
            'CSV file %r is missing required column(s): %s'
            % (path, ', '.join(missing))
        )
    return rows


def find_individual_splits(rows):
    """Return the indices of the rows on which an individual's entry begins.

    A row starts an entry when its ``key`` is empty and either it is the very
    first row or the row after it has the key ``Offense:``.  The last row has
    no successor, so it can only start an entry when it is also the first row.
    """
    splits = []
    last_index = len(rows) - 1
    for i, row in enumerate(rows):
        if row['key'] != '':
            continue
        # Explicit bounds check instead of catching IndexError on rows[i + 1].
        if i == 0 or (i < last_index and rows[i + 1]['key'] == OFFENSE_KEY):
            splits.append(i)
    return splits


def parse_individuals(rows, splits):
    """Build one ``{'name': ..., 'offense': ...}`` dict per entry.

    Args:
        rows: list of row dicts with ``key`` and ``info`` entries.
        splits: start indices as returned by ``find_individual_splits``.

    Returns:
        A list of dicts.  ``name`` is the ``info`` cell of the entry's first
        row.  ``offense`` is the ``info`` text of the ``Offense:`` row plus
        its continuation row, stripped and joined with a single space so that
        words at a row boundary do not fuse together.
    """
    individuals = []
    # Each entry extends up to the next entry's first row, or to the end.
    stops = list(splits[1:]) + [len(rows)]
    for start, stop in zip(splits, stops):
        offense_parts = []
        for x in range(start + 1, stop):
            key = rows[x]['key']
            starts_offense = key == OFFENSE_KEY
            # NOTE(review): only a blank-key row directly after an "Offense:"
            # row counts as a continuation; an offense wrapped over more rows
            # than that loses its tail.  Confirm against the data.
            continues_offense = key == '' and rows[x - 1]['key'] == OFFENSE_KEY
            if starts_offense or continues_offense:
                # DictReader fills cells missing from a short row with None.
                offense_parts.append((rows[x]['info'] or '').strip())
        individuals.append({
            'name': rows[start]['info'] or '',
            'offense': ' '.join(part for part in offense_parts if part),
        })
    return individuals


def offense_shares(individuals, drug_words=DRUG_WORDS):
    """Return the share of individuals per offense category.

    Args:
        individuals: list of dicts with an ``offense`` string.
        drug_words: substrings that mark an offense as drug related; they are
            compared case-insensitively.

    Returns:
        ``{'drug': float, 'conspiracy': float}``, each value being the count
        of matching individuals divided by ``len(individuals)``.  Both values
        are ``0.0`` when *individuals* is empty.
    """
    counts = {'drug': 0, 'conspiracy': 0}
    # Lower-case the keywords once: the offense text is lower-cased below, so
    # a mixed-case keyword would otherwise never match.
    keywords = [word.lower() for word in drug_words]
    for person in individuals:
        crime = person['offense'].lower()
        if any(word in crime for word in keywords):
            counts['drug'] += 1
        if 'conspiracy' in crime:
            counts['conspiracy'] += 1

    total = len(individuals)
    if not total:
        # Nothing parsed: avoid dividing by zero.
        return {category: 0.0 for category in counts}
    return {category: count / total for category, count in counts.items()}


def main():
    """Load the CSV at ``CSV_PATH`` and print the offense shares."""
    rows = load_rows(CSV_PATH)
    splits = find_individual_splits(rows)
    individuals = parse_individuals(rows, splits)
    print(offense_shares(individuals))


if __name__ == '__main__':
    main()