In [3]:
import csv 
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Open the CSV file for a president
with open('obama_commutations.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = [row for row in reader]
In [4]:
rows

# Figure out the indicies where each individual entry begins
individualSplits = []

for i, row in enumerate(rows):
    # Names don't have a key column, and the next row always has key "Offense:"
    try:
        if rows[i]['key'] == '' and (i == 0 or rows[i+1]['key'] == 'Offense:'):
            individualSplits.append(i)
    except:
        pass
In [5]:
individuals = []

# Parse each individual into a dict with values representing the name and the offense
for i, split in enumerate(individualSplits):
    try:
        nextSplit = individualSplits[i + 1]
    except: 
        nextSplit = len(rows)
    
    individual = {
        'name': '',
        'offense': '',
    }
    
    individual['name'] += rows[split]['info']
    
    for x in range(split + 1, nextSplit):
        
        if rows[x]['key'] == 'Offense:' or (rows[x]['key'] == '' and rows[x - 1]['key'] == 'Offense:'):
            individual['offense'] += rows[x]['info']
        
    individuals.append(individual)
In [6]:
drug_words = [
    "cocaine",
    "marijuana",
    "controlled substance",
    "drug",
    "distribute",
    "distribution",
    "heroin",
    "LSD",
    "manufacture",
]

offenses = {
    'drug': 0,
    'conspiracy': 0
}

for person in individuals:
    
    crime = person['offense'].lower()
    
    if any(word in crime for word in drug_words):
        offenses['drug'] += 1
    
    if "conspiracy" in crime:
        offenses['conspiracy'] += 1

offenses['drug'] = offenses['drug'] / len(individuals)
offenses['conspiracy'] = offenses['conspiracy'] / len(individuals)
In [7]:
print(offenses)
{'conspiracy': 0.6227371469949312, 'drug': 0.9840695148443157}