"""Explore which television stations carry the games in a schedule CSV export.

Kept compatible with both Python 2.7 and Python 3: every ``print`` call takes
a single argument, which behaves the same under either interpreter.
"""
import pandas as pd

# Columns removed from the schedule before it is inspected.
UNUSED_COLUMNS = [
    "REMINDER_OFF",
    "REMINDER_ON",
    "START_TIME_ET",
    "END_DATE",
    "END_DATE_ET",
    "END_TIME",
    "END_TIME_ET",
    "REMINDER_TIME",
    "REMINDER_TIME_ET",
    "SHOWTIMEAS_FREE",
    "SHOWTIMEAS_BUSY",
    "REMINDER_DATE",
]


def tv_stations_from_description(description):
    """Return a list of television stations embedded in the given description.

    The station listing is taken to be the text between the first and second
    ":" of the description, cut off at the first "-----"; individual stations
    inside that listing are separated by "--".  Surrounding whitespace is
    stripped from every station name.

    Returns an empty list when ``description`` has no ":" or is not a string
    at all (for example the NaN pandas uses for an empty CSV cell), instead of
    raising ``IndexError`` / ``AttributeError``.
    """
    try:
        fields = description.split(":")
    except AttributeError:
        # Not a string (e.g. NaN for a missing cell): nothing to parse.
        return []
    if len(fields) < 2:
        # No ":" means there is no station listing in this description.
        return []
    listing = fields[1].split("-----")[0]
    return [station.strip() for station in listing.split("--")]


def main(csv_path="phillies.csv"):
    """Load the schedule at ``csv_path`` and print its TV station breakdown."""
    # DataFrame.from_csv was removed in pandas 1.0; these read_csv arguments
    # reproduce its defaults (first column as index, dates parsed).
    schedule = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    schedule.info()
    # Bare expressions only render inside a notebook, so print explicitly.
    print(schedule.head())

    schedule = schedule.drop(UNUSED_COLUMNS, axis=1)
    print(schedule.head())
    print(schedule.DESCRIPTION.head())

    # Spot-check the parser against two rows.
    # NOTE(review): assumes the original integer ``[]`` lookups meant "row at
    # this position" (the index is the CSV's first column, not a 0..n range);
    # confirm against the data.
    description = schedule.DESCRIPTION.iloc[2]
    print(description)
    result = tv_stations_from_description(description)
    print(result)
    assert len(result) == 2

    description = schedule.DESCRIPTION.iloc[0]
    print(description)
    result = tv_stations_from_description(description)
    print(result)
    assert len(result) == 1

    # Reuse the helper rather than repeating its parsing logic in a lambda.
    stations_series = schedule.DESCRIPTION.apply(tv_stations_from_description)
    print(stations_series)

    # Every distinct station that appears anywhere in the schedule.
    all_stations = {
        station for stations in stations_series.values for station in stations
    }
    print(all_stations)

    # Games whose description mentions NBC 10 or FOX.  na=False keeps rows with
    # a missing description out of the mask instead of breaking the selection.
    on_nbc_or_fox = schedule.DESCRIPTION.str.contains("NBC 10|FOX", na=False)
    print(schedule[on_nbc_or_fox])


if __name__ == "__main__":
    main()