# Exploratory analysis of bike-theft records joined with daily high
# temperatures.  The script is written notebook-style: bare expressions such
# as ``df.head()`` only display something in an interactive session.

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import rcParams

BIKES_CSV = '/Users/danielforsyth/Desktop/bikes.csv'
CLIMATE_CSV = '/Users/danielforsyth/Desktop/climate.csv'

# --- Load the theft records and keep only the columns used below -----------
df = pd.read_csv(BIKES_CSV)
df.head()

# .copy() so the column assignments further down operate on an independent
# frame rather than on a slice of the raw data.
df = df[['LOCATION_B', 'THEFT_DATE', 'THEFT_YEAR', 'THEFT_HOUR',
         'STOLEN_VAL', 'UCR']].copy()
df.columns = ['Location', 'Date', 'Year', 'Hour', 'Value', 'Value_Code']
df.head()

# --- Plot styling -----------------------------------------------------------
# ``pd.options.display.mpl_style`` no longer exists in pandas; a matplotlib
# style sheet is the replacement.
# NOTE(review): 'ggplot' is presumed to be the closest match to the old
# pandas 'default' look - confirm the rendered figures are acceptable.
plt.style.use('ggplot')
rcParams['figure.figsize'] = (20, 8)
rcParams['figure.dpi'] = 300

# --- Thefts per day ---------------------------------------------------------
df['Date'] = pd.to_datetime(df['Date'])
# Keep 'Date' as a column too (drop=False); the index is what the plot uses
# for its x axis.
df.set_index('Date', drop=False, inplace=True)
# Number of thefts recorded on each row's date, looked up by value so the
# result does not depend on index alignment.
df['Thefts'] = df['Date'].map(df['Date'].value_counts())

plt.figure()
plt.ylim(0, 20)
df['Thefts'].plot(x_compat=True)

# --- Daily high temperatures ------------------------------------------------
# NOTE(review): building one datetime column from several via a
# ``parse_dates`` dict is deprecated in recent pandas releases - confirm it
# is still supported by the installed version.
temp = pd.read_csv(
    CLIMATE_CSV,
    parse_dates={'Date': ['MO', 'DAY', 'YEAR']},
    usecols=['MO', 'DAY', 'YEAR', 'HIGH'],
)
temp.head()
temp.plot()

# --- Merge thefts with temperatures -----------------------------------------
# 'Date' is both the index name and a column of ``df``; merging on it directly
# is ambiguous, so merge a copy whose index has been reset.
merged = pd.merge(df.reset_index(drop=True), temp, on='Date', how='outer')
merged = merged.rename(columns={'HIGH': 'Temp'})
merged.head()

# ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
new = merged.sort_values('Thefts', ascending=False)
new = new.dropna()
new.head()

thefts = merged['Date'].value_counts()
thefts.head(10)

# --- Thefts per year (2013 is missing October-December) ---------------------
yearly = df.Year.value_counts()
yearly

# --- Thefts per month -------------------------------------------------------
# 'Date' holds datetimes at this point, so slicing its string form would give
# the first two digits of the year, not the month.  Take the month from the
# datetime itself and store it as an unpadded string ('1' .. '12').  The
# DataFrame index still holds the full timestamps.
df['Date'] = df['Date'].dt.strftime('%m').str.lstrip('0')
monthly = df.Date.value_counts()
monthly

# --- Remaining frequency tables ---------------------------------------------
df.Hour.value_counts()
df.Value.value_counts()
df.Value_Code.value_counts()

loc = df.Location.value_counts()
loc.head(25)

value = df[['Location', 'Value_Code']]
value.head()