#!/usr/bin/env python
# coding: utf-8

# In[21]:


# Render our plots inline.
# `get_ipython` only exists when this file is executed by IPython/Jupyter;
# guard the call so the exported script does not die with a NameError when it
# is run with a plain Python interpreter.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')

import pandas as pd
import matplotlib.pyplot as plt

# Make the graphs a bit prettier, and bigger
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 5)


# # 1.1 Reading data from a csv file

# You can read data from a CSV file using the `read_csv` function. By default,
# it assumes that the fields are comma-separated.
#
# We're going to be looking at some cyclist data from Montréal. Here's the
# [original page](http://donnees.ville.montreal.qc.ca/dataset/velos-comptage)
# (in French), but it's already included in this repository. We're using the
# data from 2012.
#
# This dataset is a list of how many people were on 7 different bike paths in
# Montreal, each day.

# In[22]:


# Deliberately read with the default comma separator: the result is the
# "broken" frame that the next section goes on to fix.
broken_df = pd.read_csv('../data/bikes.csv', encoding="ISO-8859-1")


# In[23]:


# Look at the first 3 rows
broken_df[:3]


# You'll notice that this is totally broken! `read_csv` has a bunch of options
# that will let us fix that, though. Here we'll
#
# * change the column separator to a `;`
# * Set the encoding to `'latin1'` (the default is `'utf8'`)
# * Parse the dates in the 'Date' column
# * Tell it that our dates have the day first instead of the month first
# * Set the index to be the 'Date' column

# In[24]:


fixed_df = pd.read_csv(
    '../data/bikes.csv',
    sep=';',
    encoding='latin1',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)
fixed_df[:3]


# # 1.2 Selecting a column

# When you read a CSV, you get a kind of object called a `DataFrame`, which is
# made up of rows and columns. You get columns out of a DataFrame the same way
# you get elements out of a dictionary.
#
# Here's an example:

# In[25]:


fixed_df['Berri 1']


# # 1.3 Plotting a column

# Just add `.plot()` to the end! How could it be easier? =)
#
# We can see that, unsurprisingly, not many people are biking in January,
# February, and March.

# In[26]:


fixed_df['Berri 1'].plot()


# We can also plot all the columns just as easily. We'll make it a little
# bigger, too.
# You can see that it's more squished together, but all the bike paths behave
# basically the same -- if it's a bad day for cyclists, it's a bad day
# everywhere.

# In[27]:


fixed_df.plot(figsize=(15, 10))


# # 1.4 Putting all that together

# Here's the code we needed to write to draw that graph, all together:

# In[28]:


df = pd.read_csv(
    '../data/bikes.csv',
    sep=';',
    encoding='latin1',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)
df['Berri 1'].plot()