#!/usr/bin/env python
# coding: utf-8

# ## CAOS Workshop Series: Introduction to Python Programming
# ### Chris McCray - 21 February 2018

# Basic Python syntax (from https://www.stavros.io/tutorials/python/)
# * Python has no mandatory statement termination characters (i.e. semicolons)
# * Single-line comments start with a #
# * Variable values are assigned using =
# * Python automatically sets the data type (e.g. int, float, str)
# * Basic data structures: lists, tuples, dictionaries (index of the first item is 0)

# Python allows for a great deal of freedom, but does have a style guide (PEP 8) to make your code generally cleaner and more user-friendly: https://www.python.org/dev/peps/pep-0008/
#
# Many good intro to Python resources can be found online (check them out for more detail than we can go through here) - e.g., Python tutorial: https://docs.python.org/3/tutorial/
#
# Python code can be run in scripts, with an IDE (see Spyder), or in Jupyter notebooks like this one

# Jupyter Notebook:
# * Allows code to be run in blocks/"cells"
# * Allows for easy presentation of figures and code
# * Can be hosted remotely (i.e. on a server, closer to your data)

# ### Variables, data types, and printing

# Variable types are automatically assigned - no type declarations needed
#
# Variable names
# * Can start with lowercase or uppercase letters or an underscore (but not numbers or other symbols!)
# * Can contain numbers and certain symbols
# * Words should be separated by underscores
#
# Printing
# * In Python 3, printing output requires parentheses: print(variable_name)
# * In Python 2, parentheses are not required (but can be included): print variable_name

# In[1]:


# Integers
my_integer = 3
print(type(my_integer))

# Floats
my_float = 1.23
print(type(my_float))

# Strings (single and double quotes are interchangeable)
string_1 = 'This is a string.'
string_2 = "This is also a string!"
print(type(string_1))


# In[2]:


print(my_integer)
print('Test')
print('Test', my_integer)


# In[3]:


# Joining a str and an int with + raises TypeError. The error is caught and
# shown here so that the demonstration does not abort the rest of the script.
try:
    print(string_1 + my_integer)
except TypeError as err:
    print('TypeError:', err)


# In[ ]:


# Converting the integer to a string first makes the concatenation valid
print(string_1 + ' ' + str(my_integer))


# In[4]:


# print() accepts several arguments of any type and separates them with spaces
print(string_1, my_integer)


# ### Lists and dictionaries

# ##### Lists

# In[5]:


list_one = ['a', 'b', 'c', 'd', 'e', 'f']
print(list_one[0])   # first item
print(list_one[-1])  # last item (negative indices count from the end)


# ![image.png](attachment:image.png)

# ##### Dictionaries

# In[6]:


ages = {'Jim': 23, 'Sarah': 25, 'Tom': 30}
print(ages['Jim'])


# ### Control Flow Tools/Loops

# ##### While loop

# In[7]:


b = 10
while b <= 20:
    print(b)
    b += 1


# ##### If statements

# In[9]:


# int() raises ValueError if the text entered is not a valid integer
x = int(input("Please enter an integer:"))
if x < 0:
    print(x, 'is less than 0.')
elif x == 0:
    print(x, 'is 0.')
else:
    print(x, 'is greater than 0.')


# ##### For loop

# In[10]:


words = ['cat', 'horse', 'chicken']
for w in words:
    print(w, len(w))


# ### Modules and functions
# * Python source files (ending in .py) are known as **modules**
# * Modules contain **functions**, which can be called separately from the module
# * Below is an example of what's inside the module _welcome.py_ (modified from https://developers.google.com/edu/python/introduction)
# * Try running welcome.py by typing ```python welcome.py``` and your name, in your command line

# ```python
# #!/usr/bin/env python
#
# # import modules used here -- sys is a very standard one
# import sys
#
# # Gather our code in a main() function
# def main():
#     print('Hello there', sys.argv[1])
#     # Command line args are in sys.argv[1], sys.argv[2] ...
#     # sys.argv[0] is the script name itself and can be ignored
#
# def string_information(string):
#     num_characters = len(string)
#     num_no_whitespace = (len(string.replace(" ", "")))
#     print('This string has',num_characters,'characters including \\
#     whitespace and',num_no_whitespace,'without whitespace.')
#
# # Standard boilerplate to call the main() function to begin
# # the program.
# if __name__ == '__main__':
#     main()
#
# ```

# Functions can be imported into other modules:
# (welcome.py can be downloaded at https://github.com/mccrayc/tutorials/blob/master/1_intro/welcome.py)

# In[ ]:


from welcome import string_information


# In[ ]:


string = 'This Is a Test'
string_information(string)


# ### Basic Math

# In[13]:


print(3.1 + 3.6)
print(3.1 / 392)
print(3.1 * 3.2)
print(4 ** 2)


# In[14]:


import math
from math import cos
print(cos(2 * math.pi))
print(math.sqrt(4))


# #### Warning: In Python 2, integer division is "floor division"
# * 5/6 = 0
# * 6/5 = 1

# #### NumPy
# From http://www.numpy.org/
# > NumPy is the fundamental package for scientific computing with Python. It contains among other things
# * a powerful N-dimensional array object
# * sophisticated (broadcasting) functions
# * tools for integrating C/C++ and Fortran code
# * useful linear algebra, Fourier transform, and random number capabilities

# In[15]:


import numpy as np


# In[16]:


a = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(a)


# In[17]:


# Subtract 3 from each array element
print(a - 3)

# Get the cosine of each array element
print(np.cos(a))

# Calculate e^x for each array element x
print(np.exp(a))

# Transpose a
print(a.T)


# NumPy is a very important part of scientific Python, and forms an integral part of nearly all other scientific packages. You should look through https://docs.scipy.org/doc/numpy-dev/user/quickstart.html to get some background in how it works.

# ### Plotting with matplotlib

# The most common plotting library with Python is currently [matplotlib](https://matplotlib.org/), which provides a MATLAB-style interface
#
# * There are many ways to create figures and subplots with matplotlib, but here we'll just go over a basic example using the pyplot interface.
# * Pyplot provides MATLAB-like functionality

# In[18]:


import matplotlib.pyplot as plt


# In[19]:


# This allows for an interactive figure interface within the jupyter notebook.
# If you just want to show the figure without interactivity, use %matplotlib inline
#
# get_ipython() is only defined when running under IPython/Jupyter. When this
# file is run as a plain Python script the name does not exist, so the magic
# is skipped instead of stopping the script with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'notebook')
except NameError:
    pass


# In[20]:


x = np.arange(0, 100, 0.01)
y1 = np.cos(x)
y2 = np.sin(x)


# In[21]:


plt.plot(x, y1, label='cos(x)')
plt.plot(x, y2, label='sin(x)')
plt.axhline(y=0, color='k')


# In[22]:


# Zoom in on the plot
plt.xlim([0, 10])


# In[ ]:


# Add labels to the axes
plt.xlabel("x")
plt.ylabel("y")

# Add a title
plt.title("$sin(x)$ and $cos(x)$")

# Add a grid
plt.grid()


# In[23]:


# Plot a basic legend
plt.legend()


# In[24]:


plt.close()


# ### Exercise: Working with real data (CSV format) in Pandas
# * There are many ways to read CSV data. Python's standard library includes the "csv" package
# * "Pandas" (https://pandas.pydata.org/) is one of the key packages in scientific Python and data science
# * Pandas makes reading CSVs easy, handles missing data well, and allows for quick calculations and plotting
#

# In[25]:


import pandas as pd


# We'll read in a CSV file that contains daily weather data for each day in 2017 from Environment and Climate Change Canada for CYUL (Montreal-Trudeau Airport)

# In[26]:


cyul_2017 = pd.read_csv('http://www.cdmccray.com/python_tutorial/eng-daily-01012017-12312017.csv')


# In[27]:


# In a notebook, a bare expression at the end of a cell displays its value;
# when run as a plain script these bare expressions produce no output.
cyul_2017


# In[28]:


# Set the index of the Pandas DataFrame to Date
cyul_2017.set_index('Date', inplace=True)


# In[29]:


cyul_2017


# In[30]:


cyul_2017.loc['2017-05-29']


# Let's see what the warmest and coldest temperatures of 2017 were:

# In[31]:


cyul_2017['Tmax'].nlargest(5)


# In[32]:


cyul_2017['Tmin'].nsmallest(5)


# #### Now, let's try plotting some of this data

# First, we'll grab the individual columns we want for our x and y data

# In[33]:


max_temps = cyul_2017['Tmax']


# In[34]:


max_temps


# In[35]:


plt.figure(figsize=[7, 4])
plt.plot(max_temps)


# Pandas has many convenience functions that allow us to quickly plot our data in a much prettier way!
# In[36]:


# Start from a fresh figure, then let pandas draw each column onto it
plt.close()
plt.figure()
cyul_2017['Tmax'].plot(color='red', figsize=[8, 5])
cyul_2017['Tmin'].plot(color='blue')


# #### Let's add a 30-day rolling average to the plot so that we smooth out the variations
#

# In[38]:


# center=True centres each 30-day window on its date; min_periods=2 lets the
# windows at the start and end of the year be averaged over fewer days.
cyul_2017['Tmean'].rolling(30, min_periods=2, center=True).mean().plot(c='k', label='30-day avg. Tmean')
plt.grid()


# #### Now, add a title, axis labels, and legend

# In[39]:


plt.title('Montreal Daily Temperatures - 2017')
plt.ylabel('Temperature (deg C)')
plt.legend()


# #### With pandas, you can do quick statistics/calculations as well. This is what makes it really powerful for data analysis

# In[40]:


cyul_2017.describe()


# In[41]:


# Boolean indexing: keep only the rows where the Month column equals 12
cyul_2017[cyul_2017.Month == 12].describe()


# ### Try making a plot of one of the other fields in cyul_2017

# In[ ]:


plt.close()
#######


# ## Other useful AOS-related Python packages
# * Xarray - reading, analyzing, editing, creating NetCDF files
# * MetPy - https://unidata.github.io/MetPy/latest/
#     * Meteorological functions and calculations ($\theta_e$, RH, dew point, etc.)
#     * Plotting Skew-T diagrams, station plots, radar data, etc.
# * Cartopy - http://scitools.org.uk/cartopy/
#     * Plotting data on maps
#     * Note that you'll come across matplotlib basemap - this is no longer developed and Cartopy is currently the successor to basemap