#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') # # Overview of Python # # (c) 2019 [Steve Phelps](mailto:sphelps@sphelps.net) # ## Python is interpreted # # - Python is an _interpreted_ language, in contrast to Java and C which are compiled languages. # # - This means we can type statements into the interpreter and they are executed immediately. # # In[2]: 5 + 5 # - Groups of statements are all executed one after the other: # In[3]: x = 5 y = 'Hello There' z = 10.5 # - We can visualize the above code using [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%205%0Ay%20%3D%20'Hello%20There'%0Az%20%3D%2010.5&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). # In[4]: x + 5 # ## Assignments versus equations # # - In Python when we write `x = 5` this means something different from an equation $x=5$. # # - Unlike variables in mathematical models, variables in Python can refer to different things as more statements are interpreted. # # In[5]: x = 1 print('The value of x is', x) x = 2.5 print('Now the value of x is', x) x = 'hello there' print('Now it is ', x) # ## Calling Functions # # We can call functions in a conventional way using round brackets # In[6]: round(3.14) # ## Types # # - Values in Python have an associated _type_. # # - If we combine types incorrectly we get an error. # In[7]: print(y) # In[8]: y + 5 # ## The type function # # - We can query the type of a value using the `type` function. # In[9]: type(1) # In[10]: type('hello') # In[11]: type(2.5) # In[12]: type(True) # ## Null values # # - Sometimes we represent "no data" or "not applicable". # # - In Python we use the special value `None`. # # - This corresponds to `Null` in Java or SQL. # # In[13]: result = None # - When we fetch the value `None` in the interactive interpreter, no result is printed out. 
# # In[14]: result # ## Testing for Null values # # - We can check whether there is a result or not using the `is` operator: # In[15]: result is None # In[16]: x = 5 x is None # ## Converting values between types # # - We can convert values between different types. # # ### Converting to floating-point # # - To convert an integer to a floating-point number use the `float()` function. # # In[17]: x = 1 x # In[18]: type(x) # In[19]: y = float(x) y # ### Converting to integers # # - To convert a floating-point to an integer use the `int()` function. # In[20]: type(y) # In[21]: int(y) # ## Variables are not typed # # - _Variables_ themselves, on the other hand, do not have a fixed type. # - It is only the values that they refer to that have a type. # - This means that the type referred to by a variable can change as more statements are interpreted. # # In[22]: y = 'hello' print('The type of the value referred to by y is ', type(y)) y = 5.0 print('And now the type of the value is ', type(y)) # ## Polymorphism # # - The meaning of an operator depends on the types we are applying it to. # # # In[23]: 1 + 1 # In[24]: 'a' + 'b' # In[25]: '1' + '1' # ## Conditional Statements and Indentation # # - The syntax for control structures in Python uses _colons_ and _indentation_. # # - Beware that white-space affects the semantics of Python code. # # - Statements that are indented using the Tab key are grouped together. # ### `if` statements # In[26]: x = 5 if x > 0: print('x is strictly positive.') print(x) print('finished.') # - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%205%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0A%20%20%20%20print%28x%29%0A%20%20%20%20%0Aprint%28'finished.'%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). 
# ### Changing indentation # In[27]: x = 0 if x > 0: print('x is strictly positive.') print(x) print('finished.') # - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%200%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0Aprint%28x%29%0A%20%20%20%20%0Aprint%28'finished.'%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). # ### `if` and `else` # In[28]: x = 0 print('Starting.') if x > 0: print('x is strictly positive.') else: if x < 0: print('x is strictly negative.') else: print('x is zero.') print('finished.') # - Visualize the above on [PythonTutor](http://pythontutor.com/live.html#code=x%20%3D%200%0Aprint%28'Starting.'%29%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0Aelse%3A%0A%20%20%20%20if%20x%20%3C%200%3A%0A%20%20%20%20%20%20%20%20print%28'x%20is%20strictly%20negative.'%29%0A%20%20%20%20else%3A%0A%20%20%20%20%20%20%20%20print%28'x%20is%20zero.'%29%0Aprint%28'finished.'%29&cumulative=false&curInstr=6&heapPrimitives=nevernest&mode=display&origin=opt-live.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). # ### `elif` # In[29]: print('Starting.') if x > 0: print('x is strictly positive') elif x < 0: print('x is strictly negative') else: print('x is zero') print('finished.') # ## Lists # # # We can use _lists_ to hold an ordered sequence of values. # In[30]: l = ['first', 'second', 'third'] l # Lists can contain different types of variable, even in the same list. # In[31]: another_list = ['first', 'second', 'third', 1, 2, 3] another_list # ## Mutable Datastructures # Lists are _mutable_; their contents can change as more statements are interpreted. # In[32]: l.append('fourth') l # ## References # # - Whenever we bind a variable to a value in Python we create a *reference*. # # - A reference is distinct from the value that it refers to. 
# # - Variables are names for references. # # In[33]: X = [1, 2, 3] Y = X # ### Side effects # - The above code creates two different references (named `X` and `Y`) to the *same* value `[1, 2, 3]` # # - Because lists are mutable, changing them can have side-effects on other variables. # # - If we append something to `X` what will happen to `Y`? # In[34]: X.append(4) X # In[35]: Y # - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=X%20%3D%20%5B1,%202,%203%5D%0AY%20%3D%20X%0AX.append%284%29%0Aprint%28Y%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). # ## State and identity # # - The state referred to by a variable is *different* from its identity. # # - To compare *state* use the `==` operator. # # - To compare *identity* use the `is` operator. # # - When we compare identity we check equality of references. # # - When we compare state we check equality of values. # # ### Example # # - We will create two *different* lists, with two associated variables. # In[36]: X = [1, 2] Y = [1] Y.append(2) # - Visualize the above code on [PythonTutor](http://pythontutor.com/visualize.html#code=X%20%3D%20%5B1,%202%5D%0AY%20%3D%20%5B1%5D%0AY.append%282%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). 
# ### Comparing state # In[37]: X # In[38]: Y # In[39]: X == Y # ### Comparing identity # In[40]: X is Y # ### Copying data prevents side effects # # - In this example, because we have two different lists we avoid side effects # In[41]: Y.append(3) X # In[42]: X == Y # In[43]: X is Y # ## Iteration # # - We can iterate over each element of a list in turn using a `for` loop: # # In[44]: my_list = ['first', 'second', 'third', 'fourth'] for i in my_list: print(i) # - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=my_list%20%3D%20%5B'first',%20'second',%20'third',%20'fourth'%5D%0Afor%20i%20in%20my_list%3A%0A%20%20%20%20print%28i%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). # ### Including more than one statement inside the loop # In[45]: my_list = ['first', 'second', 'third', 'fourth'] for i in my_list: print("The next item is:") print(i) print() # - Visualize the above code on [PythonTutor](http://pythontutor.com/visualize.html#code=my_list%20%3D%20%5B'first',%20'second',%20'third',%20'fourth'%5D%0Afor%20i%20in%20my_list%3A%0A%20%20%20%20print%28%22The%20next%20item%20is%3A%22%29%0A%20%20%20%20print%28i%29%0A%20%20%20%20print%28%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). # ### Looping a specified number of times # - To perform a statement a certain number of times, we can iterate over a list of the required size. # In[46]: for i in [0, 1, 2, 3]: print("Hello!") # ### The `range` function # # - To save from having to manually write the numbers out, we can use the function `range()` to count for us. # # - We count starting at 0 (as in Java and C++). 
# In[47]: list(range(4)) # ### `for` loops with the `range` function # In[48]: for i in range(4): print("Hello!") # ## List Indexing # # - Lists can be indexed using square brackets to retrieve the element stored in a particular position. # # # # # In[49]: my_list # In[50]: my_list[0] # In[51]: my_list[1] # ## List Slicing # # - We can also a specify a _range_ of positions. # # - This is called _slicing_. # # - The example below indexes from position 0 (inclusive) to 2 (exclusive). # # # In[52]: my_list[0:2] # ### Indexing from the start or end # - If we leave out the starting index it implies the beginning of the list: # # # In[53]: my_list[:2] # - If we leave out the final index it implies the end of the list: # In[54]: my_list[2:] # #### Copying a list # # - We can conveniently copy a list by indexing from start to end: # # In[55]: new_list = my_list[:] # In[56]: new_list # In[57]: new_list is my_list # In[58]: new_list == my_list # ## Negative Indexing # # - Negative indices count from the end of the list: # # # In[59]: my_list[-1] # In[60]: my_list[:-1] # ## Collections # # - Lists are an example of a *collection*. # # - A collection is a type of value that can contain other values. # # - There are other collection types in Python: # # - `tuple` # - `set` # - `dict` # ### Tuples # # - Tuples are another way to combine different values. # # - The combined values can be of different types. # # - Like lists, they have a well-defined ordering and can be indexed. # # - To create a tuple in Python, use round brackets instead of square brackets # In[61]: tuple1 = (50, 'hello') tuple1 # In[62]: tuple1[0] # In[63]: type(tuple1) # #### Tuples are immutable # # - Unlike lists, tuples are *immutable*. Once we have created a tuple we cannot add values to it. # # # In[64]: tuple1.append(2) # ### Sets # # - Lists can contain duplicate values. # # - A set, in contrast, contains no duplicates. # # - Sets can be created from lists using the `set()` function. 
# # # # In[65]: X = set([1, 2, 3, 3, 4]) X # In[66]: type(X) # - Alternatively we can write a set literal using the `{` and `}` brackets. # In[67]: X = {1, 2, 3, 4} type(X) # #### Sets are mutable # # - Sets are mutable like lists: # In[68]: X.add(5) X # - Duplicates are automatically removed # In[69]: X.add(5) X # #### Sets are unordered # # - Sets do not have an ordering. # # - Therefore we cannot index or slice them: # # # In[70]: X[0] # #### Operations on sets # # - Union: $X \cup Y$ # # In[71]: X = {1, 2, 3} Y = {4, 5, 6} X | Y # - Intersection: $X \cap Y$: # In[72]: X = {1, 2, 3, 4} Y = {3, 4, 5} X & Y # - Difference $X - Y$: # # In[73]: X - Y # ### Dictionaries # # - A dictionary contains a mapping between *keys*, and corresponding *values*. # # - Mathematically it is a one-to-one function with a finite domain and range. # # - Given a key, we can very quickly look up the corresponding value. # # - The values can be any type (and need not all be of the same type). # # - Keys can be any immutable (hashable) type. # # - They are abbreviated by the keyword `dict`. # # - In other programming languages they are sometimes called *associative arrays*. # #### Creating a dictionary # # - A dictionary contains a set of key-value pairs. # # - To create a dictionary: # # In[74]: students = { 107564: 'Xu', 108745: 'Ian', 102567: 'Steve' } # - The above initialises the dictionary students so that it contains three key-value pairs. # # - The keys are the student id numbers (integers). # # - The values are the names of the students (strings). 
#
# - Although we use the same brackets as for sets, this is a different type of collection:

# In[75]:

type(students)

# #### Accessing the values in a dictionary
#
# - We can access the value corresponding to a given key using the same syntax to access particular elements of a list:

# In[76]:

students[108745]

# - Accessing a non-existent key will generate a `KeyError`:

# In[77]:

# Deliberate error for demonstration: 123 is not a key of `students`.
students[123]

# #### Updating dictionary entries
#
# - Dictionaries are mutable, so we can update the mapping:

# In[78]:

students[108745] = 'Fred'
print(students[108745])

# - We can also grow the dictionary by adding new keys:

# In[79]:

students[104587] = 'John'
print(students[104587])

# #### Dictionary keys can be any immutable type
#
# - We can use any immutable type for the keys of a dictionary.
#
# - For example, we can map names onto integers:

# In[80]:

age = { 'John':21, 'Steve':47, 'Xu': 22 }

# In[81]:

age['Steve']

# #### Creating an empty dictionary
#
# - We often want to initialise a dictionary with no keys or values.
#
# - To do this call the function `dict()`:

# In[82]:

result = dict()

# - We can then progressively add entries to the dictionary, e.g. using iteration:

# In[83]:

for i in range(5):
    result[i] = i**2
print(result)

# #### Iterating over a dictionary
#
# - We can use a for loop with dictionaries, just as we can with other collections such as sets.
# - When we iterate over a dictionary, we iterate over the *keys*.
# - We can then perform some computation on each key inside the loop.
# - Typically we will also access the corresponding value.

# In[84]:

# The loop variable is named `student_id` rather than `id` so that the
# built-in function `id()` is not shadowed for the rest of the session.
for student_id in students:
    print(students[student_id])

# ### The size of a collection
#
# - We can count the number of values in a collection using the `len` (length) function.
#
# - This can be used with any type of collection (list, set, tuple etc.).
# # In[85]: len(students) # In[86]: len(['one', 'two']) # In[87]: len({'one', 'two', 'three'}) # #### Empty collections # - Empty collections have a size of zero: # In[88]: empty_list = [] len(empty_list) == 0 # ### Arrays # # - Python also has arrays which contain a *single* type of value. # # - i.e. we *cannot* have different types of value within the same array. # # - Arrays are mutable like lists; we can modify the existing elements of an array. # # - However, we typically do not change the size of the array; i.e. it has a fixed length. # ## The `numpy` module # # - Arrays are provided by a separate _module_ called numpy. Modules correspond to packages in e.g. Java. # # - We can import the module and then give it a shorter _alias_. # In[89]: import numpy as np # - We can now use the functions defined in this package by prefixing them with `np`. # # - The function `array()` creates an array given a list. # ### Creating an array # # - We can create an array from a list by using the `array()` function defined in the `numpy` module: # In[90]: x = np.array([0, 1, 2, 3, 4]) x # In[91]: type(x) # ### Functions over arrays # # - When we use arithmetic operators on arrays, we create a new array with the result of applying the operator to each element. # In[92]: y = x * 2 y # - The same goes for functions: # In[93]: x = np.array([-1, 2, 3, -4]) y = abs(x) y # ### Populating Arrays # # - To populate an array with a range of values we use the `np.arange()` function: # # In[94]: x = np.arange(0, 10) x # - We can also use floating point increments. # # In[95]: x = np.arange(0, 1, 0.1) x # ### Basic Plotting # # - We will use a module called `matplotlib` to plot some simple graphs. # # - This module provides functions which are very similar to MATLAB plotting commands. 
#

# In[96]:

import matplotlib.pyplot as plt

# `x` is the array created in the previous cell.
y = x*2 + 5
plt.plot(x, y)
plt.show()

# ### Plotting a sine curve

# In[97]:

from numpy import pi, sin

x = np.arange(0, 2*pi, 0.01)
y = sin(x)
plt.plot(x, y)
plt.show()

# ### Plotting a histogram
#
# - We can use the `hist()` function in `matplotlib` to plot a histogram.

# In[98]:

# Generate some random data
data = np.random.randn(1000)

# `plt.hist` returns a (counts, bin_edges, patches) tuple rather than an axes
# object; the return value is not needed here, so it is not bound to a name.
plt.hist(data)
plt.show()

# ### Computing histograms as matrices
#
# - The function `histogram()` in the `numpy` module will count frequencies into bins and return the result as a pair of arrays: the count in each bin, and the bin edges.

# In[99]:

np.histogram(data)

# ## Defining new functions
#
#

# In[100]:

def squared(x):
    """Return `x` raised to the power 2."""
    return x ** 2

squared(5)

# ## Local Variables
#
# - Variables created inside functions are _local_ to that function.
#
# - They are not accessible to code outside of that function.

# In[101]:

def squared(x):
    """Return `x` raised to the power 2, via the local variable `temp`."""
    temp = x ** 2
    return temp

squared(5)

# In[102]:

# Deliberate error for demonstration: `temp` exists only inside `squared`,
# so referring to it here raises NameError.
temp

# ## Functional Programming
#
# - Functions are first-class citizens in Python.
#
# - They can be passed around just like any other value.

# In[103]:

squared

# In[104]:

y = squared
y

# In[105]:

y(5)

# ## Mapping the elements of a collection
#
# - We can apply a function to each element of a collection using the built-in function `map()`.
#
# - This will work with any collection: list, set, tuple or string.
#
# - This will take as an argument _another function_, and the collection we want to apply it to.
#
# - It will return an iterator over the results of applying the function; we wrap it in `list()` to collect them into a list.

# In[106]:

list(map(squared, [1, 2, 3, 4]))

# ## List Comprehensions

# - Because this is such a common operation, Python has a special syntax to do the same thing, called a _list comprehension_.
# # In[107]: [squared(i) for i in [1, 2, 3, 4]] # - If we want a set instead of a list we can use a set comprehension # In[108]: {squared(i) for i in [1, 2, 3, 4]} # ## Cartesian product using list comprehensions # # # image courtesy of [Quartl](https://commons.wikimedia.org/wiki/User:Quartl) # # The [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) of two collections $X = A \times B$ can be expressed by using multiple `for` statements in a comprehension. # # ### example # In[109]: A = {'x', 'y', 'z'} B = {1, 2, 3} {(a,b) for a in A for b in B} # ## Cartesian products with other collections # # - The syntax for Cartesian products can be used with any collection type. # # In[110]: first_names = ('Steve', 'John', 'Peter') surnames = ('Smith', 'Doe', 'Rabbit') [(first_name, surname) for first_name in first_names for surname in surnames] # ## Joining collections using a zip # # - The Cartesian product pairs every combination of elements. # # - If we want a 1-1 pairing we use an operation called a zip. # # - A zip pairs values at the same position in each sequence. # # - Therefore: # - it can only be used with sequences (not sets); and # - both collections must be of the same length. # In[111]: list(zip(first_names, surnames)) # ## Anonymous Function Literals # # - We can also write _anonymous_ functions. # - These are function literals, and do not necessarily have a name. # - They are called _lambda expressions_ (after the $\lambda-$calculus). # In[112]: list(map(lambda x: x ** 2, [1, 2, 3, 4])) # ## Filtering data # # - We can filter a list by applying a _predicate_ to each element of the list. # # - A predicate is a function which takes a single argument, and returns a boolean value. # # - `filter(p, X)` is equivalent to $\{ x : p(x) \; \forall x \in X \}$ in set-builder notation. # # In[113]: list(filter(lambda x: x > 0, [-5, 2, 3, -10, 0, 1])) # We can use both `filter()` and `map()` on other collections such as strings or sets. 
# In[114]: list(filter(lambda x: x > 0, {-5, 2, 3, -10, 0, 1})) # ## Filtering using a list comprehension # # - Again, because this is such a common operation, we can use simpler syntax to say the same thing. # # - We can express a filter using a list-comprehension by using the keyword `if`: # In[115]: data = [-5, 2, 3, -10, 0, 1] [x for x in data if x > 0] # - We can also filter and then map in the same expression: # In[116]: from numpy import sqrt [sqrt(x) for x in data if x > 0] # ## The reduce function # # - The `reduce()` function recursively applies another function to pairs of values over the entire list, resulting in a _single_ return value. # In[117]: from functools import reduce reduce(lambda x, y: x + y, [0, 1, 2, 3, 4, 5]) # ## Big Data # # - The `map()` and `reduce()` functions form the basis of the map-reduce programming model. # # - [Map-reduce](https://en.wikipedia.org/wiki/MapReduce) is the basis of modern highly-distributed large-scale computing frameworks. # # - It is used in BigTable, Hadoop and Apache Spark. # # - See [these examples in Python](https://spark.apache.org/examples.html) for Apache Spark.