#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
# # Overview of Python
#
# (c) 2019 [Steve Phelps](mailto:sphelps@sphelps.net)
# ## Python is interpreted
#
# - Python is an _interpreted_ language, in contrast to Java and C which are compiled languages.
#
# - This means we can type statements into the interpreter and they are executed immediately.
#
# In[2]:
5 + 5
# - Groups of statements are all executed one after the other:
# In[3]:
x = 5
y = 'Hello There'
z = 10.5
# - We can visualize the above code using [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%205%0Ay%20%3D%20'Hello%20There'%0Az%20%3D%2010.5&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# In[4]:
x + 5
# ## Assignments versus equations
#
# - In Python when we write `x = 5` this means something different from an equation $x=5$.
#
# - Unlike variables in mathematical models, variables in Python can refer to different things as more statements are interpreted.
#
# In[5]:
x = 1
print('The value of x is', x)
x = 2.5
print('Now the value of x is', x)
x = 'hello there'
print('Now it is ', x)
# ## Calling Functions
#
# We can call functions in a conventional way using round brackets
# In[6]:
round(3.14)
# ## Types
#
# - Values in Python have an associated _type_.
#
# - If we combine types incorrectly we get an error.
# In[7]:
print(y)
# In[8]:
y + 5
# ## The type function
#
# - We can query the type of a value using the `type` function.
# In[9]:
type(1)
# In[10]:
type('hello')
# In[11]:
type(2.5)
# In[12]:
type(True)
# ## Null values
#
# - Sometimes we represent "no data" or "not applicable".
#
# - In Python we use the special value `None`.
#
# - This corresponds to `null` in Java or `NULL` in SQL.
#
# In[13]:
result = None
# - When we fetch the value `None` in the interactive interpreter, no result is printed out.
#
# In[14]:
result
# ## Testing for Null values
#
# - We can check whether there is a result or not using the `is` operator:
# In[15]:
result is None
# In[16]:
x = 5
x is None
# ## Converting values between types
#
# - We can convert values between different types.
#
# ### Converting to floating-point
#
# - To convert an integer to a floating-point number use the `float()` function.
#
# In[17]:
x = 1
x
# In[18]:
type(x)
# In[19]:
y = float(x)
y
# ### Converting to integers
#
# - To convert a floating-point to an integer use the `int()` function.
# In[20]:
type(y)
# In[21]:
int(y)
# ## Variables are not typed
#
# - _Variables_ themselves, on the other hand, do not have a fixed type.
# - It is only the values that they refer to that have a type.
# - This means that the type referred to by a variable can change as more statements are interpreted.
#
# In[22]:
y = 'hello'
print('The type of the value referred to by y is ', type(y))
y = 5.0
print('And now the type of the value is ', type(y))
# ## Polymorphism
#
# - The meaning of an operator depends on the types we are applying it to.
#
#
# In[23]:
1 + 1
# In[24]:
'a' + 'b'
# In[25]:
'1' + '1'
# ## Conditional Statements and Indentation
#
# - The syntax for control structures in Python uses _colons_ and _indentation_.
#
# - Beware that white-space affects the semantics of Python code.
#
# - Consecutive statements that are indented to the same level (e.g. using the Tab key) are grouped together into a block.
# ### `if` statements
# In[26]:
x = 5
# Both print calls are indented under the `if`, so they run only when x > 0.
if x > 0:
    print('x is strictly positive.')
    print(x)

# Back at the left margin: this statement runs whatever the value of x.
print('finished.')
# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%205%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0A%20%20%20%20print%28x%29%0A%20%20%20%20%0Aprint%28'finished.'%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ### Changing indentation
# In[27]:
x = 0
if x > 0:
    print('x is strictly positive.')
# Dedented: print(x) is no longer part of the `if` body, so it always runs.
print(x)

print('finished.')
# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%200%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0Aprint%28x%29%0A%20%20%20%20%0Aprint%28'finished.'%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ### `if` and `else`
# In[28]:
x = 0
print('Starting.')
if x > 0:
    print('x is strictly positive.')
else:
    # A nested `if` inside the `else` branch needs a second level of indentation.
    if x < 0:
        print('x is strictly negative.')
    else:
        print('x is zero.')
print('finished.')
# - Visualize the above on [PythonTutor](http://pythontutor.com/live.html#code=x%20%3D%200%0Aprint%28'Starting.'%29%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0Aelse%3A%0A%20%20%20%20if%20x%20%3C%200%3A%0A%20%20%20%20%20%20%20%20print%28'x%20is%20strictly%20negative.'%29%0A%20%20%20%20else%3A%0A%20%20%20%20%20%20%20%20print%28'x%20is%20zero.'%29%0Aprint%28'finished.'%29&cumulative=false&curInstr=6&heapPrimitives=nevernest&mode=display&origin=opt-live.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ### `elif`
# In[29]:
# x is not assigned in this cell; it keeps whatever value earlier statements gave it.
print('Starting.')
if x > 0:
    print('x is strictly positive')
elif x < 0:
    # `elif` tests a further condition without adding another level of nesting.
    print('x is strictly negative')
else:
    print('x is zero')
print('finished.')
# ## Lists
#
#
# We can use _lists_ to hold an ordered sequence of values.
# In[30]:
l = ['first', 'second', 'third']
l
# Lists can contain values of different types, even in the same list.
# In[31]:
another_list = ['first', 'second', 'third', 1, 2, 3]
another_list
# ## Mutable Datastructures
# Lists are _mutable_; their contents can change as more statements are interpreted.
# In[32]:
l.append('fourth')
l
# ## References
#
# - Whenever we bind a variable to a value in Python we create a *reference*.
#
# - A reference is distinct from the value that it refers to.
#
# - Variables are names for references.
#
# In[33]:
X = [1, 2, 3]
Y = X
# ### Side effects
# - The above code creates two different references (named `X` and `Y`) to the *same* value `[1, 2, 3]`
#
# - Because lists are mutable, changing them can have side-effects on other variables.
#
# - If we append something to `X` what will happen to `Y`?
# In[34]:
X.append(4)
X
# In[35]:
Y
# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=X%20%3D%20%5B1,%202,%203%5D%0AY%20%3D%20X%0AX.append%284%29%0Aprint%28Y%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ## State and identity
#
# - The state referred to by a variable is *different* from its identity.
#
# - To compare *state* use the `==` operator.
#
# - To compare *identity* use the `is` operator.
#
# - When we compare identity we check equality of references.
#
# - When we compare state we check equality of values.
#
# ### Example
#
# - We will create two *different* lists, with two associated variables.
# In[36]:
X = [1, 2]
Y = [1]
Y.append(2)
# - Visualize the above code on [PythonTutor](http://pythontutor.com/visualize.html#code=X%20%3D%20%5B1,%202%5D%0AY%20%3D%20%5B1%5D%0AY.append%282%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ### Comparing state
# In[37]:
X
# In[38]:
Y
# In[39]:
X == Y
# ### Comparing identity
# In[40]:
X is Y
# ### Copying data prevents side effects
#
# - In this example, because we have two different lists we avoid side effects
# In[41]:
Y.append(3)
X
# In[42]:
X == Y
# In[43]:
X is Y
# ## Iteration
#
# - We can iterate over each element of a list in turn using a `for` loop:
#
# In[44]:
my_list = ['first', 'second', 'third', 'fourth']
# On each pass through the loop, i refers to the next element of my_list.
for i in my_list:
    print(i)
# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=my_list%20%3D%20%5B'first',%20'second',%20'third',%20'fourth'%5D%0Afor%20i%20in%20my_list%3A%0A%20%20%20%20print%28i%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ### Including more than one statement inside the loop
# In[45]:
my_list = ['first', 'second', 'third', 'fourth']
for i in my_list:
    # All three statements share the same indentation, so all of them
    # are repeated for every element.
    print("The next item is:")
    print(i)
    print()
# - Visualize the above code on [PythonTutor](http://pythontutor.com/visualize.html#code=my_list%20%3D%20%5B'first',%20'second',%20'third',%20'fourth'%5D%0Afor%20i%20in%20my_list%3A%0A%20%20%20%20print%28%22The%20next%20item%20is%3A%22%29%0A%20%20%20%20print%28i%29%0A%20%20%20%20print%28%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).
# ### Looping a specified number of times
# - To perform a statement a certain number of times, we can iterate over a list of the required size.
# In[46]:
# The list has four elements, so the loop body runs four times.
for i in [0, 1, 2, 3]:
    print("Hello!")
# ### The `range` function
#
# - To save from having to manually write the numbers out, we can use the function `range()` to count for us.
#
# - We count starting at 0 (as in Java and C++).
# In[47]:
list(range(4))
# ### `for` loops with the `range` function
# In[48]:
# range(4) yields 0, 1, 2, 3, so the loop body runs four times.
for i in range(4):
    print("Hello!")
# ## List Indexing
#
# - Lists can be indexed using square brackets to retrieve the element stored in a particular position.
#
#
#
#
# In[49]:
my_list
# In[50]:
my_list[0]
# In[51]:
my_list[1]
# ## List Slicing
#
# - We can also specify a _range_ of positions.
#
# - This is called _slicing_.
#
# - The example below indexes from position 0 (inclusive) to 2 (exclusive).
#
#
# In[52]:
my_list[0:2]
# ### Indexing from the start or end
# - If we leave out the starting index it implies the beginning of the list:
#
#
# In[53]:
my_list[:2]
# - If we leave out the final index it implies the end of the list:
# In[54]:
my_list[2:]
# #### Copying a list
#
# - We can conveniently copy a list by indexing from start to end:
#
# In[55]:
new_list = my_list[:]
# In[56]:
new_list
# In[57]:
new_list is my_list
# In[58]:
new_list == my_list
# ## Negative Indexing
#
# - Negative indices count from the end of the list:
#
#
# In[59]:
my_list[-1]
# In[60]:
my_list[:-1]
# ## Collections
#
# - Lists are an example of a *collection*.
#
# - A collection is a type of value that can contain other values.
#
# - There are other collection types in Python:
#
# - `tuple`
# - `set`
# - `dict`
# ### Tuples
#
# - Tuples are another way to combine different values.
#
# - The combined values can be of different types.
#
# - Like lists, they have a well-defined ordering and can be indexed.
#
# - To create a tuple in Python, use round brackets instead of square brackets
# In[61]:
tuple1 = (50, 'hello')
tuple1
# In[62]:
tuple1[0]
# In[63]:
type(tuple1)
# #### Tuples are immutable
#
# - Unlike lists, tuples are *immutable*. Once we have created a tuple we cannot add values to it.
#
#
# In[64]:
tuple1.append(2)
# ### Sets
#
# - Lists can contain duplicate values.
#
# - A set, in contrast, contains no duplicates.
#
# - Sets can be created from lists using the `set()` function.
#
#
#
# In[65]:
X = set([1, 2, 3, 3, 4])
X
# In[66]:
type(X)
# - Alternatively we can write a set literal using the `{` and `}` brackets.
# In[67]:
X = {1, 2, 3, 4}
type(X)
# #### Sets are mutable
#
# - Sets are mutable like lists:
# In[68]:
X.add(5)
X
# - Duplicates are automatically removed
# In[69]:
X.add(5)
X
# #### Sets are unordered
#
# - Sets do not have an ordering.
#
# - Therefore we cannot index or slice them:
#
#
# In[70]:
X[0]
# #### Operations on sets
#
# - Union: $X \cup Y$
#
# In[71]:
X = {1, 2, 3}
Y = {4, 5, 6}
X | Y
# - Intersection: $X \cap Y$:
# In[72]:
X = {1, 2, 3, 4}
Y = {3, 4, 5}
X & Y
# - Difference $X - Y$:
#
# In[73]:
X - Y
# ### Dictionaries
#
# - A dictionary contains a mapping between *keys*, and corresponding *values*.
#
# - Mathematically it is a function with a finite domain: each key maps to exactly one value (although different keys may map to the same value).
#
# - Given a key, we can very quickly look up the corresponding value.
#
# - The values can be any type (and need not all be of the same type).
#
# - Keys can be any immutable (hashable) type.
#
# - They are abbreviated by the keyword `dict`.
#
# - In other programming languages they are sometimes called *associative arrays*.
# #### Creating a dictionary
#
# - A dictionary contains a set of key-value pairs.
#
# - To create a dictionary:
#
# In[74]:
students = { 107564: 'Xu', 108745: 'Ian', 102567: 'Steve' }
# - The above initialises the dictionary students so that it contains three key-value pairs.
#
# - The keys are the student id numbers (integers).
#
# - The values are the names of the students (strings).
#
# - Although we use the same brackets as for sets, this is a different type of collection:
# In[75]:
type(students)
# #### Accessing the values in a dictionary
#
# - We can access the value corresponding to a given key using the same syntax to access particular elements of a list:
# In[76]:
students[108745]
# - Accessing a non-existent key will generate a `KeyError`:
# In[77]:
students[123]
# #### Updating dictionary entries
#
# - Dictionaries are mutable, so we can update the mapping:
# In[78]:
students[108745] = 'Fred'
print(students[108745])
# - We can also grow the dictionary by adding new keys:
# In[79]:
students[104587] = 'John'
print(students[104587])
# #### Dictionary keys can be any immutable type
#
# - We can use any immutable type for the keys of a dictionary
#
# - For example, we can map names onto integers:
# In[80]:
age = { 'John':21, 'Steve':47, 'Xu': 22 }
# In[81]:
age['Steve']
# #### Creating an empty dictionary
#
# - We often want to initialise a dictionary with no keys or values.
#
# - To do this call the function `dict()`:
# In[82]:
result = dict()
# - We can then progressively add entries to the dictionary, e.g. using iteration:
# In[83]:
for i in range(5):
    # Map each integer onto its square.
    result[i] = i**2
# Print once, after the loop has added all of the entries.
print(result)
# #### Iterating over a dictionary
#
# - We can use a for loop with dictionaries, just as we can with other collections such as sets.
# - When we iterate over a dictionary, we iterate over the *keys*.
# - We can then perform some computation on each key inside the loop.
# - Typically we will also access the corresponding value.
# In[84]:
# Iterating over a dictionary yields its keys; each value is then looked up by key.
# The loop variable is called student_id so that it does not shadow the builtin id().
for student_id in students:
    print(students[student_id])
# ### The size of a collection
#
# - We can count the number of values in a collection using the `len` (length) function.
#
# - This can be used with any type of collection (list, set, tuple etc.).
#
# In[85]:
len(students)
# In[86]:
len(['one', 'two'])
# In[87]:
len({'one', 'two', 'three'})
# #### Empty collections
# - Empty collections have a size of zero:
# In[88]:
empty_list = []
len(empty_list) == 0
# ### Arrays
#
# - Python also has arrays which contain a *single* type of value.
#
# - i.e. we *cannot* have different types of value within the same array.
#
# - Arrays are mutable like lists; we can modify the existing elements of an array.
#
# - However, we typically do not change the size of the array; i.e. it has a fixed length.
# ## The `numpy` module
#
# - Arrays are provided by a separate _module_ called numpy. Modules correspond to packages in e.g. Java.
#
# - We can import the module and then give it a shorter _alias_.
# In[89]:
import numpy as np
# - We can now use the functions defined in this package by prefixing them with `np`.
#
# - The function `array()` creates an array given a list.
# ### Creating an array
#
# - We can create an array from a list by using the `array()` function defined in the `numpy` module:
# In[90]:
x = np.array([0, 1, 2, 3, 4])
x
# In[91]:
type(x)
# ### Functions over arrays
#
# - When we use arithmetic operators on arrays, we create a new array with the result of applying the operator to each element.
# In[92]:
y = x * 2
y
# - The same goes for functions:
# In[93]:
x = np.array([-1, 2, 3, -4])
y = abs(x)
y
# ### Populating Arrays
#
# - To populate an array with a range of values we use the `np.arange()` function:
#
# In[94]:
x = np.arange(0, 10)
x
# - We can also use floating point increments.
#
# In[95]:
x = np.arange(0, 1, 0.1)
x
# ### Basic Plotting
#
# - We will use a module called `matplotlib` to plot some simple graphs.
#
# - This module provides functions which are very similar to MATLAB plotting commands.
#
# In[96]:
import matplotlib.pyplot as plt
y = x*2 + 5
plt.plot(x, y)
plt.show()
# ### Plotting a sine curve
# In[97]:
from numpy import pi, sin
x = np.arange(0, 2*pi, 0.01)
y = sin(x)
plt.plot(x, y)
plt.show()
# ### Plotting a histogram
#
# - We can use the `hist()` function in `matplotlib` to plot a histogram
# In[98]:
# Generate some random data
data = np.random.randn(1000)
ax = plt.hist(data)
plt.show()
# ### Computing histograms as matrices
#
# - The function `histogram()` in the `numpy` module will count frequencies into bins and return the result as a pair of arrays: the counts in each bin, and the edges of the bins.
# In[99]:
np.histogram(data)
# ## Defining new functions
#
#
# In[100]:
def squared(x):
    """Return x raised to the power 2."""
    return x ** 2
squared(5)
# ## Local Variables
#
# - Variables created inside functions are _local_ to that function.
#
# - They are not accessible to code outside of that function.
# In[101]:
def squared(x):
    """Return x raised to the power 2, computed via a local variable."""
    # temp is created inside the function body, so it is local to the
    # function and is not defined at module level.
    temp = x ** 2
    return temp
squared(5)
# In[102]:
temp
# ## Functional Programming
#
# - Functions are first-class citizens in Python.
#
# - They can be passed around just like any other value.
# In[103]:
squared
# In[104]:
y = squared
y
# In[105]:
y(5)
# ## Mapping the elements of a collection
#
# - We can apply a function to each element of a collection using the built-in function `map()`.
#
# - This will work with any collection: list, set, tuple or string.
#
# - This will take as an argument _another function_, and the list we want to apply it to.
#
# - It will return an iterator over the results of applying the function, which we can convert to a list using `list()`.
# In[106]:
list(map(squared, [1, 2, 3, 4]))
# ## List Comprehensions
# - Because this is such a common operation, Python has a special syntax to do the same thing, called a _list comprehension_.
#
# In[107]:
[squared(i) for i in [1, 2, 3, 4]]
# - If we want a set instead of a list we can use a set comprehension
# In[108]:
{squared(i) for i in [1, 2, 3, 4]}
# ## Cartesian product using list comprehensions
#
#
# image courtesy of [Quartl](https://commons.wikimedia.org/wiki/User:Quartl)
#
# The [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) of two collections $X = A \times B$ can be expressed by using multiple `for` statements in a comprehension.
#
# ### example
# In[109]:
A = {'x', 'y', 'z'}
B = {1, 2, 3}
{(a,b) for a in A for b in B}
# ## Cartesian products with other collections
#
# - The syntax for Cartesian products can be used with any collection type.
#
# In[110]:
first_names = ('Steve', 'John', 'Peter')
surnames = ('Smith', 'Doe', 'Rabbit')
[(first_name, surname) for first_name in first_names for surname in surnames]
# ## Joining collections using a zip
#
# - The Cartesian product pairs every combination of elements.
#
# - If we want a 1-1 pairing we use an operation called a zip.
#
# - A zip pairs values at the same position in each sequence.
#
# - Therefore:
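# - it should be used with ordered sequences (sets have no defined order in which to pair up their elements); and
# - the sequences should be of the same length, otherwise the result is truncated to the length of the shortest.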
# In[111]:
list(zip(first_names, surnames))
# ## Anonymous Function Literals
#
# - We can also write _anonymous_ functions.
# - These are function literals, and do not necessarily have a name.
# - They are called _lambda expressions_ (after the $\lambda-$calculus).
# In[112]:
list(map(lambda x: x ** 2, [1, 2, 3, 4]))
# ## Filtering data
#
# - We can filter a list by applying a _predicate_ to each element of the list.
#
# - A predicate is a function which takes a single argument, and returns a boolean value.
#
# - `filter(p, X)` is equivalent to $\{ x \in X : p(x) \}$ in set-builder notation.
#
# In[113]:
list(filter(lambda x: x > 0, [-5, 2, 3, -10, 0, 1]))
# We can use both `filter()` and `map()` on other collections such as strings or sets.
# In[114]:
list(filter(lambda x: x > 0, {-5, 2, 3, -10, 0, 1}))
# ## Filtering using a list comprehension
#
# - Again, because this is such a common operation, we can use simpler syntax to say the same thing.
#
# - We can express a filter using a list-comprehension by using the keyword `if`:
# In[115]:
data = [-5, 2, 3, -10, 0, 1]
[x for x in data if x > 0]
# - We can also filter and then map in the same expression:
# In[116]:
from numpy import sqrt
[sqrt(x) for x in data if x > 0]
# ## The reduce function
#
# - The `reduce()` function recursively applies another function to pairs of values over the entire list, resulting in a _single_ return value.
# In[117]:
from functools import reduce
reduce(lambda x, y: x + y, [0, 1, 2, 3, 4, 5])
# ## Big Data
#
# - The `map()` and `reduce()` functions form the basis of the map-reduce programming model.
#
# - [Map-reduce](https://en.wikipedia.org/wiki/MapReduce) is the basis of modern highly-distributed large-scale computing frameworks.
#
# - It is used in BigTable, Hadoop and Apache Spark.
#
# - See [these examples in Python](https://spark.apache.org/examples.html) for Apache Spark.