#!/usr/bin/env python
# coding: utf-8

# In[1]:


try:
    # Enable inline plots when running under IPython/Jupyter.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # get_ipython() only exists inside IPython; skip the magic when this
    # exported notebook is run with the plain Python interpreter.
    pass


# # Overview of Python
# 
# (c) 2019 [Steve Phelps](mailto:sphelps@sphelps.net) 

# ## Python is interpreted
# 
# - Python is an _interpreted_ language, in contrast to Java and C which are compiled languages.
# 
# - This means we can type statements into the interpreter and they are executed immediately.
# 

# In[2]:


5 + 5


# - Groups of statements are all executed one after the other:

# In[3]:


x = 5
y = 'Hello There'
z = 10.5


# - We can visualize the above code using [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%205%0Ay%20%3D%20'Hello%20There'%0Az%20%3D%2010.5&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# In[4]:


x + 5


# ## Assignments versus equations
# 
# - In Python when we write `x = 5` this means something different from an equation $x=5$.
# 
# - Unlike variables in mathematical models, variables in Python can refer to different things as more statements are interpreted.
# 

# In[5]:


x = 1
print('The value of x is', x)

x = 2.5
print('Now the value of x is', x)

x = 'hello there'
print('Now it is ', x)


# ## Calling Functions
# 
# We can call functions in a conventional way using round brackets

# In[6]:


round(3.14)


# ## Types
# 
# - Values in Python have an associated _type_.
# 
# - If we combine types incorrectly we get an error.

# In[7]:


print(y)


# In[8]:


try:
    y + 5
except TypeError as err:
    # Adding an integer to a string is not allowed; show the error message
    # without stopping the rest of the script.
    print('TypeError:', err)


# ## The type function
# 
# - We can query the type of a value using the `type` function.

# In[9]:


type(1)


# In[10]:


type('hello')


# In[11]:


type(2.5)


# In[12]:


type(True)


# ## Null values
# 
# - Sometimes we represent "no data" or "not applicable".  
# 
# - In Python we use the special value `None`.
# 
# - This corresponds to `null` in Java or `NULL` in SQL.
# 

# In[13]:


result = None


# - When we fetch the value `None` in the interactive interpreter, no result is printed out.
# 

# In[14]:


result


# ## Testing for Null values
# 
# - We can check whether there is a result or not using the `is` operator:

# In[15]:


result is None


# In[16]:


x = 5
x is None


# ## Converting values between types
# 
# - We can convert values between different types.
# 
# ### Converting to floating-point
# 
# - To convert an integer to a floating-point number use the `float()` function.
# 

# In[17]:


x = 1
x


# In[18]:


type(x)


# In[19]:


y = float(x)
y


# ### Converting to integers
# 
# - To convert a floating-point to an integer use the `int()` function.

# In[20]:


type(y)


# In[21]:


int(y)


# ## Variables are not typed
# 
# - _Variables_ themselves, on the other hand, do not have a fixed type.
# - It is only the values that they refer to that have a type.
# - This means that the type referred to by a variable can change as more statements are interpreted.
# 

# In[22]:


y = 'hello'
print('The type of the value referred to by y is ', type(y))
y = 5.0
print('And now the type of the value is ', type(y))


# ## Polymorphism
# 
# - The meaning of an operator depends on the types we are applying it to.
# 
# 

# In[23]:


1 + 1


# In[24]:


'a' + 'b'


# In[25]:


'1' + '1'


# ## Conditional Statements and Indentation
# 

# - The syntax for control structures in Python uses _colons_ and _indentation_.
# 
# - Beware that white-space affects the semantics of Python code.
# 
# - Statements that are indented to the same level are grouped together into a block.

# ### `if` statements

# In[26]:


x = 5
if x > 0:
    print('x is strictly positive.')
    print(x)
    
print('finished.')


# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%205%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0A%20%20%20%20print%28x%29%0A%20%20%20%20%0Aprint%28'finished.'%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ### Changing indentation 

# In[27]:


x = 0
if x > 0:
    # Only this indented statement belongs to the `if` block.
    print('x is strictly positive.')
# No longer indented, so this runs whether or not the condition holds.
print(x)
    
print('finished.')


# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%200%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0Aprint%28x%29%0A%20%20%20%20%0Aprint%28'finished.'%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ### `if` and `else`

# In[28]:


x = 0
print('Starting.')
if x > 0:
    print('x is strictly positive.')
else:
    if x < 0:
        print('x is strictly negative.')
    else:
        print('x is zero.')
print('finished.')


# - Visualize the above on [PythonTutor](http://pythontutor.com/live.html#code=x%20%3D%200%0Aprint%28'Starting.'%29%0Aif%20x%20%3E%200%3A%0A%20%20%20%20print%28'x%20is%20strictly%20positive.'%29%0Aelse%3A%0A%20%20%20%20if%20x%20%3C%200%3A%0A%20%20%20%20%20%20%20%20print%28'x%20is%20strictly%20negative.'%29%0A%20%20%20%20else%3A%0A%20%20%20%20%20%20%20%20print%28'x%20is%20zero.'%29%0Aprint%28'finished.'%29&cumulative=false&curInstr=6&heapPrimitives=nevernest&mode=display&origin=opt-live.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ### `elif`

# In[29]:


# x is not assigned in this cell; it keeps whatever value it was last given
# (0 if the cells are run in order).
print('Starting.')
if x > 0:
    print('x is strictly positive')
elif x < 0:
    # `elif` replaces a nested `else: if ...:` and avoids an extra level of indentation.
    print('x is strictly negative')
else:
    print('x is zero')
print('finished.')


# ## Lists
# 
# 

# We can use _lists_ to hold an ordered sequence of values.

# In[30]:


l = ['first', 'second', 'third']
l


# Lists can contain different types of value, even in the same list.

# In[31]:


another_list = ['first', 'second', 'third', 1, 2, 3]
another_list


# ## Mutable Datastructures

# Lists are _mutable_; their contents can change as more statements are interpreted.

# In[32]:


l.append('fourth')
l


# ## References
# 
# - Whenever we bind a variable to a value in Python we create a *reference*.
# 
# - A reference is distinct from the value that it refers to.
# 
# - Variables are names for references.
# 

# In[33]:


X = [1, 2, 3]
Y = X


# ### Side effects

# - The above code creates two different references (named `X` and `Y`) to the *same* value `[1, 2, 3]`
# 
# - Because lists are mutable, changing them can have side-effects on other variables.
# 
# - If we append something to `X` what will happen to `Y`?

# In[34]:


X.append(4)
X


# In[35]:


Y


# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=X%20%3D%20%5B1,%202,%203%5D%0AY%20%3D%20X%0AX.append%284%29%0Aprint%28Y%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ## State and identity
# 
# - The state referred to by a variable is *different* from its identity.
# 
# - To compare *state* use the `==` operator.
# 
# - To compare *identity* use the `is` operator.
# 
# - When we compare identity we check equality of references.
# 
# - When we compare state we check equality of values.
# 

# ### Example
# 
# - We will create two *different* lists, with two associated variables.

# In[36]:


X = [1, 2]
Y = [1]
Y.append(2)


# - Visualize the above code on [PythonTutor](http://pythontutor.com/visualize.html#code=X%20%3D%20%5B1,%202%5D%0AY%20%3D%20%5B1%5D%0AY.append%282%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ### Comparing state

# In[37]:


X


# In[38]:


Y


# In[39]:


X == Y


# ### Comparing identity

# In[40]:


X is Y


# ### Copying data prevents side effects
# 
# - In this example, because we have two different lists we avoid side effects

# In[41]:


Y.append(3)
X


# In[42]:


X == Y


# In[43]:


X is Y


# ## Iteration
# 
# - We can iterate over each element of a list in turn using a `for` loop:
# 

# In[44]:


my_list = ['first', 'second', 'third', 'fourth']
for i in my_list:
    print(i)


# - Visualize the above on [PythonTutor](http://pythontutor.com/visualize.html#code=my_list%20%3D%20%5B'first',%20'second',%20'third',%20'fourth'%5D%0Afor%20i%20in%20my_list%3A%0A%20%20%20%20print%28i%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ### Including more than one statement inside the loop

# In[45]:


my_list = ['first', 'second', 'third', 'fourth']
for i in my_list:
    print("The next item is:")
    print(i)
    print()


# - Visualize the above code on [PythonTutor](http://pythontutor.com/visualize.html#code=my_list%20%3D%20%5B'first',%20'second',%20'third',%20'fourth'%5D%0Afor%20i%20in%20my_list%3A%0A%20%20%20%20print%28%22The%20next%20item%20is%3A%22%29%0A%20%20%20%20print%28i%29%0A%20%20%20%20print%28%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false).

# ### Looping a specified number of times

# - To perform a statement a certain number of times, we can iterate over a list of the required size.

# In[46]:


for i in [0, 1, 2, 3]:
    print("Hello!")


# ### The `range` function
# 
# - To save from having to manually write the numbers out, we can use the function `range()` to count for us.  
# 
# - We count starting at 0 (as in Java and C++).

# In[47]:


list(range(4))


# ### `for` loops with the `range` function

# In[48]:


for i in range(4):
    print("Hello!")


# ## List Indexing
# 
# - Lists can be indexed using square brackets to retrieve the element stored in a particular position.
# 
# 
# 
# 

# In[49]:


my_list


# In[50]:


my_list[0]


# In[51]:


my_list[1]


# ## List Slicing
# 
# - We can also specify a _range_ of positions.  
# 
# - This is called _slicing_.
# 
# - The example below indexes from position 0 (inclusive) to 2 (exclusive).
# 
# 

# In[52]:


my_list[0:2]


# ### Indexing from the start or end

# - If we leave out the starting index it implies the beginning of the list:
# 
# 

# In[53]:


my_list[:2]


# - If we leave out the final index it implies the end of the list:

# In[54]:


my_list[2:]


# #### Copying a list
# 
# - We can conveniently copy a list by indexing from start to end:
# 

# In[55]:


new_list = my_list[:]


# In[56]:


new_list


# In[57]:


new_list is my_list


# In[58]:


new_list == my_list


# ## Negative Indexing
# 
# - Negative indices count from the end of the list:
# 
# 

# In[59]:


my_list[-1]


# In[60]:


my_list[:-1]


# ## Collections
# 
# - Lists are an example of a *collection*.
# 
# - A collection is a type of value that can contain other values.
# 
# - There are other collection types in Python:
# 
#     - `tuple`
#     - `set`
#     - `dict`

# ### Tuples
# 
# - Tuples are another way to combine different values.
# 
# - The combined values can be of different types.
# 
# - Like lists, they have a well-defined ordering and can be indexed.
# 
# - To create a tuple in Python, use round brackets instead of square brackets

# In[61]:


tuple1 = (50, 'hello')
tuple1


# In[62]:


tuple1[0]


# In[63]:


type(tuple1)


# #### Tuples are immutable
# 
# - Unlike lists, tuples are *immutable*.  Once we have created a tuple we cannot add values to it.
# 
# 

# In[64]:


try:
    tuple1.append(2)
except AttributeError as err:
    # Tuples have no append() method; report the error and carry on so the
    # rest of the script still runs.
    print('AttributeError:', err)


# ### Sets
# 
# - Lists can contain duplicate values.
# 
# - A set, in contrast, contains no duplicates.
# 
# - Sets can be created from lists using the `set()` function.
# 
# 
# 

# In[65]:


X = set([1, 2, 3, 3, 4])
X


# In[66]:


type(X)


# - Alternatively we can write a set literal using the `{` and `}` brackets.

# In[67]:


X = {1, 2, 3, 4}
type(X)


# #### Sets are mutable
# 
# - Sets are mutable like lists:

# In[68]:


X.add(5)
X


# - Duplicates are automatically removed

# In[69]:


X.add(5)
X


# #### Sets are unordered
# 
# - Sets do not have an ordering.
# 
# - Therefore we cannot index or slice them:
# 
# 

# In[70]:


try:
    X[0]
except TypeError as err:
    # Sets do not support indexing; report the error and carry on so the
    # rest of the script still runs.
    print('TypeError:', err)


# #### Operations on sets
# 
# - Union: $X \cup Y$
# 

# In[71]:


X = {1, 2, 3}
Y = {4, 5, 6}
X | Y


# - Intersection: $X \cap Y$:

# In[72]:


X = {1, 2, 3, 4}
Y = {3, 4, 5}
X & Y


# - Difference $X - Y$:
# 

# In[73]:


X - Y


# ### Dictionaries
# 
# - A dictionary contains a mapping between *keys*, and corresponding *values*.
#     
#     - Mathematically it is a function with a finite domain and range (it need not be one-to-one, since different keys may map to the same value).
#     
# - Given a key, we can very quickly look up the corresponding value.
# 
# - The values can be any type (and need not all be of the same type).
# 
# - Keys can be any immutable (hashable) type.
# 
# - In Python the dictionary type is named `dict` (a built-in type name, not a keyword).
# 
# - In other programming languages they are sometimes called *associative arrays*.

# #### Creating a dictionary
# 
# - A dictionary contains a set of key-value pairs.
# 
# - To create a dictionary:
# 

# In[74]:


students = {107564: 'Xu', 108745: 'Ian', 102567: 'Steve'}


# - The above initialises the dictionary students so that it contains three key-value pairs.
# 
# - The keys are the student id numbers (integers).
# 
# - The values are the names of the students (strings).
# 
# - Although we use the same brackets as for sets, this is a different type of collection:

# In[75]:


type(students)


# #### Accessing the values in a dictionary
# 
# - We can access the value corresponding to a given key using the same syntax to access particular elements of a list: 

# In[76]:


students[108745]


# - Accessing a non-existent key will generate a `KeyError`:

# In[77]:


try:
    students[123]
except KeyError as err:
    # 123 is not one of the keys; report the error and carry on so the
    # rest of the script still runs.
    print('KeyError:', err)


# #### Updating dictionary entries
# 
# - Dictionaries are mutable, so we can update the mapping:

# In[78]:


students[108745] = 'Fred'
print(students[108745])


# - We can also grow the dictionary by adding new keys:

# In[79]:


students[104587] = 'John'
print(students[104587])


# #### Dictionary keys can be any immutable type
# 
# - We can use any immutable type for the keys of a dictionary
# 
# - For example, we can map names onto integers:

# In[80]:


age = { 'John':21, 'Steve':47, 'Xu': 22 }


# In[81]:


age['Steve']


# #### Creating an empty dictionary
# 
# - We often want to initialise a dictionary with no keys or values.
# 
# - To do this call the function `dict()`:

# In[82]:


result = dict()


# - We can then progressively add entries to the dictionary, e.g. using iteration:

# In[83]:


for i in range(5):
    result[i] = i**2
print(result)


# #### Iterating over a dictionary
# 
# - We can use a for loop with dictionaries, just as we can with other collections such as sets.
# - When we iterate over a dictionary, we iterate over the *keys*.
# - We can then perform some computation on each key inside the loop.
# - Typically we will also access the corresponding value.

# In[84]:


# Iterating over a dictionary yields its keys.  The loop variable is called
# student_id rather than id so that it does not shadow the built-in id() function.
for student_id in students:
    print(students[student_id])


# ### The size of a collection
# 
# - We can count the number of values in a collection using the `len` (length) function.
# 
# - This can be used with any type of collection (list, set, tuple etc.).
# 

# In[85]:


len(students)


# In[86]:


len(['one', 'two'])


# In[87]:


len({'one', 'two', 'three'})


# #### Empty collections

# - Empty collections have a size of zero:

# In[88]:


empty_list = []
len(empty_list) == 0


# ### Arrays
# 
# - Python also has arrays which contain a *single* type of value.
# 
# - i.e. we *cannot* have different types of value within the same array.   
# 
# - Arrays are mutable like lists; we can modify the existing elements of an array.
# 
# - However, we typically do not change the size of the array; i.e. it has a fixed length.

# ## The `numpy` module
# 
# - Arrays are provided by a separate _module_ called numpy.  Modules correspond to packages in e.g. Java.
# 
# - We can import the module and then give it a shorter _alias_.

# In[89]:


import numpy as np


# - We can now use the functions defined in this package by prefixing them with `np`.  
# 
# - The function `array()` creates an array given a list.

# ### Creating an array
# 
# - We can create an array from a list by using the `array()` function defined in the `numpy` module:

# In[90]:


x = np.array([0, 1, 2, 3, 4])
x


# In[91]:


type(x)


# ### Functions over arrays
# 
# - When we use arithmetic operators on arrays, we create a new array with the result of applying the operator to each element.

# In[92]:


y = x * 2
y


# - The same goes for functions:

# In[93]:


x = np.array([-1, 2, 3, -4])
y = abs(x)
y


# ### Populating Arrays
# 
# - To populate an array with a range of values we use the `np.arange()` function:
# 

# In[94]:


x = np.arange(0, 10)
x


# - We can also use floating point increments.
# 

# In[95]:


x = np.arange(0, 1, 0.1)
x


# ### Basic Plotting
# 
# - We will use a module called `matplotlib` to plot some simple graphs.
# 
# - This module provides functions which are very similar to MATLAB plotting commands.
# 

# In[96]:


import matplotlib.pyplot as plt

y = x*2 + 5
plt.plot(x, y)
plt.show()


# ### Plotting a sine curve

# In[97]:


from numpy import pi, sin

x = np.arange(0, 2*pi, 0.01)
y = sin(x)
plt.plot(x, y)
plt.show()


# ### Plotting a histogram
# 
# - We can use the `hist()` function in `matplotlib` to plot a histogram

# In[98]:


# Generate some random data: 1000 samples from np.random.randn
data = np.random.randn(1000)

# NOTE(review): plt.hist() returns a (counts, bin_edges, patches) tuple rather
# than an Axes object, so the name `ax` is misleading; it is not used again
# in this cell.
ax = plt.hist(data)
plt.show()


# ### Computing histograms as matrices
# 
# - The function `histogram()` in the `numpy` module will count frequencies into bins and return the result as a pair of arrays: the counts in each bin, and the bin edges.

# In[99]:


np.histogram(data)


# ## Defining new functions
# 
# 

# In[100]:


def squared(x):
    """Return x raised to the power of two."""
    return pow(x, 2)

squared(5)


# ## Local Variables
# 
# - Variables created inside functions are _local_ to that function.
# 
# - They are not accessible to code outside of that function.

# In[101]:


def squared(x):
    """Return x squared, holding the intermediate result in a local variable."""
    # temp is created inside the function, so it is local to squared().
    temp = x ** 2
    return temp

squared(5)


# In[102]:


try:
    temp
except NameError as err:
    # temp was local to squared(), so it is not defined out here; report the
    # error and carry on so the rest of the script still runs.
    print('NameError:', err)


# ## Functional Programming
# 
# - Functions are first-class citizens in Python.
# 
# - They can be passed around just like any other value.

# In[103]:


squared


# In[104]:


y = squared
y


# In[105]:


y(5)


# ## Mapping the elements of a collection
# 
# - We can apply a function to each element of a collection using the built-in function `map()`.
# 
# - This will work with any collection: list, set, tuple or string.
# 
# - This will take as an argument _another function_, and the list we want to apply it to.
# 
# - In Python 3 it returns a lazy iterator over the results of applying the function; we wrap it in `list()` to obtain a list.

# In[106]:


list(map(squared, [1, 2, 3, 4]))


# ## List Comprehensions

# - Because this is such a common operation, Python has a special syntax to do the same thing, called a _list comprehension_.
# 

# In[107]:


[squared(i) for i in [1, 2, 3, 4]]


# - If we want a set instead of a list we can use a set comprehension

# In[108]:


{squared(i) for i in [1, 2, 3, 4]}


# ## Cartesian product using list comprehensions
# 
# 

# <img src="figs/220px-Cartesian_Product_qtl1.svg.png"> <sup>image courtesy of [Quartl](https://commons.wikimedia.org/wiki/User:Quartl)</sup>
# 
# The [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) of two collections $X = A \times B$ can be expressed by using multiple `for` statements in a comprehension.
# 

# ### example

# In[109]:


A = {'x', 'y', 'z'}
B = {1, 2, 3}
{(a,b) for a in A for b in B}


# ## Cartesian products with other collections
# 
# - The syntax for Cartesian products can be used with any collection type.
# 

# In[110]:


first_names = ('Steve', 'John', 'Peter')
surnames = ('Smith', 'Doe', 'Rabbit')

[(first_name, surname) for first_name in first_names for surname in surnames]


# ## Joining collections using a zip
# 
# - The Cartesian product pairs every combination of elements.
# 
# - If we want a 1-1 pairing we use an operation called a zip.
# 
# - A zip pairs values at the same position in each sequence.
# 
# - Therefore:
#     - it can only be used with sequences (not sets); and
#     - both collections should be of the same length (otherwise the result is truncated to the length of the shortest).

# In[111]:


list(zip(first_names, surnames))


# ## Anonymous Function Literals
# 
# - We can also write _anonymous_ functions.
# - These are function literals, and do not necessarily have a name.
# - They are called _lambda expressions_ (after the $\lambda$-calculus).

# In[112]:


list(map(lambda x: x ** 2, [1, 2, 3, 4]))


# ## Filtering data
# 
# - We can filter a list by applying a _predicate_ to each element of the list.
# 
# - A predicate is a function which takes a single argument, and returns a boolean value.
# 
# - `filter(p, X)` is equivalent to $\{ x \in X : p(x) \}$ in set-builder notation.
# 

# In[113]:


list(filter(lambda x: x > 0, [-5, 2, 3, -10, 0, 1]))


# We can use both `filter()` and `map()` on other collections such as strings or sets.

# In[114]:


list(filter(lambda x: x > 0, {-5, 2, 3, -10, 0, 1}))


# ## Filtering using a list comprehension
# 
# - Again, because this is such a common operation, we can use simpler syntax to say the same thing.
# 
# - We can express a filter using a list-comprehension by using the keyword `if`:

# In[115]:


data = [-5, 2, 3, -10, 0, 1]
[x for x in data if x > 0]


# - We can also filter and then map in the same expression:

# In[116]:


from numpy import sqrt
[sqrt(x) for x in data if x > 0]


# ## The reduce function
# 
# - The `reduce()` function recursively applies another function to pairs of values over the entire list, resulting in a _single_ return value.

# In[117]:


from functools import reduce
reduce(lambda x, y: x + y, [0, 1, 2, 3, 4, 5])


# ## Big Data
# 
# - The `map()` and `reduce()` functions form the basis of the map-reduce programming model.
# 
# - [Map-reduce](https://en.wikipedia.org/wiki/MapReduce) is the basis of modern highly-distributed large-scale computing frameworks.
# 
# - It is used in BigTable, Hadoop and Apache Spark. 
# 
# - See [these examples in Python](https://spark.apache.org/examples.html) for Apache Spark.