#!/usr/bin/env python
# coding: utf-8

# # COMP 364: Advanced Python Structures
#
# Today we will briefly cover some intermediate-to-advanced Python concepts.
#
# Topics:
#
# * generators
# * decorators
#
# Knowledge of these concepts will take your Python to a more advanced level.

# ## Lazy lists: generators
#
# Remember: any data Python computes during runtime is stored as an object in **memory** (RAM).
#
# **Not to be confused with long-term storage.**
#
# Objects take up **space**, just like books on a bookshelf.
#
# In general, we want to keep the memory footprint of our programs to a minimum.
#
# Misuse of memory can cause:
#
# 1) Program and system slowdowns
# 2) Full program crashes (`MemoryError`)
#
# In today's world of "Big Data", memory usage is a growing concern.
#
# ### Problem
#
# A typical computer has somewhere around 4-16 GB of memory.
#
# What happens when you need to process a data file that is 50 GB?
#
# Generators to the rescue!
#
# ### Pre-computing vs lazy computing
#
# Under the hood we've been dealing with generators all along, but thinking of them as lists.
#
# A **list** is a pre-computed container object: all of its values exist in memory at once.

# In[84]:

# this is a list of the first 10 numbers
nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# We can access each element directly since they are all stored in memory.

# In[85]:

nums[6]

# I can also create this list with a loop.

# In[86]:

def firstn(n):
    nums = []
    count = 0
    while count < n:
        nums.append(count)
        count += 1
    return nums

# In[87]:

nums = firstn(10)
print(nums)

# But what if we don't need all the numbers stored at once?
#
# If we know the rule for computing the next number in the sequence, we can do nothing until we are asked and then just spit out the next number.
#
# This is what **generator** functions do.
#
# **Generator** functions return **generator** objects and can **yield** values during their execution.
#
# In this case, the rule is "the next number is the previous number + 1".

# In[88]:

def firstn_gen(n):
    count = 0
    while count < n:
        yield count
        count += 1

# In[89]:

nums = firstn_gen(10)

# In[90]:

print(nums)

# OK, so we got a `generator` object instead of a list.
#
# `generator` objects have a method called `__next__()` which returns the next item in the sequence.

# In[91]:

nums.__next__()

# In[94]:

i = nums.__next__()
print(i)

# If we put a generator in a for loop, Python repeatedly calls `__next__()` for you until it runs out of items.

# In[95]:

nums = firstn_gen(10)
for i in nums:
    print(i)

# Note: once we have pulled all the items out of a generator, we can't re-use it.

# In[98]:

for i in nums:
    print(i)

g = firstn_gen(100)

# OK, back to the funny `yield` statement.

# In[99]:

def firstn_gen(n):
    count = 0
    print("before loop")
    while count < n:
        print("yielding number")
        yield count
        print("i'm back. incrementing count")
        count += 1

# In[100]:

nums = firstn_gen(10)

# `nums` is a generator and we can call its `__next__` method.
#
# Generators behave like functions, except that their execution can be interrupted and re-started.
#
# When we reach a `yield` statement, the function exits, yields a value, and all variable assignments are maintained.
#
# With a normal function, once the function exits, all of its local state is lost.
#
# `__next__` executes the function code.
#
# The first time `__next__` is called, execution begins at the top of the definition and stops when a `yield` is reached.
#
# Subsequent calls resume right after the `yield`.

# In[101]:

# first call
nums.__next__()

# In[106]:

# second call, resumes after 'yield'
nums.__next__()

# Because the generator remembers its current state, we can continue through the rest of the numbers with a for loop.

# In[103]:

for n in nums:
    print(n)
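# As a quick aside (a minimal sketch, not from the original lecture): the built-in
# `next()` is the usual way to call `__next__()`, and an exhausted generator signals
# that it is finished by raising `StopIteration` -- this is what the for loop catches
# for you behind the scenes. The `countdown` generator below is just an illustrative example.

# In[ ]:

def countdown(n):
    """Yield n, n-1, ..., 1 and then stop."""
    while n > 0:
        yield n
        n -= 1

c = countdown(2)
print(next(c))       # 2 -- runs the body up to the first yield
print(next(c))       # 1 -- resumes right after the yield
try:
    next(c)          # the while condition is now False, so the generator is done
except StopIteration:
    print("generator exhausted")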
# ### Handling large data files with generators
#
# Often we want to read a very large file and do something with each line, but we don't need the whole file loaded into memory at once.
#
# I [downloaded](https://www.kaggle.com/stackoverflow/pythonquestions/data) all the questions about Python on Stack Overflow, a website I'm sure you're familiar with by now.

# In[107]:

filepath = "Questions.csv"

# Here is the wrong way.

# In[108]:

def load_data_list(path, encoding="utf-8"):
    file_handle = open(path, "r", encoding=encoding)
    file_lines = file_handle.readlines()
    file_handle.close()
    return file_lines

# In[109]:

# this file has a non-standard encoding ("latin-1") so I have to specify it; don't worry about file encodings
questions = load_data_list(filepath, encoding="latin-1")

# We can use `memory_profiler` (it doesn't work in notebooks) to see how much memory this uses.

# In[110]:

print(questions[:10])

# In[111]:

type(questions)

# Now let's lazily read the lines from the file.

# In[112]:

def lazy_read(filepath, encoding="utf-8"):
    file_handle = open(filepath, "r", encoding=encoding)
    line = file_handle.readline()
    # readline() returns an empty string once we reach the end of the file
    while line:
        yield line
        line = file_handle.readline()
    file_handle.close()

# In[113]:

g = lazy_read(filepath, encoding="latin-1")

# In[114]:

g.__next__()

# In[115]:

g.__next__()

# In[116]:

g.__next__()

# In[118]:

count = 12
c = 0
for line in g:
    if c >= count:
        break
    print(line)
    c += 1

# This is essentially what Python already does for you: a file object is itself a lazy iterator over its lines, and opening it with `with open()` additionally guarantees that the file gets closed.

# ### Generator comprehensions
#
# Just like list comprehensions, we can create generator comprehensions (also called generator expressions).
#
# Generator comprehensions look exactly like list comprehensions except they use round brackets `()`.

# In[121]:

evens = (x for x in range(100) if x % 2 == 0)

# In[122]:

evens.__next__()

# In[123]:

next(evens)

# In[124]:

next(evens)

# This is equivalent to:

# In[125]:

def evens():
    x = 0
    while x < 100:
        if x % 2 == 0:
            yield x
        x += 1

e = evens()

# Generators can produce an unbounded stream of values while using almost no memory!
#
# Example: [fibonacci](https://en.wikipedia.org/wiki/Fibonacci_number) numbers
#
# $F_n = F_{n-1} + F_{n-2}$ where $F_0 = 0$ and $F_1 = 1$
#
# e.g. $0, 1, 1, 2, 3, 5, 8, 13, 21, ...$
#
# ![](http://seyferseed.ru/wp-content/uploads/2017/03/Fibonacci-Spiral.png)
#
# ![](https://qph.ec.quoracdn.net/main-qimg-0281d782e4ec471ce2d5091d2c40f1b5-c)

# In[126]:

def fib():
    a = 0
    b = 1
    while True:
        yield a
        a, b = b, a + b

# In[127]:

f = fib()
for i in range(10):
    print(next(f))

# So whenever we want another Fibonacci number, we can just call `next(f)` again.
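# As an aside (a minimal sketch using the standard library, not covered in class):
# `itertools.islice` is a convenient way to take a finite slice of an infinite
# generator like our `fib` above, without writing the counting loop ourselves.

# In[ ]:

from itertools import islice

# the first 10 Fibonacci numbers, pulled lazily from the infinite generator
print(list(islice(fib(), 10)))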
# ## Iterables as function arguments: the * operator
#
# Another cool thing we can do with iterables (such as generators and lists) is pass their items as arguments to functions.
#
# When we pass an iterable to a function with a * before its name, Python unpacks its items as positional arguments.

# In[128]:

def polynomial(a, b, c):
    return ((a * x**2) + (b * x) + c for x in range(50))

# In[129]:

coefficients = [2, 2, 3]
poly_gen = polynomial(*coefficients)

# This is the same as

# In[132]:

poly_gen = polynomial(2, 2, 3)

# any iterable can be unpacked this way, including a generator
arg_gen = (x for x in range(3))
polynomial(*arg_gen)

# In[131]:

for p in poly_gen:
    print(p)

# ## Mapping types as keyword arguments: the ** operator
#
# If a function takes keyword arguments, we can also unpack a mapping (e.g. a dictionary) into them using the ** operator.

# In[ ]:

def student(grade=4.0, name='bob'):
    print(f"{name}'s gpa is {grade}")

# In[ ]:

s = {'name': 'carlos', 'grade': 3.5}

# In[133]:

# the manual way: pull each value out of the dictionary yourself
student(grade=s['grade'], name=s['name'])

# In[134]:

student(**s)

# ## Wrapping functions: Decorators
#
# In the spirit of Christmas, let's talk about **wrapping** functions and **decorators**.
#
# ![](http://mybbaddict.com/wp-content/uploads/2017/10/delightful-decoration-snoopy-christmas-tree-274-best-peanuts-images-on-pinterest-charlie.jpg)
#
# First, let's do a little recap on functions.
#
# **Functions are objects.**
#
# Just like other objects, functions can be:
#
# * passed as arguments
# * bound to names
# * returned

# In[136]:

def foo(x):
    return x

f = foo
b = foo
f(5)
b(10)

# In[137]:

def caller(func, arg):
    return f"calling {func.__name__} with input {arg} result is: {func(arg)}"

def is_even(num):
    return not num % 2

def is_odd(num):
    return bool(num % 2)

print(caller(is_even, 5))
print(caller(is_odd, 5))

# We can also define functions inside other functions and return them.

# In[138]:

def foo():
    x = 5
    def boo():
        return 5
    return boo

five = foo()
print(type(five))
five()

# What if I want to time how long a function takes to run?

# In[139]:

import time
import random

# this function sleeps for 0 to 4 seconds
def foo():
    print("doing some stuff..")
    time.sleep(random.randrange(5))

# In[140]:

start = time.time()  # get current time
foo()
print(f"time elapsed: {time.time() - start} seconds")

# That's nice, but what happens the next time I want to time a different function?
#
# I have to write the same code again... repetitive code is no good.

# In[141]:

def boo():
    print("doing some other stuff")
    time.sleep(random.randrange(3))

start = time.time()
boo()
print(f"time elapsed: {time.time() - start} seconds")

# What if I could automatically extend any function with timing functionality?
#
# This is where decorators come in.
#
# You can think of decorators as functions that create functions, but with some useful "decorations".
#
# Since we know that we can return functions, why don't we write a function that takes a function as input and returns a decorated version of it?

# In[142]:

def timer(func):
    def wrapped():
        start = time.time()
        func()
        print(f"{func.__name__} took: {time.time() - start} seconds")
    # return the decorated function
    return wrapped

# In[143]:

def stuff():
    print("doing some stuff..")
    time.sleep(random.randrange(3))

# Let's transform our boring `stuff` function into a decorated version of itself.

# In[144]:

timed_stuff = timer(stuff)

# In[145]:

type(timed_stuff)

# Now we can use the improved version of our function.

# In[146]:

timed_stuff()

# And we can use the decorator to transform any function (**that takes no arguments**) without having to re-write any code.

# In[147]:

def other_stuff():
    print("doing some other stuff..")
    time.sleep(random.randrange(3))

# In[148]:

timed_other_stuff = timer(other_stuff)
timed_other_stuff()
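# As a side note (a minimal sketch, not the version used in class): if we want the
# timer to work on functions that *do* take arguments, we can combine it with the
# `*` and `**` unpacking we saw earlier. The name `timer_any` is just illustrative.

# In[ ]:

def timer_any(func):
    def wrapped(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)   # forward whatever arguments we received
        print(f"{func.__name__} took: {time.time() - start} seconds")
        return result                    # pass the result back to the caller
    return wrapped

timed_sum = timer_any(sum)
print(timed_sum(range(1_000_000)))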
# ### The @ operator
#
# Python makes using decorators a little easier with the `@` operator.
#
# Instead of creating a new function explicitly, we can just put `@decoratorname` before any function we want to decorate.

# In[149]:

@timer
def fibonaccis():
    f = fib()
    max_fib = 10
    for i, num in enumerate(f):
        print(num)
        if i > max_fib:
            break

# In[150]:

fibonaccis()

# Another good use of decorators is argument type checking (source: https://www.python-course.eu/python3_decorators.php); a sketch appears at the end of this notebook.
#
# Here is one for debugging.

# In[151]:

def debug(func):
    def helper(*args):
        print(f"calling {func.__name__} on arguments {args}")
        # pass the wrapped function's result back to the caller
        return func(*args)
    return helper

@debug
def multiply(a, b):
    return a * b

@debug
def add(*args):
    return sum(args)

multiply(4, 5)
add(1, 2, 3, 4)
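# As mentioned above, another good use of decorators is argument type checking.
# Here is a minimal sketch of what such a decorator might look like (the name
# `require_ints` is just illustrative and is not taken from the linked source).

# In[ ]:

def require_ints(func):
    def helper(*args):
        for arg in args:
            if not isinstance(arg, int):
                raise TypeError(f"{func.__name__} expects ints, got {type(arg).__name__}")
        return func(*args)
    return helper

@require_ints
def add_ints(*args):
    return sum(args)

print(add_ints(1, 2, 3))   # 6
# add_ints(1, "two")       # would raise TypeError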