# !/usr/bin/env python
# coding: utf-8

# ![](http://www.quickmeme.com/img/dd/dd6c809714a2f1300a5257ac081d6ab0119da35d5e195acba939472a0b94fa7d.jpg)
#
# TL;DR: An iterable is something you can iterate over any number of times, whereas an iterator is something you can iterate over only once. Iterables aren't iterators, but iterators are iterables!
#
# The two are closely related: each time you iterate over an iterable, the interpreter actually creates a new iterator from the iterable and loops over that. The mechanism is quite simple, but until you understand the details it can seem a little confusing.
#
# Authors often tuck this topic away in the **Advanced Section** of their books or articles on Python.

# # Iteration in Python
# *Do this on every one of these*
# This applies to anything that uses the **for keyword**, specifically:
# 1. for loops
# 2. list, dict and set comprehensions
#
# *P.S. This is not relevant for while loops.*
#
# Let's now make a list of programming languages, for obvious reasons 😅

# In[1]:

def do_this_with(o):
    print("!", o, "!")

lang_list = ["Python", "Go", "Java"]

# Originally (well, certainly in Python 1.5), for loop iteration over an object x was quite simplistic. The interpreter would internally create a hidden integer variable, then repeatedly index x with it (by calling x's `__getitem__` method with the hidden variable as an argument), incrementing it to produce successive values until the call raised an IndexError exception, thereby causing the loop to terminate.

# In[2]:

private_var = 0
while True:
    try:
        i = lang_list.__getitem__(private_var)
    except IndexError:
        break
    do_this_with(i)
    private_var += 1

# This mechanism, which we can think of as the old iteration protocol, was easy to understand but only worked for objects that could be numerically indexed (tuples, lists, and other sequence types). Indices had to run from 0 to N-1, so it could not be used to iterate over unordered containers (sets, dicts).

# # Enter the Iterable
#
# To overcome the limitations of this old protocol, and specifically to allow iteration over objects that can't be numerically indexed, a newer protocol was introduced, which works with any iterable.
#
# The protocol is quite simple, but not well understood. When you write code like the following to iterate over an iterable such as a list
#
#     for i in test_list:  # or some other iterable
#         do_something_with(i)
#
# the interpreter begins by calling the iterable's `__iter__` method to create an iterator. If the object has no `__iter__` method, the interpreter simply falls back to the old protocol (as Python is backward compatible); a short sketch of this fallback appears a little further below. If there's no `__getitem__` method either, the interpreter just raises a TypeError exception, on the not unreasonable grounds that there's no way to iterate over the given value.
#
# Let me show you what I mean.

# In[3]:

for i in None:  # raises TypeError: 'NoneType' object is not iterable
    do_something_with(i)

# Why couldn't Python iterate over a NoneType object? Let's see.

# In[4]:

oi = dir(None)
print("__iter__" in oi, "__getitem__" in oi)

# NoneType has neither an `__iter__` nor a `__getitem__` method.

# In[5]:

_ = lang_list.__iter__()  # creates an iterator
while True:
    try:
        i = _.__next__()
    except StopIteration:  # iterator is exhausted
        break
    do_this_with(i)

# Each time through the loop, the interpreter extracts the next value from the iterator by calling its `__next__` method (Python 2 contained a design flaw: the method was called plain `next`, failing to mark it as a special method; it was renamed in Python 3). In the case above, the results of the `__next__` calls are successively bound to i, until `__next__` raises a StopIteration exception, which is used to terminate the loop normally.
#
# This is how you can check whether the new iteration protocol will work on an object: see whether it has an `__iter__` method. If it does, then it's an iterable. Lists are iterables, for example:

# In[6]:

hasattr(lang_list, "__iter__")

# So what kind of an object does a call to that method return? A specific kind of iterator called a *list iterator*.
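# Before exploring that, here is the promised minimal sketch of the `__getitem__` fallback (the class name OldStyle is made up purely for illustration): a class that defines only `__getitem__`, and no `__iter__`, can still be looped over, because the interpreter falls back to indexing it from 0 upwards until IndexError is raised.

class OldStyle:
    """Has __getitem__ but no __iter__, so it is iterable only via the old protocol."""

    def __init__(self, items):
        self._items = list(items)

    def __getitem__(self, index):
        return self._items[index]  # raises IndexError past the end

for lang in OldStyle(lang_list):
    do_this_with(lang)

# Now, back to that *list iterator*.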
# In[7]:

x = [1, 2, 3]
print(next(x))  # raises TypeError: 'list' object is not an iterator

# In[8]:

x = [1, 2, 3]
y = iter(x)
print(next(y))
print(next(y))
print(next(y))

# In[9]:

type(x)

# In[10]:

type(y)

# # Inferences from the 4 cells above:
#
# Iterables (lists, tuples, dicts, sets, etc.)
# * do not have the `__next__` method
# * have the `__iter__` method, which returns an iterator
#
# Iterators (returned by the built-in factory function `iter`)
# * have both the `__iter__` and `__next__` methods
# * their `__iter__` method returns self
#
# _A factory function is simply a function used to return (new) objects._
#
# Evidence for the above inferences:

# In[11]:

print(hasattr(x, '__iter__'), hasattr(x, '__next__'))

# In[12]:

print(hasattr(y, '__iter__'), hasattr(y, '__next__'))

# Calling the `iter` method on an iterable returns a *new* iterator each time.

# In[13]:

iterator_1 = iter(lang_list)
iterator_2 = iter(lang_list)
print(id(iterator_1))
print(id(iterator_2))

# Calling the `iter` method on an iterator returns *self*.

# In[14]:

iterator_1 = iter(lang_list)
iterator_2 = iter(iterator_1)
print(id(iterator_1))
print(id(iterator_2))

# # Don't forget
# `iter(o)` calls `o.__iter__()`
#
# `next(o)` calls `o.__next__()`
#
# Now let me make it clear why **Iterators are Iterables but not the other way around**.
#
# Iterators are Iterables because they too have an `__iter__` method, and Iterables are not Iterators because they don't have a `__next__` method.
#
# ![](https://i.imgur.com/YPQ5d4z.gif)

# # A lazy factory
# From the outside, the iterator is like a lazy factory that is idle until you ask it for a value, at which point it starts to buzz and produces a single value, after which it turns idle again.

# # Welcome Generators - the nicer cousin of Iterators
# * A generator is a special kind of iterator: the elegant kind.
# * Any generator is also an iterator (not vice versa!).
# * Any generator, therefore, is a factory that lazily produces values.
#
# ![Credits Vincent Driessen](https://nvie.com/img/relationships.png)
#
# There are two types of generators in Python: generator functions and generator expressions. A generator function is any function in which the keyword *yield* appears in its body. The other type, the generator expression, is the generator equivalent of a list comprehension.
#
# _P.S. Both yield and return hand back some value from a function._
#
# _The difference is that while a return statement terminates a function entirely, a yield statement pauses the function, saving all its state, and later continues from there on successive calls._
#
# Here is what a _generator function_ looks like. Each yield becomes the return value of the next `next()` call.

# In[15]:

def rangedown(n):
    for i in reversed(range(n)):
        yield i

# In[16]:

generator = rangedown(5)
for x in generator:
    print(x)

# The generator object can be iterated over only once. To restart the process we need to create another generator object, using something like
#
#     generator = rangedown(5)
#
# The cell below does not print anything, as the generator has already been iterated over.

# In[17]:

for x in generator:
    print(x)

# Now this is what a generator expression looks like, more or less like any container comprehension. The syntax is similar to that of a list comprehension, but the square brackets are replaced with round parentheses.

# ## The major difference between a list comprehension and a generator expression is that a list comprehension produces the entire list at once, while a generator expression produces one item at a time.
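# To make that difference concrete, here is a minimal sketch comparing the memory footprint of the two forms (the one-million-element range is an arbitrary choice for illustration). The list comprehension allocates space for every element up front, while the generator expression stays a tiny object no matter how many items it will eventually yield.

import sys

eager_squares = [n * n for n in range(1_000_000)]      # builds the whole list now
lazy_squares_demo = (n * n for n in range(1_000_000))  # builds items only on demand

print(sys.getsizeof(eager_squares))      # several megabytes
print(sys.getsizeof(lazy_squares_demo))  # on the order of a hundred bytes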
# In[18]:

numbers = [1, 2, 3, 4, 5]
lazy_squares = (x * x for x in numbers)

# In[19]:

lazy_squares

# In[20]:

next(lazy_squares)

# In[21]:

list(lazy_squares)

# Note that, because we read the first value from lazy_squares with next(), its state is now at the "second" item, so when we consume it entirely by calling list(), we only get the remaining squares. (This is just to show the lazy behaviour.)
#
# Simple generators can easily be created on the fly using generator expressions. Just as a lambda creates an anonymous function, a generator expression creates an anonymous generator function.
#
# Why are generators used in Python?
# There are several reasons that make generators an attractive construct to go for.
#
# * Easy to implement
#
# Generators can be implemented in a clear and concise way compared to their iterator-class counterparts. Below is an example that implements a sequence of powers of 2 using an iterator class.
#
# * Memory efficient
#
# A normal function that returns a sequence creates the entire sequence in memory before returning the result. This is overkill if the number of items in the sequence is very large. A generator implementation of such a sequence is memory friendly and is preferred, since it only produces one item at a time.
#
# * Represent infinite streams
#
# Generators are an excellent medium for representing an infinite stream of data. Infinite streams cannot be stored in memory, and since generators produce only one item at a time, they can represent such streams.
#
# * [Pipelining Generators](https://brett.is/writing/about/generator-pipelines-in-python/) (a small sketch appears at the end of this notebook)

# In[22]:

class PowTwo:
    def __init__(self, max=0):
        self.max = max

    def __iter__(self):
        self.n = 0
        return self

    def __next__(self):
        if self.n > self.max:
            raise StopIteration
        result = 2 ** self.n
        self.n += 1
        return result

# This was lengthy. Now let's do the same using a generator function. Since generators keep track of the details automatically, the implementation is concise and much cleaner.

# In[23]:

def PowTwoGen(max=0):
    n = 0
    while n <= max:  # n runs from 0 to max inclusive, matching PowTwo above
        yield 2 ** n
        n += 1

# Generators are an incredibly powerful programming construct. They allow you to write streaming code with fewer intermediate variables and data structures. Besides that, they are more memory and CPU efficient. Finally, they tend to require fewer lines of code, too.
#
# I should use them more, and maybe you should too.
#
# And just letting you know,

# In[24]:

from IPython.display import IFrame
IFrame(src='https://pythonclock.org/', width=1000, height=600)
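# One more thing: as mentioned in the list of reasons above, generators chain together nicely into pipelines. Here is a minimal sketch of such a pipeline, in the spirit of the article linked above (the stages and numbers are made up purely for illustration). Each stage pulls one item at a time from the previous one, so no intermediate list is ever materialised.

def integers(up_to):
    # First stage: lazily produce 1, 2, ..., up_to.
    for i in range(1, up_to + 1):
        yield i

def squared(seq):
    # Second stage: square whatever the previous stage yields.
    for n in seq:
        yield n * n

def negated(seq):
    # Third stage: negate whatever the previous stage yields.
    for n in seq:
        yield -n

pipeline = negated(squared(integers(10)))
print(list(pipeline))  # [-1, -4, -9, ..., -100]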