#!/usr/bin/env python
# coding: utf-8

# # Effective Python - 59 Specific Ways to Write Better Python.
#
# *Chapter 1 - Pythonic-Thinking*
# Book by Brett Slatkin.
# Summary notes by Tyler Banks.

# ## Item 1: Know Which Version of Python You’re Using
#

# In[57]:

import sys

print(sys.version_info)
print(sys.version)

# ## Item 2: Follow the PEP 8 Style Guide
# http://www.python.org/dev/peps/pep-0008/
# Whitespace
# * Use 4 spaces per indentation level
# * Lines should be 79 characters or less
# * Continuation of long expressions should be indented by 4 extra spaces
# * Functions and classes should be separated by two blank lines
# * In a class, methods should be separated by one blank line
# * Don't put spaces around list indexes, function calls, or keyword argument assignments
# * Put one space before and after variable assignment
#
# Naming
# * Functions, variables and attributes should be in `lowercase_underscore` format
# * Protected instance attributes should be in `_leading_underscore` format.
# * Private instance attributes should be in `__double_leading_underscore` format
# * Classes and exceptions should be in `CapitalizedWord` format
# * Module constants should be in `ALL_CAPS` format
# * Instance methods in a class should use `self` as the name of the first parameter, referring to the object
# * Class methods should use `cls` as the name of the first parameter, referring to the class
#
# Expressions and Statements
# * Use inline negation (`if a is not b`) instead of negating positive expressions (`if not a is b`)
# * Don't check for empty values by checking length (`if len(alist) == 0`). Use `if not alist`
# * Avoid single line `if` statements, `for`, and `while` loops, and `except` statements. Spread over a series of lines.
# * Always put import statements at the top of a file
# * Always use absolute names for modules, not relative paths. (`from bar import foo`) not `import foo`
# * Imports should be in sections in the following order: Standard library modules, Third party modules, Your own modules. Each subsection should be in alphabetical order
#
# See Pylint (http://www.pylint.org/) to analyze your source code and flag style issues!

# ## Item 3: Know the Differences Between `bytes`, `str`, and `unicode`
# General Python 3
# * There are two types that represent sequences of characters: `bytes` and `str`
# * `bytes` contain raw 8-bit values, `str` contains unicode

# In[58]:

# Convert between str and bytes using encode and decode
string = "this is text"
print(string)
bytes_ = string.encode('utf-8')
print("{}".format(bytes_))
string1 = bytes_.decode('utf-8')
print(string1)
# Compare the encoded value (not the builtin `bytes` type) with the text.
print(bytes_ == string)
print(string == string1)

# * `bytes` and `str` are never equivalent
# * Files opened with `open` default to text mode (usually UTF-8), not binary
# * Use 'wb' to open binary files for writing

# In[59]:

#with open('/tmp/random.bin', 'wb') as f:
#    f.write(os.urandom(10))

# * `bytes` contain sequences of 8 bit values. `str` contains unicode. They can't be used together with operators like `>` or `+`

# ## Item 4: Write Helper Functions Instead of Complex Expressions
# * Don't overcomplicate one line statements
# * Move complex expressions to helper functions, especially for repeated code
# * `if`/`else` is more readable than `or`/`and`

# In[60]:

# Example:
my_values = {'red': [9, 8, 7]}
print(my_values.get('red', [''])[0] or 0)
print(my_values.get('blue', [''])[0] or 0)

# * The preceding reads: from my_values, get the list stored under 'red' (otherwise use ['']), take its first value ([0]), and if that value is falsy return 0
# * Do something like this instead

# In[61]:


def get_first_int(values, key, default=0):
    """Return the first entry stored under ``key`` as an int.

    Args:
        values: Mapping from keys to lists of values.
        key: Key to look up in ``values``.
        default: Value returned when the key is missing, its list is
            empty, or the first entry is falsy (``''``, ``0``, ``None``).

    Returns:
        ``int(values[key][0])`` when that entry is truthy, else ``default``.

    Raises:
        ValueError: If the first entry cannot be converted by ``int``.
    """
    found = values.get(key, [''])
    # Guard against an empty list so the lookup of found[0] cannot fail.
    if found and found[0]:
        return int(found[0])
    return default


print(get_first_int(my_values, 'blue'))

# ## Item 5: Know How to Slice Sequences
# * Slicing is built in to `list`, `str`, and `bytes`
# * Slicing can be extended to any class that implements `__getitem__` and `__setitem__` methods. (Inheritance from collections.abc -- Item 28)
# * Basic form is `alist[start:end]` where `start` is inclusive and `end` is exclusive.
# In[62]:

a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print('First four:', a[:4])
print('Last four: ', a[-4:])
print('Middle two:', a[3:-3])

# * Using `alist[0:len(alist)]` is redundant
# * Slicing a list will result in a whole new list and modifying the result won't affect the original list

# In[63]:

b = a[:]
b[0:2] = (1, 2)
b[2:4] = ['z', 'y']
print(b)
print(a)

# In[64]:

# A full slice copies the list: equal contents, distinct object.
b = a[:]
assert b == a and b is not a

# * Slicing is forgiving of start and end indexes that are out of bounds, making it easy to express slices at the front or back of the list
# * Assigning to a list slice will replace the range even if their sizes are different

# ## Item 6: Avoid Using `start`, `end`, and `stride` in a single slice
# * Using `start`, `end`, and `stride` in a slice can be confusing
# * Prefer using positive stride in slices without `start` or `end` indexes and avoid using negative `stride` if possible
# * Avoid using `start`, `end`, and `stride` in a single slice
# * Consider doing two assignments (one to slice, another to stride) or use `islice` from `itertools`

# In[65]:

a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print(a)

# Bad
b = a[0:6:2]
print(b)

# Good
c = a[0:6]
d = c[::2]
print(d)
assert b == d

# ## Item 7: Use List Comprehensions Instead of `map` and `filter`
# * List comprehension -- deriving one list from another
# * List comprehensions are easier to use than `map` and `filter` because they don't require `lambda` functions.
# * Ex: You want to compute the square of each number in a list

# In[66]:

a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
squares = [x**2 for x in a]
print(squares)

# * List comprehension is easier to use and allows for filtering

# In[67]:

even_squares = [x**2 for x in a if x % 2 == 0]
print(even_squares)

# Bad, confusing use of map and filter
alt = map(lambda x: x**2, filter(lambda x: x % 2 == 0, a))
assert even_squares == list(alt)

# * Dictionaries and sets have their own equivalents.
# In[68]:

chile_ranks = {'ghost': 1, 'habanero': 2, 'cayenne': 3}
rank_dict = {rank: name for name, rank in chile_ranks.items()}
chile_len_set = {len(name) for name in rank_dict.values()}
print(rank_dict)
print(chile_len_set)

# ## Item 8: Avoid More Than Two Expressions in List Comprehensions
# * List comprehension allows for more than one loop level
# * Don't use more than two for readability
# * Ex: flatten a matrix

# In[69]:

matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [x for row in matrix for x in row]
print(flat)

# * Squaring each

# In[70]:

squared = [[x**2 for x in row] for row in matrix]
print(squared)

# In[71]:

# Additional Examples
a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Multiple `if` clauses at the same loop level are an implicit `and`.
b = [x for x in a if x > 4 if x % 2 == 0]
c = [x for x in a if x > 4 and x % 2 == 0]
print(a)
print(b)
print(c)

# In[72]:

# Bad
# my_lists = [
#     [[1, 2, 3], [4, 5, 6]],
#     …
# ]
# flat = [
#     x
#     for sublist1 in my_lists
#     for sublist2 in sublist1
#     for x in sublist2]

# ## Item 9: Consider Generator Expressions for Large Comprehensions
# * List comprehension works well for small lists but large inputs could crash your program due to memory use
# * Example: reading a file and returning the number of characters on each line

# In[73]:

# `with` closes the file handle once the list has been built.
with open('data/i9_file.txt') as lines:
    value = [len(x) for x in lines]
print(value)

# * Generator expressions don't materialize the whole input sequence when run; they use an iterator to `yield` values as they're requested
# * Generator expressions are created by putting list-comprehension-like syntax between `()` characters

# In[74]:

# The generators below read lazily from the file, so they must be consumed
# while the handle is still open; the next cells stay inside this `with`.
with open('data/i9_file.txt') as lines:
    it = (len(x) for x in lines)
    print(it)

    # In[75]:

    print(next(it))

    # In[76]:

    roots = ((x, x**0.5) for x in it)
    print(next(roots))

# * Chaining generators like this runs quickly in Python.
# * For large streams of input, generators are the best tool
# * Iterators are stateful and you need to be careful to only read once

# ## Item 10: Prefer `enumerate` over `range`
# * `range` is useful for loops over a set of integers
# * Not so much for lists

# In[77]:

#random_bits = 0
#for i in range(64):
#    if randint(0, 1):
#        random_bits |= 1 << i

# In[78]:

flavor_list = ['vanilla', 'chocolate', 'pecan', 'strawberry']
for flavor in flavor_list:
    print('%s is delicious' % flavor)

# In[79]:

# Clumsy (kept on purpose as the counter-example)
for i in range(len(flavor_list)):
    flavor = flavor_list[i]
    print('%d: %s' % (i + 1, flavor))

# In[80]:

# Much better
for i, flavor in enumerate(flavor_list):
    print('%d: %s' % (i + 1, flavor))

# * You can even specify the number at which enumerate starts! Notice the second `enumerate` argument

# In[81]:

for i, flavor in enumerate(flavor_list, 1):
    print('%d: %s' % (i, flavor))

# ## Item 11: Use `zip` to Process Iterators in Parallel

# In[82]:

names = ['Cecilia', 'Lise', 'Marie']
letters = [len(n) for n in names]

# In[83]:

# Start code (kept on purpose as the counter-example)
longest_name = None
max_letters = 0
for i in range(len(names)):
    count = letters[i]
    if count > max_letters:
        longest_name = names[i]
        max_letters = count
print(longest_name)

# In[84]:

# Better
# Reset the running maximum so this demo does its own search instead of
# inheriting the answer from the previous cell.
longest_name = None
max_letters = 0
for i, name in enumerate(names):
    count = letters[i]
    if count > max_letters:
        longest_name = name
        max_letters = count
print(longest_name)

# In[85]:

# Best
longest_name = None
max_letters = 0
for name, count in zip(names, letters):
    if count > max_letters:
        longest_name = name
        max_letters = count
print(longest_name)

# * Zip stops when the shortest iterator is exhausted, be careful
# * Zip is a lazy generator producing tuples
# * Use `zip_longest` from `itertools` to iterate over multiple iterators regardless of length

# ## Item 12: Avoid else Blocks After for and while Loops
# * Python loops allow for else blocks after loops (`while` and `for`)
# * `else` only runs if the loop body did not encounter a break statement
# * Confusing, don't use

# In[86]:

for x in []:
    print('Never runs')
else:
    print('For Else block!')

# ## Item 13: Take Advantage of Each Block in `try`/`except`/`else`/`finally`
# * `try`/`finally` allows for you to run cleanup code regardless of exceptions raised in `try` block
# * `else` helps minimize the amount of code in `try` and distinguishes success case from `try`/`except` block
# * `else` can be used to perform additional actions after successful `try` block but before cleanup in `finally`

# In[87]:

UNDEFINED = object()


def divide_json(path):
    """Divide the numerator by the denominator stored in a JSON file.

    Reads a JSON object with ``numerator`` and ``denominator`` keys from
    ``path``, stores the quotient under ``result`` and rewrites the file
    in place.

    Args:
        path: Path of a readable and writable JSON file.

    Returns:
        The quotient, or the ``UNDEFINED`` sentinel when the denominator
        is zero (the file is left untouched in that case).

    Raises:
        OSError: If the file cannot be opened or written.
        UnicodeDecodeError: If the file content cannot be decoded.
        ValueError: If the content is not valid JSON.
        KeyError: If ``numerator`` or ``denominator`` is missing.
    """
    # Imported here so the function works on its own; this file has no
    # module-level `import json`.
    import json

    # Explicit try/except/else/finally (rather than `with`) is the point of
    # this item: each block has exactly one job.
    handle = open(path, 'r+')   # May raise IOError
    try:
        data = handle.read()    # May raise UnicodeDecodeError
        op = json.loads(data)   # May raise ValueError
        value = (
            op['numerator'] /
            op['denominator'])  # May raise ZeroDivisionError
    except ZeroDivisionError:
        return UNDEFINED
    else:
        op['result'] = value
        result = json.dumps(op)
        handle.seek(0)
        handle.write(result)    # May raise IOError
        # Drop leftover bytes when the new JSON is shorter than the old text.
        handle.truncate()
        return value
    finally:
        handle.close()          # Always runs

# In[ ]: