#!/usr/bin/env python
# coding: utf-8

# [Go back](https://github.com/rasbt/python_reference) to the `python_reference` repository.

# # A random collection of useful Python snippets

# I just cleaned my hard drive and found a couple of useful Python snippets that I had some use for in the past. I thought it would be worthwhile to collect them in an IPython notebook for personal reference and to share them with people who might find them useful too.
# Most of these snippets are hopefully self-explanatory, but I am planning to add more comments and descriptions in the future.
#
#
# ## Table of Contents # - [Bitstrings from positive and negative elements in a list](#Bitstrings-from-positive-and-negative-elements-in-a-list) # - [Command line arguments 1 - sys.argv](#Command-line-arguments-1---sys.argv) # - [Data and time basics](#Data-and-time-basics) # - [Differences between 2 files](#Differences-between-2-files) # - [Differences between successive elements in a list](#Differences-between-successive-elements-in-a-list) # - [Doctest example](#Doctest-example) # - [English language detection](#English-language-detection) # - [File browsing basics](#File-browsing-basics) # - [File reading basics](#File-reading-basics) # - [Indices of min and max elements from a list](#Indices-of-min-and-max-elements-from-a-list) # - [Lambda functions](#Lambda-functions) # - [Private functions](#Private-functions) # - [Namedtuples](#Namedtuples) # - [Normalizing data](#Normalizing-data) # - [NumPy essentials](#NumPy-essentials) # - [Pickling Python objects to bitstreams](#Pickling-Python-objects-to-bitstreams) # - [Python version check](#Python-version-check) # - [Runtime within a script](#Runtime-within-a-script) # - [Sorting lists of tuples by elements](#Sorting-lists-of-tuples-by-elements) # - [Sorting multiple lists relative to each other](#Sorting-multiple-lists-relative-to-each-other) # - [Using namedtuples](#Using-namedtuples) #
#
# In[1]: get_ipython().run_line_magic('load_ext', 'watermark') # In[2]: get_ipython().run_line_magic('watermark', '-d -a "Sebastian Raschka" -v') # [More information](https://github.com/rasbt/watermark) about the `watermark` magic command extension. #
#
#
#
# ## Bitstrings from positive and negative elements in a list

# [back to top](#Table-of-Contents)

# In[3]:


# Generating a bitstring from a Python list or NumPy array
# where all positive values -> 1
# all other values (zero and negative) -> 0

import numpy as np


def make_bitstring(ary):
    """Return an integer array holding 1 where `ary` > 0 and 0 elsewhere.

    `ary` may be a NumPy array or any sequence `np.asarray` can convert.
    """
    return np.where(np.asarray(ary) > 0, 1, 0)


def faster_bitstring(ary):
    """Return the same bit pattern as `make_bitstring` as an int8 array.

    `ary` may be a NumPy array or any sequence `np.asarray` can convert.
    """
    # The one-argument form of np.where() returns a tuple of index arrays,
    # which has no .astype(); casting the boolean mask directly is both
    # correct and avoids building the intermediate index arrays.
    return (np.asarray(ary) > 0).astype('i1')


### Example:

ary1 = np.array([1, 2, 0.3, -1, -2])
print('input values %s' %ary1)
print('bitstring %s' %make_bitstring(ary1))
#
# ## Command line arguments 1 - sys.argv

# [back to top](#Table-of-Contents)

# In[5]:


# Write the example script to disk with the `%%file` cell magic
# (requires an IPython/Jupyter session).
get_ipython().run_cell_magic('file', 'cmd_line_args_1_sysarg.py', 'import sys\n\ndef error(msg):\n """Prints error message, sends it to stderr, and quites the program."""\n sys.exit(msg)\n\nargs = sys.argv[1:] # sys.argv[0] is the name of the python script itself\n\ntry:\n arg1 = int(args[0])\n arg2 = args[1]\n arg3 = args[2]\n print("Everything okay!")\n\nexcept ValueError:\n error("First argument must be integer type!")\n\nexcept IndexError:\n error("Requires 3 arguments!")\n')


# In[6]:


# Run the script with three valid arguments. The magic name must be 'run';
# the script name and its arguments form the magic's argument line.
get_ipython().run_line_magic('run', 'cmd_line_args_1_sysarg.py 1 2 3')


# In[7]:


# Run the script with a non-integer first argument to trigger the
# ValueError branch of the example.
get_ipython().run_line_magic('run', 'cmd_line_args_1_sysarg.py a 2 3')
#
# ## Data and time basics

# [back to top](#Table-of-Contents)

# In[7]:


import time

# Current local time formatted as HOURS:MINUTES:SECONDS,
# e.g., '10:50:58'
clock_format = "%H:%M:%S"
print(time.strftime(clock_format))

# Current local date formatted as DAY/MONTH/YEAR,
# e.g., '06/03/2014'
date_format = "%d/%m/%Y"
print(time.strftime(date_format))
#
# ## Differences between 2 files

# [back to top](#Table-of-Contents)

# In[9]:


get_ipython().run_cell_magic('file', 'id_file1.txt', '1234\n2342\n2341\n')


# In[10]:


get_ipython().run_cell_magic('file', 'id_file2.txt', '5234\n3344\n2341\n')


# In[11]:


# Print the lines that occur in the second file but not in the first.
# Each file is reduced to a set of stripped lines, so the order of the
# file contents (and duplicate lines) does not matter.

with open('id_file1.txt', 'r') as id_file:
    id_set1 = {line.strip() for line in id_file}

with open('id_file2.txt', 'r') as id_file:
    id_set2 = {line.strip() for line in id_file}

# one-sided difference: entries of file 2 that are missing from file 1
diffs = id_set2 - id_set1

for d in diffs:
    print(d)

print("Total differences:",len(diffs))
#
# ## Differences between successive elements in a list

# [back to top](#Table-of-Contents)

# In[12]:


from itertools import islice

lst = [1,2,3,5,8]

# Pair every element with its successor: islice() skips the first item
# lazily, and zip() stops as soon as the shorter iterable is exhausted.
successors = islice(lst, 1, None)
diff = [nxt - cur for cur, nxt in zip(lst, successors)]

print(diff)
#
# ## Doctest example

# [back to top](#Table-of-Contents)

# In[17]:


def subtract(a, b):
    """
    Subtracts second from first number and returns result.

    >>> subtract(10, 5)
    5
    >>> subtract(11, 0.7)
    10.3
    """
    return a-b

if __name__ == "__main__": # is 'false' if imported
    import doctest
    # testmod() runs every docstring example in this module and reports
    # mismatches; it prints nothing when all examples pass.
    doctest.testmod()
    print('ok')


# In[18]:


def hello_world():
    """
    Returns 'Hello, World'

    >>> hello_world()
    'Hello, World'
    """
    # The return value must match the docstring example character for
    # character, otherwise doctest reports a failure.
    return 'Hello, World'

if __name__ == "__main__": # is 'false' if imported
    import doctest
    doctest.testmod()
#
# ## English language detection

# [back to top](#Table-of-Contents)

# In[1]:


import nltk

def eng_ratio(text):
    '''
    Returns the fraction of unique, purely alphabetic words in `text`
    that are not found in NLTK's English word list.

    The comparison is case-insensitive. 0.0 means every word was found,
    1.0 means none was. Returns 0.0 if `text` contains no alphabetic
    words at all (nothing to classify).
    '''
    text_vocab = set(w.lower() for w in text.split() if w.lower().isalpha())
    if not text_vocab:
        # avoid a ZeroDivisionError for empty or non-alphabetic input
        return 0.0

    english_vocab = set(w.lower() for w in nltk.corpus.words.words())
    unusual = text_vocab.difference(english_vocab)
    diff = len(unusual)/len(text_vocab)
    return diff

text = 'This is a test fahrrad'

print(eng_ratio(text))
#
# ## File browsing basics

# [back to top](#Table-of-Contents)

# In[ ]:


# NOTE: this cell is a reference cheat sheet. The paths are placeholders
# and several calls delete, move, or rename files, so it is not meant to
# be executed top to bottom.

import os
import shutil
import glob

# working directory
c_dir = os.getcwd()        # show current working directory
os.listdir(c_dir)          # shows all files in the working directory
# change working directory; '~' is not expanded by os.chdir itself
os.chdir(os.path.expanduser('~/Data'))

# get all files in a directory
glob.glob('/Users/sebastian/Desktop/*')

# e.g.,  ['/Users/sebastian/Desktop/untitled folder', '/Users/sebastian/Desktop/Untitled.txt']

# walk
tree = os.walk(c_dir)
# moves through sub directories and creates a 'generator' object of tuples
# ('dir', [subdirectory1, subdirectory2, ...], [file1, file2, ...]),
#    (...), ...

# check files: returns either True or False
# (these functions live in os.path, not in os)
os.path.exists('../rel_path')
os.path.exists('/home/abs_path')
os.path.isfile('./file.txt')
os.path.isdir('./subdir')

# file permission (True or False)
os.access('./some_file', os.F_OK)             # File exists? Python 2.7
os.access('./some_file', os.R_OK)             # Ok to read? Python 2.7
os.access('./some_file', os.W_OK)             # Ok to write? Python 2.7
os.access('./some_file', os.X_OK)             # Ok to execute? Python 2.7
os.access('./some_file', os.X_OK | os.W_OK)   # Ok to execute and write? Python 2.7

# join (creates operating system dependent paths)
os.path.join('a', 'b', 'c')
# 'a/b/c' on Unix/Linux
# 'a\\b\\c' on Windows
os.path.normpath('a/b/c')  # converts file separators

# os.path: directory and file names
os.path.samefile('./some_file', '/home/some_file')  # True if those are the same
os.path.dirname('./some_file')         # returns '.' (everything but last component)
os.path.basename('./some_file')        # returns 'some_file' (only last component)
os.path.split('./some_file')           # returns (dirname, basename) or ('.', 'some_file')
os.path.splitext('./some_file.txt')    # returns ('./some_file', '.txt')
os.path.splitdrive('./some_file.txt')  # returns ('', './some_file.txt')
os.path.isabs('./some_file.txt')       # returns False (not an absolute path)
os.path.abspath('./some_file.txt')

# create and delete files and directories
os.mkdir('./test')                        # create a new directory
os.rmdir('./test')                        # removes an empty directory
os.removedirs('./test')                   # removes nested empty directories
os.remove('file.txt')                     # removes an individual file
shutil.rmtree('./test')                   # removes directory (empty or not empty)

os.rename('./dir_before', './renamed')    # renames directory if destination doesn't exist
shutil.move('./dir_before', './renamed')  # renames directory always

shutil.copytree('./orig', './copy')       # copies a directory recursively
shutil.copyfile('file', 'copy')           # copies a file

# Getting files of particular type from directory
files = [f for f in os.listdir(c_dir) if f.endswith(".txt")]

# Copy and move
shutil.copyfile("/path/to/file", "/path/to/new/file")
shutil.copy("/path/to/file", "/path/to/directory")
shutil.move("/path/to/file","/path/to/directory")

# Check if file or directory exists
os.path.exists("file or directory")
os.path.isfile("file")
os.path.isdir("directory")

# Working directory and absolute path to files
os.getcwd()
os.path.abspath("file")
#
# ## File reading basics

# [back to top](#Table-of-Contents)

# In[ ]:


# Note: rb opens file in binary mode to avoid issues with Windows systems
# where '\r\n' is used instead of '\n' as newline character(s).
# In binary mode every read returns bytes objects, not text strings.


# A) Reading in Byte chunks
chunks = []
with open("file.txt", "rb") as reader_a:
    data = reader_a.read(64)      # reads first 64 bytes
    # read() returns an empty bytes object at end of file. Testing
    # truthiness works for bytes and str alike, whereas comparing
    # against "" never matches b"" and would loop forever.
    while data:
        chunks.append(data)
        data = reader_a.read(64)
print(len(chunks))


# B) Reading whole file at once into a list of lines
with open("file.txt", "rb") as reader_b:   # recommended syntax, auto closes
    data = reader_b.readlines()            # data is assigned a list of lines
print(len(data))


# C) Reading whole file at once into a single bytes object
with open("file.txt", "rb") as reader_c:
    data = reader_c.read()                 # data is assigned the whole file content
print(len(data))


# D) Reading line by line into a list
data = []
with open("file.txt", "rb") as reader_d:
    for line in reader_d:
        data.append(line)
print(len(data))
#
# ## Indices of min and max elements from a list

# [back to top](#Table-of-Contents)

# In[19]:


import operator

values = [1, 2, 3, 4, 5]

# enumerate() yields (index, value) pairs; comparing the pairs by their
# second field finds the extreme value together with its position.
by_value = operator.itemgetter(1)
indexed_values = list(enumerate(values))

min_index, min_value = min(indexed_values, key=by_value)
max_index, max_value = max(indexed_values, key=by_value)

print('min_index:', min_index, 'min_value:', min_value)
print('max_index:', max_index, 'max_value:', max_value)
#
# ## Lambda functions

# [back to top](#Table-of-Contents)

# In[20]:


# A lambda expression is a compact way of writing a small,
# single-expression function.

def square_root1(x):
    """Return `x` raised to the power 0.5 (regular function definition)."""
    root = pow(x, 0.5)
    return root

# the same computation written as a lambda expression
square_root2 = lambda x: pow(x, 0.5)

# both definitions must agree
assert square_root1(9) == square_root2(9)
#
# ## Private functions

# [back to top](#Table-of-Contents)

# In[2]:


def create_message(msg_txt):
    """Return a function that prints its argument prefixed with `msg_txt`."""

    def _priv_msg(message):
        # Defined inside create_message, so it cannot be referenced by name
        # from outside; it keeps access to msg_txt through the closure.
        line = "{0}: {1}".format(msg_txt, message)
        print(line)

    return _priv_msg  # hand the inner function back to the caller

new_msg = create_message("My message")
# note, new_msg is a function

new_msg("Hello, World")
#
# ## Namedtuples

# [back to top](#Table-of-Contents)

# In[25]:


from collections import namedtuple

# Field names may be given as one whitespace-separated string
# instead of a list of strings.
my_namedtuple = namedtuple('field_name', 'x y z bla blub')

# Fields can be filled by keyword and read back by attribute name.
p = my_namedtuple(x=1, y=2, z=3, bla=4, blub=5)
print(p.x, p.y, p.z)
#
# ## Normalizing data

# [back to top](#Table-of-Contents)

# In[28]:


def normalize(data, min_val=0, max_val=1):
    """
    Linearly rescales the values in `data` to the range
    [min_val, max_val], e.g., between 0.0 and 1.0.

    The smallest value in `data` is mapped to `min_val` and the largest
    to `max_val`.

    Arguments:
        data: sequence of numbers (must support len(), min() and max()).
        min_val: lower bound of the target range.
        max_val: upper bound of the target range.

    Returns a new list of rescaled values; `data` is not modified.
    An empty `data` yields an empty list. If all values in `data` are
    equal, there is no spread to rescale, so every value is mapped to
    `min_val`.

    """
    if len(data) == 0:
        return []

    data_min = min(data)
    data_max = max(data)
    denominator = data_max - data_min

    if denominator == 0:
        # constant input: avoid dividing by zero
        return [min_val for _ in data]

    norm_data = []
    for x in data:
        numerator = x - data_min
        x_norm = (max_val-min_val) * numerator/denominator + min_val
        norm_data.append(x_norm)
    return norm_data


# In[31]:


normalize([1,2,3,4,5])


# In[30]:


normalize([1,2,3,4,5], min_val=-10, max_val=10)
#
# ## NumPy essentials

# [back to top](#Table-of-Contents)

# In[ ]:


import numpy as np

ary1 = np.array([1,2,3,4,5])   # must be same type
ary2 = np.zeros((3,4))         # 3x4 matrix consisting of 0s
ary3 = np.ones((3,4))          # 3x4 matrix consisting of 1s
ary4 = np.identity(3)          # 3x3 identity matrix
ary5 = ary1.copy()             # make a copy of ary1

item1 = ary3[0, 0]             # item in row1, column1

ary2.shape                     # tuple of dimensions. Here: (3,4)
ary2.size                      # number of elements. Here: 12


ary2_t = ary2.transpose()      # transposes matrix

ary2.ravel()                   # makes an array linear (1-dimensional)
                               # by concatenating rows
ary2.reshape(2,6)              # reshapes array (must have same number of elements)

ary3[0:2, 0:3]                 # submatrix of first 2 rows and first 3 columns

ary3 = ary3[[2,0,1]]           # re-arrange rows


# element-wise operations

ary1 + ary1
ary1 * ary1

# The module is imported as `np`, so the functions are reached through
# that alias. ary1 is 1-dimensional, so axis 0 is its only valid axis.
np.dot(ary1, ary1)             # matrix/vector (dot) product

np.sum(ary1, axis=0)           # sum of a 1D array, column sums of a 2D array
np.mean(ary1, axis=0)          # mean of a 1D array, column means of a 2D array
#
# ## Pickling Python objects to bitstreams

# [back to top](#Table-of-Contents)

# In[35]:


import pickle

#### Generate some object

my_dict = dict()
for i in range(1,10):
    my_dict[i] = "some text"

#### Save object to file
# `with` closes the file even if pickle.dump() raises
with open('my_file.pkl', 'wb') as pickle_out:
    pickle.dump(my_dict, pickle_out)

#### Load object from file
# NOTE: unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
with open('my_file.pkl', 'rb') as my_object_file:
    my_dict = pickle.load(my_object_file)

print(my_dict)
#
# ## Python version check

# [back to top](#Table-of-Contents)

# In[36]:


import sys

def give_letter(word):
    """Generator that yields the items of `word` one at a time."""
    for letter in word:
        yield letter

# sys.version_info[0] holds the major version of the running interpreter
major_version = sys.version_info[0]

if major_version == 3:
    print('executed in Python 3.x')
    test = give_letter('Hello')
    # Python 3: advance a generator with the built-in next()
    print(next(test))
    print('in for-loop:')
    for letter in test:
        print(letter)

# if Python 2.x
elif major_version == 2:
    print('executed in Python 2.x')
    test = give_letter('Hello')
    # Python 2: generators expose a .next() method
    print(test.next())
    print('in for-loop:')
    for letter in test:
        print(letter)
#
# ## Runtime within a script

# [back to top](#Table-of-Contents)

# In[4]:


import time

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring short durations.
start_time = time.perf_counter()

for i in range(10000000):
    pass

elapsed_time = time.perf_counter() - start_time
print("Time elapsed: {} seconds".format(elapsed_time))


# In[6]:


import timeit

# timeit compiles the statement and returns the time taken to run it
# `number` times (here: once), in seconds.
elapsed_time = timeit.timeit('for i in range(10000000): pass', number=1)
print("Time elapsed: {} seconds".format(elapsed_time))
#
# ## Sorting lists of tuples by elements

# [back to top](#Table-of-Contents)

# In[37]:


# The built-in "sorted()" function (like the ".sort()" method) accepts a
# "key" parameter: a function that is called on every element and whose
# return value is used for the comparison. Here the key function returns
# the reversed tuple, so tuples are ordered by their last item first and
# ties are broken by the preceding items.

def _reversed_tuple(entry):
    return entry[::-1]

a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]

sorted_list = sorted(a_list, key=_reversed_tuple)

print(sorted_list)
# prints [(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')]


# In[38]:


# If only the last element of each tuple matters and the order within a
# "tie" is irrelevant, the key function can return just that element;
# this avoids building a reversed copy of every tuple.

def _last_item(entry):
    return entry[-1]

a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]

sorted_list = sorted(a_list, key=_last_item)

print(sorted_list)
#
# ## Sorting multiple lists relative to each other

# [back to top](#Table-of-Contents)

# In[49]:


# You have 3 lists that you want to sort "relative" to each other,
# for example, picturing each list as a row in a 3x3 matrix: sort it by columns
#
# If the input lists are
#
#     list1 = ['c','b','a']
#     list2 = [6,5,4]
#     list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']
#
# the desired outcome is:
#
#     ['a', 'b', 'c']
#     [4, 5, 6]
#     ['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c']
#
# and NOT:
#
#     ['a', 'b', 'c']
#     [4, 5, 6]
#     ['another_val-b', 'some-val-associated-with-c', 'z_another_third_val-a']

list1 = ['c','b','a']
list2 = [6,5,4]
list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']

print('input values:\n', list1, list2, list3)

# zip() groups the i-th items of all lists into one "column" tuple;
# sorting the columns keeps associated values together.
columns = sorted(zip(list1, list2, list3))

# zip(*...) turns the sorted columns back into rows.
list1, list2, list3 = (list(row) for row in zip(*columns))

print('\n\nsorted output:\n', list1, list2, list3 )
#
# ## Using namedtuples

# [back to top](#Table-of-Contents)

# `namedtuples` are high-performance container datatypes in the [`collections`](https://docs.python.org/2/library/collections.html) module (part of Python's stdlib since 2.6).
# `namedtuple()` is a factory function for creating tuple subclasses with named fields.

# In[1]:


from collections import namedtuple

# The field names may also be passed as one whitespace-separated string.
Coordinates = namedtuple('Coordinates', 'x y z')

# Fields can be assigned by keyword and read back by attribute name.
point1 = Coordinates(x=1, y=2, z=3)
print('X-coordinate: %d' % point1.x)


# In[ ]: