Go back to the python_reference
repository.
I just cleaned my hard drive and found a couple of useful Python snippets that I had some use for in the past. I thought it would be worthwhile to collect them in an IPython notebook for personal reference and share it with people who might find them useful too.
Most of those snippets are hopefully self-explanatory, but I am planning to add more comments and descriptions in the future.
%load_ext watermark
%watermark -d -a "Sebastian Raschka" -v
Sebastian Raschka 26/09/2014 CPython 3.4.1 IPython 2.0.0
More information about the watermark
magic command extension.
# Generating a bitstring from a Python list or numpy array
# where all positive values -> 1
# all non-positive values (negative or zero) -> 0
import numpy as np
def make_bitstring(ary):
    """
    Map every value of `ary` to 1 if it is positive and to 0 otherwise.

    Parameters
    ----------
    ary : list or numpy array of numbers.

    Returns
    -------
    numpy integer array with the same shape as `ary`.
    """
    # np.asarray makes the comparison work for plain Python lists as well;
    # `some_list > 0` raises a TypeError in Python 3.
    return np.where(np.asarray(ary) > 0, 1, 0)
def faster_bitstring(ary):
    """
    Map every value of `ary` to 1 if it is positive and to 0 otherwise.

    Casts the boolean mask directly to 1-byte integers instead of going
    through np.where.

    Parameters
    ----------
    ary : list or numpy array of numbers.

    Returns
    -------
    numpy array of dtype int8 with the same shape as `ary`.
    """
    # np.where(condition) with a single argument returns a tuple of index
    # arrays, which has no .astype(); cast the boolean mask itself.
    return (np.asarray(ary) > 0).astype('i1')
### Example: convert a small float array and show input and result
ary1 = np.array([1, 2, 0.3, -1, -2])
print('input values %s' % (ary1,))
bitstring = make_bitstring(ary1)
print('bitstring %s' % (bitstring,))
input values [ 1. 2. 0.3 -1. -2. ] bitstring [1 1 1 0 0]
%%file cmd_line_args_1_sysarg.py
import sys
def error(msg):
    """Quit the program; `msg` is handed to SystemExit as the exit argument."""
    # Raising SystemExit directly is what sys.exit(msg) does internally.
    raise SystemExit(msg)
args = sys.argv[1:] # sys.argv[0] is the name of the python script itself

try:
    # Keep only the argument parsing inside the try block, so that the
    # handlers below cannot accidentally swallow errors from other code.
    arg1 = int(args[0])   # raises ValueError if not an integer literal
    arg2 = args[1]        # raises IndexError if fewer than 3 arguments
    arg3 = args[2]
except ValueError:
    error("First argument must be integer type!")
except IndexError:
    error("Requires 3 arguments!")
else:
    # Only reached when all three arguments were parsed successfully.
    print("Everything okay!")
Overwriting cmd_line_args_1_sysarg.py
% run cmd_line_args_1_sysarg.py 1 2 3
Everything okay!
% run cmd_line_args_1_sysarg.py a 2 3
An exception has occurred, use %tb to see the full traceback. SystemExit: First argument must be integer type!
import time
# Print the current time as HOURS:MINUTES:SECONDS,
# e.g., '10:50:58'
time_format = "%H:%M:%S"
print(time.strftime(time_format))

# Print the current date as DAY/MONTH/YEAR,
# e.g., '06/03/2014'
date_format = "%d/%m/%Y"
print(time.strftime(date_format))
13:28:05 26/09/2014
%%file id_file1.txt
1234
2342
2341
Writing id_file1.txt
%%file id_file2.txt
5234
3344
2341
Writing id_file2.txt
# Print the lines of id_file2.txt that do not occur in id_file1.txt.
# Insensitive to the order of the file contents.
with open('id_file1.txt', 'r') as id_file:
    id_set1 = {line.strip() for line in id_file}

with open('id_file2.txt', 'r') as id_file:
    id_set2 = {line.strip() for line in id_file}

# Entries of the second set that are missing from the first one.
diffs = id_set2 - id_set1

for d in diffs:
    print(d)
print("Total differences:",len(diffs))
5234 3344 Total differences: 2
from itertools import islice
lst = [1,2,3,5,8]

# Pair every element with its successor and collect the differences.
successors = islice(lst, 1, None)
diff = []
for previous, following in zip(lst, successors):
    diff.append(following - previous)

print(diff)
[1, 1, 2, 3]
def subtract(a, b):
    """
    Return the difference of `a` and `b` (i.e., `a - b`).

    >>> subtract(10, 5)
    5
    >>> subtract(11, 0.7)
    10.3
    """
    difference = a - b
    return difference
if __name__ == "__main__": # is 'false' if imported
    import doctest

    # testmod() returns a TestResults(failed, attempted) tuple; report
    # success only if no doctest failed instead of printing 'ok' always.
    results = doctest.testmod()
    if results.failed == 0:
        print('ok')
ok
def hello_world():
    """
    Returns 'Hello, World'
    >>> hello_world()
    'Hello, World'
    """
    # NOTE(review): the returned text differs from the doctest expectation
    # above; this looks intentional, to demonstrate a failing doctest.
    greeting = 'hello world'
    return greeting
# Run all doctests of this module, but only when it is executed as a
# script and not when it is imported.
if __name__ == "__main__": # is 'false' if imported
    import doctest
    doctest.testmod()
********************************************************************** File "__main__", line 4, in __main__.hello_world Failed example: hello_world() Expected: 'Hello, World' Got: 'hello world' ********************************************************************** 1 items had failures: 1 of 1 in __main__.hello_world ***Test Failed*** 1 failures.
import nltk
def eng_ratio(text):
    """
    Return the fraction of distinct words in `text` that are not English.

    Only purely alphabetic, whitespace-separated tokens are counted and
    the comparison is case-insensitive. A word counts as English if it
    occurs in the nltk `words` corpus.

    Returns 0.0 if `text` contains no alphabetic words at all.
    """
    text_vocab = {w.lower() for w in text.split() if w.lower().isalpha()}
    if not text_vocab:
        # Nothing to classify; avoids a ZeroDivisionError below and skips
        # building the (large) English vocabulary.
        return 0.0
    english_vocab = {w.lower() for w in nltk.corpus.words.words()}
    unusual = text_vocab - english_vocab
    return len(unusual) / len(text_vocab)
# Example: the text contains one word that is not in the English vocabulary.
text = 'This is a test fahrrad'
ratio = eng_ratio(text)
print(ratio)
0.2
import os
import shutil
import glob
# working directory
c_dir = os.getcwd() # show current working directory
os.listdir(c_dir) # shows all files in the working directory
# '~' is not expanded by os.chdir itself, so expand it explicitly
os.chdir(os.path.expanduser('~/Data')) # change working directory

# get all files in a directory
glob.glob('/Users/sebastian/Desktop/*')
# e.g., ['/Users/sebastian/Desktop/untitled folder', '/Users/sebastian/Desktop/Untitled.txt']

# walk
tree = os.walk(c_dir)
# moves through sub directories and creates a 'generator' object of tuples
# ('dir', [subdirectory1, subdirectory2, ...], [file1, file2, ...]),
# (...), ...
# check files: returns either True or False
# (these functions live in os.path, not directly in os)
os.path.exists('../rel_path')
os.path.exists('/home/abs_path')
os.path.isfile('./file.txt')
os.path.isdir('./subdir')

# file permission (True or False)
os.access('./some_file', os.F_OK) # File exists? Python 2.7
os.access('./some_file', os.R_OK) # Ok to read? Python 2.7
os.access('./some_file', os.W_OK) # Ok to write? Python 2.7
os.access('./some_file', os.X_OK) # Ok to execute? Python 2.7
# OR-ing the flags tests for all of the permissions at once
os.access('./some_file', os.X_OK | os.W_OK) # Ok to execute and write? Python 2.7
# join (creates operating system dependent paths)
os.path.join('a', 'b', 'c')
# 'a/b/c' on Unix/Linux
# 'a\\b\\c' on Windows
os.path.normpath('a/b/c') # normalizes the path (e.g., converts file separators on Windows)

# os.path: directory and file names
os.path.samefile('./some_file', '/home/some_file') # True if those are the same
os.path.dirname('./some_file') # returns '.' (everything but last component)
os.path.basename('./some_file') # returns 'some_file' (only last component)
os.path.split('./some_file') # returns (dirname, basename) or ('.', 'some_file')
os.path.splitext('./some_file.txt') # returns ('./some_file', '.txt')
os.path.splitdrive('./some_file.txt') # returns ('', './some_file.txt')
os.path.isabs('./some_file.txt') # returns False (not an absolute path)
os.path.abspath('./some_file.txt') # returns the absolute version of the path

# create and delete files and directories
os.mkdir('./test') # create a new directory
os.rmdir('./test') # removes an empty directory
os.removedirs('./test') # removes nested empty directories
os.remove('file.txt') # removes an individual file
shutil.rmtree('./test') # removes directory (empty or not empty)

os.rename('./dir_before', './renamed') # renames directory if destination doesn't exist
shutil.move('./dir_before', './renamed') # renames directory; moves it into './renamed' if that is an existing directory

shutil.copytree('./orig', './copy') # copies a directory recursively
shutil.copyfile('file', 'copy') # copies a file

# Getting files of particular type from directory
# NOTE(review): s_pdb_dir is not defined in this snippet; presumably it
# holds the path of the directory to search -- confirm before running.
files = [f for f in os.listdir(s_pdb_dir) if f.endswith(".txt")]

# Copy and move
shutil.copyfile("/path/to/file", "/path/to/new/file")
shutil.copy("/path/to/file", "/path/to/directory")
shutil.move("/path/to/file","/path/to/directory")

# Check if file or directory exists
os.path.exists("file or directory")
os.path.isfile("file")
os.path.isdir("directory")

# Working directory and absolute path to files
os.getcwd()
os.path.abspath("file")
# Note: rb opens file in binary mode to avoid issues with Windows systems
# where '\r\n' is used instead of '\n' as newline character(s).
# A) Reading in Byte chunks
chunks = []
with open("file.txt", "rb") as reader_a: # closes the file even on errors
    data = reader_a.read(64) # reads first 64 bytes
    # In binary mode read() returns bytes and b"" at the end of the file.
    # Comparing against the str "" is always unequal in Python 3 and would
    # loop forever, so rely on the truthiness of the chunk instead.
    while data:
        chunks.append(data)
        data = reader_a.read(64)
print(len(chunks))
# B) Reading whole file at once into a list of lines
with open("file.txt", "rb") as reader_b: # the with-block closes the file
    data = list(reader_b) # data is assigned a list of lines
print(len(data))
# C) Reading whole file at once into a single object
# (a bytes object here, because the file is opened in binary mode)
with open("file.txt", "rb") as reader_c:
    data = reader_c.read() # data is assigned the complete file content
print(len(data)) # number of bytes read
# D) Reading line by line into a list
data = []
with open("file.txt", "rb") as reader_d:
    # Iterating the file object yields one line at a time.
    data.extend(reader_d)
print(len(data))
import operator
values = [1, 2, 3, 4, 5]

# Pair every value with its index and compare the pairs by value only.
indexed_values = list(enumerate(values))
by_value = operator.itemgetter(1)

min_index, min_value = min(indexed_values, key=by_value)
max_index, max_value = max(indexed_values, key=by_value)

print('min_index:', min_index, 'min_value:', min_value)
print('max_index:', max_index, 'max_value:', max_value)
min_index: 0 min_value: 1 max_index: 4 max_value: 5
# Lambda functions are just a short-hand way of writing
# short function definitions
def square_root1(x):
    """Return the square root of `x` (regular function definition)."""
    return pow(x, 0.5)

# The same computation written as an anonymous (lambda) function.
square_root2 = lambda x: x**0.5

# Both variants must produce the same result.
assert square_root1(9) == square_root2(9)
def create_message(msg_txt):
    """Return a function that prints its argument prefixed with `msg_txt`."""
    template = "{}: {}"

    def _priv_msg(message): # private, no access from outside
        print(template.format(msg_txt, message))

    return _priv_msg # the inner function keeps access to msg_txt


new_msg = create_message("My message")
# note, new_msg is a function
new_msg("Hello, World")
My message: Hello, World
from collections import namedtuple
# Field names may also be given as one whitespace-separated string.
my_namedtuple = namedtuple('field_name', 'x y z bla blub')
p = my_namedtuple(1, 2, 3, 4, 5)

# Fields are accessible by name.
print(p.x, p.y, p.z)
1 2 3
def normalize(data, min_val=0, max_val=1):
    """
    Linearly rescale the values in `data` to the range
    [`min_val`, `max_val`], e.g., between 0.0 and 1.0.

    Parameters
    ----------
    data : iterable of numbers.
    min_val : value the smallest data point is mapped to (default 0).
    max_val : value the largest data point is mapped to (default 1).

    Returns
    -------
    A new list with the rescaled values, in the original order.
    An empty input yields an empty list. If all data points are equal
    the range is degenerate and every value is mapped to `min_val`.
    """
    data = list(data) # min() and max() need two passes over the data
    if not data:
        return []

    data_min = min(data)
    data_range = max(data) - data_min # loop-invariant, computed once
    if data_range == 0:
        # Avoid dividing by zero when all values are identical.
        return [float(min_val)] * len(data)

    scale = max_val - min_val
    return [scale * (x - data_min) / data_range + min_val for x in data]
normalize([1,2,3,4,5])
[0.0, 0.25, 0.5, 0.75, 1.0]
normalize([1,2,3,4,5], min_val=-10, max_val=10)
[-10.0, -5.0, 0.0, 5.0, 10.0]
import numpy as np
ary1 = np.array([1,2,3,4,5]) # all elements share one data type
ary2 = np.zeros((3,4)) # 3x4 matrix consisting of 0s
ary3 = np.ones((3,4)) # 3x4 matrix consisting of 1s
ary4 = np.identity(3) # 3x3 identity matrix
ary5 = ary1.copy() # make a copy of ary1

item1 = ary3[0, 0] # item in row1, column1

ary2.shape # tuple of dimensions. Here: (3,4)
ary2.size # number of elements. Here: 12

ary2_t = ary2.transpose() # transposes matrix

ary2.ravel() # makes an array linear (1-dimensional)
             # by concatenating rows
ary2.reshape(2,6) # reshapes array (must keep the same number of elements)

ary3[0:2, 0:3] # submatrix of first 2 rows and first 3 columns

ary3 = ary3[[2,0,1]] # re-arrange rows

# element-wise operations
ary1 + ary1
ary1 * ary1

# numpy is imported as np; a 1D array has no axis 1, so the axis argument
# is only used for the 2D examples.
np.dot(ary1, ary1) # matrix/vector (dot) product
np.sum(ary1) # sum of a 1D array
np.sum(ary3, axis=0) # column sums of a 2D array
np.mean(ary1) # mean of a 1D array
np.mean(ary3, axis=0) # column means of a 2D array
import pickle
#### Generate some object
my_dict = {i: "some text" for i in range(1, 10)}

#### Save object to file
# The with-statement closes the file even if pickling fails.
with open('my_file.pkl', 'wb') as pickle_out:
    pickle.dump(my_dict, pickle_out)

#### Load object from file
# NOTE: only unpickle files from trusted sources; loading a pickle can
# execute arbitrary code.
with open('my_file.pkl', 'rb') as my_object_file:
    my_dict = pickle.load(my_object_file)

print(my_dict)
{1: 'some text', 2: 'some text', 3: 'some text', 4: 'some text', 5: 'some text', 6: 'some text', 7: 'some text', 8: 'some text', 9: 'some text'}
import sys
def give_letter(word):
    """Generator that yields the items of `word` one at a time."""
    for char in word:
        yield char
# Major version of the running interpreter decides how the generator is
# advanced manually.
py_major = sys.version_info[0]

# if Python 3.x
if py_major == 3:
    print('executed in Python 3.x')
    test = give_letter('Hello')
    print(next(test))
    print('in for-loop:')
    # the loop continues with the items that were not consumed yet
    for l in test:
        print(l)

# if Python 2.x
if py_major == 2:
    print('executed in Python 2.x')
    test = give_letter('Hello')
    print(test.next())
    print('in for-loop:')
    for l in test:
        print(l)
executed in Python 3.x H in for-loop: e l l o
import time
# time.clock() was deprecated in Python 3.3 and removed in Python 3.8;
# time.perf_counter() is the recommended clock for measuring elapsed time.
start_time = time.perf_counter()
for i in range(10000000):
    pass
elapsed_time = time.perf_counter() - start_time
print("Time elapsed: {} seconds".format(elapsed_time))
Time elapsed: 0.49176900000000057 seconds
import timeit
# Time a single execution of the statement with an explicit Timer object.
loop_timer = timeit.Timer('for i in range(10000000): pass')
elapsed_time = loop_timer.timeit(number=1)
print("Time elapsed: {} seconds".format(elapsed_time))
Time elapsed: 0.3550995970144868 seconds
# Here, we make use of the "key" parameter of the built-in "sorted()" function
# (also available for the ".sort()" method), which lets us define a function
# that is called on every element that is to be sorted. In this case, our
# "key"-function is a simple lambda function that returns every tuple in
# reversed order, so the tuples are compared by their last item first and
# ties are resolved by the preceding items.
a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]
sorted_list = sorted(a_list, key=lambda e: tuple(reversed(e)))
print(sorted_list)
[(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')]
# prints [(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')]
# If we are only interested in sorting the list by the last element
# of the tuple and don't care about a "tie" situation, we can also use
# the index of the tuple item directly instead of reversing the tuple
# for efficiency. Tied tuples keep their original relative order.
a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]
sorted_list = list(a_list)
sorted_list.sort(key=lambda e: e[-1])
print(sorted_list)
[(2, 3, 'a'), (3, 2, 'b'), (2, 2, 'b'), (1, 3, 'c')]
"""
You have 3 lists that you want to sort "relative" to each other,
for example, picturing each list as a row in a 3x3 matrix: sort it by columns
########################
If the input lists are
########################
list1 = ['c','b','a']
list2 = [6,5,4]
list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']
########################
the desired outcome is:
########################
['a', 'b', 'c']
[4, 5, 6]
['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c']
########################
and NOT:
########################
['a', 'b', 'c']
[4, 5, 6]
['another_val-b', 'some-val-associated-with-c', 'z_another_third_val-a']
"""
list1 = ['c','b','a']
list2 = [6,5,4]
list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']

print('input values:\n', list1, list2, list3)

# Group the lists column-wise, sort the columns, then split them back
# into three separate lists.
sorted_columns = sorted(zip(list1, list2, list3))
list1, list2, list3 = map(list, zip(*sorted_columns))

print('\n\nsorted output:\n', list1, list2, list3 )
input values: ['c', 'b', 'a'] [6, 5, 4] ['some-val-associated-with-c', 'another_val-b', 'z_another_third_val-a'] sorted output: ['a', 'b', 'c'] [4, 5, 6] ['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c']
namedtuples
are high-performance container datatypes in the collections
module (part of Python's stdlib since 2.6).
namedtuple()
is a factory function for creating tuple subclasses with named fields.
from collections import namedtuple
# Tuple subclass whose three fields can be accessed by name.
Coordinates = namedtuple('Coordinates', 'x y z')
point1 = Coordinates(1, 2, 3)
x_coordinate = point1.x
print('X-coordinate: %d' % x_coordinate)
X-coordinate: 1