In [ ]:
pwd







Advanced Strings & File I/O

UC Berkeley Python Bootcamp. All Rights Reserved.

Strings can do operations on themselves

.lower(), .upper(), .capitalize(), .swapcase()

In [ ]:
"funKY tOwn".capitalize()
In [ ]:
"funky tOwn".lower()
In [ ]:
"fUNKY tOWN".swapcase()

How you call this:

.split([sep [,maxsplit]])

In [ ]:
"funKY tOwn".split()
In [ ]:
"funKY tOwn".capitalize().split()
In [ ]:
[x.capitalize() for x in "funKY tOwn".split()]
In [ ]:
"I want to take you to, funKY tOwn".split("u")
In [ ]:
"I want to take you to, funKY tOwn".split("you")

 

.strip(), .join(), .replace()

In [ ]:
csv_string = 'Dog,Cat,Spam,Defenestrate,1, 3.1415   \n\t'
csv_string.strip()
In [ ]:
a = "spam\n "
In [ ]:
a.strip("s")
In [ ]:
clean_list = [x.strip() for x in csv_string.split(",")]
print(clean_list)

.join() allows you to glue a list of strings together with a certain string

In [ ]:
print(",".join(clean_list))
In [ ]:
print("\t".join(clean_list))

.replace() strings in strings

In [ ]:
csv_string = 'Dog,Cat,Spam,Defenestrate,1, 3.1415   \n\t'
alt_csv = csv_string.replace(' ','')
alt_csv
In [ ]:
print(csv_string.strip().replace(' ','').replace(',','\t'))

 

.find()

incredibly useful for searching: returns the index of the first match, or -1 if the substring is not found

In [ ]:
s = 'My Funny Valentine'
s.find("y")
In [ ]:
s.find?
In [ ]:
s.find("Funny")
In [ ]:
s[s.find("Funny"):]
In [ ]:
s.find("z")
In [ ]:
# Report every string in the list that contains the substring 'tine'.
ss = [s,"Argentine","American","Quarentine","Manafort"]
for thestring in ss:
    if thestring.find("tine") == -1:
        continue  # .find() returns -1 when the substring is absent
    print("'" + str(thestring) + "' contains 'tine'.")

 

string module

exposes useful variables and functions

In [ ]:
import string
In [ ]:
string.ascii_letters
In [ ]:
string.digits
In [ ]:
string.ascii_uppercase

 

String Formatting

Casting with str() is very limited; Python also gives access to C-like string formatting.

   usage:  “%(format)” % (variable)
In [ ]:
import math
print("My favorite integer is %i and my favorite float is %f,\n" 
      " which to three decimal places is %.3f and in exponential form is %e" 
     % (3,math.pi,math.pi,math.pi))

common formats:

f (float), i (integer), s (string), g (nicely formatting floats)

https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting

 

String Formatting

% escapes “%”

In [ ]:
print("I promise to give 100%% effort whenever asked of")

+ (explicit sign) and width padding

In [ ]:
print("%f\n%+f\n%f\n%10f\n%10s" % (math.pi,math.pi,-1.0*math.pi,math.pi,"pi"))

 

String Formatting

the (somewhat) preferred way

is string.format(value0,value1,....)

In [ ]:
'on {0}, I feel {0}'.format("saturday","groovy")
In [ ]:
'on {}, I feel {}'.format("saturday","groovy")
In [ ]:
# A list passed to .format() counts as ONE positional argument, so {1} has
# nothing to refer to and an IndexError is raised. Catch it and show the
# message so the demonstration does not abort a full "Run All".
try:
    outcome = 'on {0}, I feel {1}'.format(["saturday","groovy"])
except IndexError as err:
    outcome = "IndexError: {}".format(err)
outcome
In [ ]:
'on {0}, I feel {0}'.format(["saturday","groovy"])
In [ ]:
'on {1}, I feel {0}'.format("saturday","groovy")

you can assign by argument position or by name

In [ ]:
'{desire} to {place}'.format(desire='Fly me',\
                             place='The Moon')
In [ ]:
'{desire} to {place} or else I wont visit {place}.'.format( \
                 desire='Fly me',place='The Moon')
In [ ]:
f = {"desire": "I want to take you", "place": "funky town"}
In [ ]:
'{desire} to {place}'.format(**f)

 

Formatting comes after a colon (:)

In [ ]:
print("%03.2f" % 3.14159)
In [ ]:
("%03.2f" % 3.14159) ==  "{:03.2f}".format(3.14159)
In [ ]:
"{0:03.2f}".format(3.14159,42)
In [ ]:
x = 10
strformat = "{0:<%i.2f}" % x
In [ ]:
print(strformat)
In [ ]:
strformat.format(3.14159,42)
In [ ]:
# format also supports binary numbers
"int: {0:d};  hex: {0:x};  oct: {0:o};  bin: {0:b}".format(42)

 

File I/O (read/write)

open() is a builtin function; .close() is a method of the file object it returns

In [ ]:
%%file mydata.dat
This is my zeroth file I/O. Zing!
In [ ]:
file_stream = open('mydata.dat','r') ; print(type(file_stream))
file_stream.close()

open modes: r (read), w (write), r+ (read + update), rb (read as a binary stream, ...), rt (read as text file)

Writing data: .write() or .writelines()

In [ ]:
f= open("test.dat","w")
f.write("This is my first file I/O. Zing! Again.")
f.close()
!cat test.dat
In [ ]:
f= open("test.dat","w")
f.writelines(["a=['This is my second file I/O.']","Take that Dr. Zing!"])
f.close()
!cat test.dat

Likewise, there is .readlines() and .read()

In [ ]:
# .readlines() returns the file contents as a list of strings, one per line.
# The with-block closes the file automatically, even if the read raises.
with open("test.dat","r") as f:
    data = f.readlines()
print(data)
In [ ]:
type(data)

 

In [ ]:
%%file tabbify_my_csv.py
"""
small copy program that turns a csv file into a tabbed file

  PYTHON BOOT CAMP EXAMPLE; 
    created by Josh Bloom at UC Berkeley, 2010,2012,2013,2015 ([email protected])

"""
import os

def tabbify(infilename,outfilename,ignore_comments=True,comment_chars="#;/"):
    """
    Copy a comma-separated text file to a tab-separated one.

    Every "," in the input is replaced by a tab. When ignore_comments is
    true, lines whose first character is in comment_chars are copied
    through unchanged.

    INPUT: infilename -- path of the file to read
           outfilename -- path of the file to write (may equal infilename)
           ignore_comments -- if true, leave comment lines untouched
           comment_chars -- characters that mark a comment line when they
                            appear as the first character of the line
    OUTPUT: creates (or overwrites) a file called outfilename
    RETURNS: None; does nothing if infilename does not exist
    """
    if not os.path.exists(infilename):
        return  # do nothing if the file isn't there
    # Read the whole input BEFORE opening the output: opening with "w"
    # truncates the file, so if both names refer to the same file the data
    # would otherwise be wiped out before it was read.
    with open(infilename,"r") as instream:
        inlines = instream.readlines()
    outlines = []
    for line in inlines:
        # lines returned by readlines() are never empty, so line[0] is safe
        if ignore_comments and (line[0] in comment_chars):
            outlines.append(line)
        else:
            outlines.append(line.replace(",","\t"))
    # the with-block guarantees the output is flushed and closed on error too
    with open(outfilename,"w") as outstream:
        outstream.writelines(outlines)
In [ ]:
!cat google_share_price.csv |head
In [ ]:
%run tabbify_my_csv.py
tabbify("google_share_price.csv","google_share_price.tsv")
In [ ]:
!cat google_share_price.csv |head
In [ ]:
!cat google_share_price.tsv |head

 

File I/O (read/write)

shutil module is preferred for copying, archiving & removing files/directories

http://docs.python.org/library/shutil.html#module-shutil

tempfile module is used for the creation of temporary directories and files

http://www.doughellmann.com/PyMOTW/tempfile/

In [ ]:
import tempfile
tmp = tempfile.TemporaryFile() ; type(tmp)
In [ ]:
tmp = tempfile.NamedTemporaryFile(suffix=".csv",\
                           prefix="boot",dir="/tmp",delete=False)
print(tmp.name)
In [ ]:
tmp.write(bytes("# stock phrases of today's youth\nWassup?!,OMG,LOL,BRB,Python\n","utf-8"))
tmp.close()
!cat $tmp.name
In [ ]:
tmp = tempfile.NamedTemporaryFile(suffix=".csv",\
                           prefix="boot",dir="/tmp",delete=False)
print(tmp.name)
tmp.write(b"# stock phrases of today's youth\nWassup?!,OMG,LOL,BRB,Python\n")
tmp.close()
!cat $tmp.name

 

io module StringIO/BytesIO

handy for making file-like objects out of strings

In [ ]:
import io
myfile = io.StringIO( \
   "# stock phrases of today's youth\nWassup?!,OMG,LOL,BRB,Python\n")
myfile.getvalue()  ## get what we just wrote
In [ ]:
myfile.seek(0)     ## go back to the beginning
myfile.readlines()
In [ ]:
myfile.seek(1)
myfile.readlines()
In [ ]:
myfile.close()
In [ ]:
# Writing to a closed stream raises ValueError; catch it and show the message
# so the demonstration does not abort a full "Run All".
try:
    myfile.write('not gonna happen')
except ValueError as err:
    print("ValueError:", err)
In [ ]:
myfile = io.BytesIO(b"# stock phrases of today's youth\nWassup?!,OMG,LOL,BRB,Python\n")
In [ ]:
myfile.seek(2)  ; myfile.write(b"silly wah wah") ; myfile.seek(0)
In [ ]:
myfile.readlines()

subprocess module

subprocess is the preferred way to interact with other programs, as you might do on the command line

In [ ]:
# Import only the names that are used; a wildcard import floods the
# namespace and hides where Popen and PIPE come from.
from subprocess import PIPE, Popen
p = Popen("ls", shell=True, stdout=PIPE)  # list the directory
p.pid  # get the process ID of the new subprocess
In [ ]:
val = !ls
In [ ]:
val
In [ ]:
print(p.stdout.readlines())
In [ ]:
p = Popen("vanRossum-Trump-2016", shell=True, stdout=PIPE,stderr=PIPE)
In [ ]:
print(p.stderr.readlines())
In [ ]:
import os

it's often advisable to wait until the subprocess has finished

In [ ]:
p = Popen("find / -name '*.py'", shell=True, stdout=PIPE,stderr=PIPE)
In [ ]:
# communicate() blocks until the search is done while reading both pipes.
# Waiting without reading them can hang forever once the child fills the
# OS pipe buffer, and os.waitpid() would leave p.returncode unset.
stdout_data, stderr_data = p.communicate()
p.returncode

(c) J Bloom 2013-2016 All Rights Reserved

In [ ]: