from IPython import display
# Embed the talk starting at 24m47s. The offset is passed through the
# `start` keyword (in seconds) instead of being appended to the video id.
display.YouTubeVideo('csyL9EC0S0c', start=24 * 60 + 47)
%load python_tour.md
I think substack's turtle graphics was written in JavaScript.
Mine was in Python; something about its elegance appeals to the mathematician inside me. It's much less about engineering.
As with marijuana laws, the Dutch are leading the way in readable programming.
import this
So you might want to use Python if you're trying to do something that is mostly about munging text, or for most things that aren't about making event-driven websites.
def is_even(a):
    """Report whether ``a`` is even.

    Prints 'Even!' or 'Odd!' as a side effect and returns the matching
    boolean (True for even, False for odd).
    """
    if a % 2 == 0:
        print('Even!')
        return True
    print('Odd!')
    return False
# Multiple assignment: unpack one tuple of values into three names at once.
(a, b, c) = ('spam', 'eggs', 'parrot')
print(a, b, c)
// C-style counting loop: step i from 0 up to mylist_length - 1 and pass each mylist[i] to do_something.
for (i=0; i < mylist_length; i++) {
do_something(mylist[i]);
}
# The direct equivalent in Python would be this:
i = 0
while i < mylist_length:
    do_something(mylist[i])
    i += 1

# That, however, while it works, is not considered Pythonic. It's not an
# idiom the Python language encourages. We could improve it. A typical idiom
# in Python to generate all numbers in a list would be to use something like
# the built-in range() function:
for i in range(mylist_length):
    do_something(mylist[i])

# This is however not Pythonic either. Here is the Pythonic way, encouraged
# by the language itself:
for element in mylist:
    do_something(element)
numbers = [1, 2, 3, 4]
# Iterate over the list directly; no index bookkeeping is needed.
for number in numbers:
    print(number)
#Hacker School
# Map each divisor to the word printed for its multiples. The dict has to be
# defined before it is inspected; evaluating .items() first raises NameError
# when the cells are run top to bottom.
rice_crispies = {3: 'Crackle', 5: 'Pop'}
rice_crispies.items()

for i in range(101):
    print(i)
    # Walk key/value pairs together instead of looking each key up again.
    for flake, sound in rice_crispies.items():
        if i % flake == 0:
            print(sound)
# Keep the even numbers from 0 through 100 and double each of them.
ahundred = range(101)
new_list = [2 * n for n in ahundred if not n % 2]
print(new_list)
# Peek at the first ten results.
new_list[:10]
There's an old programming proverb which goes something like this:
Show me your algorithm,
and I will remain puzzled,
but show me your data structure,
and I will be enlightened.
This is a statement about software and coding, but first and foremost it is about human cognition. The way my brain works is to first visualize the data and then imagine what the algorithm does to it.
# A small dict with the keys 'jack' and 'sape'.
tel = {'jack': 4098, 'sape': 4139}
# 'guido' was never added above, so this plain-dict lookup raises KeyError.
tel['guido']
I think dictionaries are handled particularly nicely in the Python tutorial: https://docs.python.org/3/tutorial/datastructures.html#dictionaries
from collections import defaultdict

# defaultdict(int) supplies 0 for a key it has not seen yet, so each word can
# be incremented without first checking whether it is already present.
word_counter = defaultdict(int)
words = ['spam', 'spam', 'eggs', 'spam', 'parrot']
for word in words:
    word_counter[word] += 1
print(word_counter)
Now I'll show you why to write your algorithms in Python, or at least why Norvig of Google does.
!wget http://norvig.com/spell-correct.html
#90% of the google spelling corrector in 21 lines of Python
!head -n20 big.txt
import re, collections
def words(text):
    """Return every run of the letters a-z in ``text``, lower-cased, in order.

    The text is lower-cased first, so anything that is not an ASCII letter
    (digits, punctuation, whitespace) acts as a separator.
    """
    return re.findall(r'[a-z]+', text.lower())
def train(features):
    """Count how often each item of ``features`` occurs.

    Returns a ``collections.defaultdict(int)`` mapping item -> count, so
    looking up an item that never occurred yields 0 instead of raising.
    """
    model = collections.defaultdict(int)
    for f in features:
        model[f] += 1
    return model
# Word-frequency model built from the corpus file. `file()` exists only in
# Python 2; `open()` works on both, and the `with` block closes the handle
# as soon as the text has been read.
with open('big.txt') as corpus:
    NWORDS = train(words(corpus.read()))
# Letters tried when generating replacement and insertion edits.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
def edits(word):
    """Return the set of strings one simple edit away from ``word``.

    An edit is a single-character deletion, a swap of two adjacent
    characters, a replacement of one character by a letter of ``alphabet``,
    or an insertion of one such letter at any position.
    """
    # Every way of cutting the word into a (left, right) pair.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [a + b[1:] for a, b in splits if b]
    transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b) > 1]
    # Filter on `b` before looping over the alphabet so the emptiness check
    # runs once per split rather than once per letter.
    replaces = [a + c + b[1:] for a, b in splits if b for c in alphabet]
    inserts = [a + c + b for a, b in splits for c in alphabet]
    return set(deletes + transposes + replaces + inserts)
def known_edits(word):
    """Return the words in ``NWORDS`` that are two edits away from ``word``.

    Applies ``edits`` to every result of ``edits(word)`` and keeps only the
    candidates that appear in the trained model.
    """
    edits_of_edits = set()
    for e1 in edits(word):
        for e2 in edits(e1):
            if e2 in NWORDS:
                edits_of_edits.add(e2)
    # Norvig's way, as a single expression:
    # return set(e2 for e1 in edits(word) for e2 in edits(e1) if e2 in NWORDS)
    return edits_of_edits
def known(words):
    """Return the subset of ``words`` that appears in ``NWORDS``, as a set."""
    return set(w for w in words if w in NWORDS)
def correct(word):
    """Return the most frequent known candidate for ``word``.

    Candidate sets are tried in order and the first non-empty one wins: the
    word itself if it is known, known words one edit away, known words two
    edits away, and finally the unchanged word as a last resort.
    """
    candidates = (
        known([word])
        or known(edits(word))
        or known_edits(word)
        or [word]
    )
    # Rank the surviving candidates by their count in the trained model.
    return max(candidates, key=NWORDS.get)
correct('spam')
'spasm'
# Step through the intermediate lists that edits() builds, for one word.
word = 'spam'
splits = [(word[:cut], word[cut:]) for cut in range(len(word) + 1)]
deletes = [head + tail[1:] for head, tail in splits if tail]
transposes = [
    head + tail[1] + tail[0] + tail[2:]
    for head, tail in splits
    if len(tail) > 1
]
replaces = [
    head + letter + tail[1:]
    for head, tail in splits
    for letter in alphabet
    if tail
]
inserts = [head + letter + tail for head, tail in splits for letter in alphabet]
# Show each stage in the order it was computed.
for stage in (splits, deletes, transposes, replaces, inserts):
    print(stage)
# wget treats every extra argument as another URL, so a trailing `manon.txt` only causes a failed host lookup; the book is saved under its remote name, pg468.txt.
!wget https://www.gutenberg.org/cache/epub/468/pg468.txt
--2015-01-14 16:29:58-- https://www.gutenberg.org/cache/epub/468/pg468.txt Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47 Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 370164 (361K) [text/plain] Saving to: 'pg468.txt’ 100%[======================================>] 370,164 98.6KB/s in 3.7s 2015-01-14 16:30:03 (98.6 KB/s) - 'pg468.txt’ saved [370164/370164] --2015-01-14 16:30:03-- http://manon.txt/ Resolving manon.txt (manon.txt)... failed: Name or service not known. wget: unable to resolve host address 'manon.txt’ FINISHED --2015-01-14 16:30:03-- Total wall clock time: 5.4s Downloaded: 1 files, 361K in 3.7s (98.6 KB/s)
# Read the downloaded book and keep words 1000-1999 as a sample.
with open('pg468.txt', 'r') as infile:
    manon_string = infile.read()
manon_words = manon_string.split()[1000:2000]

# Apply `correct` to every word in the sample with a list comprehension.
transform_words = [correct(w) for w in manon_words]

# Join the results back into one string and write it out; the `with` block
# makes sure the output is flushed and closed.
transform_string = ' '.join(transform_words)
with open('transformed_manon.txt', 'w') as outfile:
    outfile.write(transform_string)