from thinkbayes import Pmf
pmf = Pmf()
for x in range(1, 7):
pmf.Set(x, 1)
pmf.Print()
1 1 2 1 3 1 4 1 5 1 6 1
pmf.Normalize()
6
pmf.Print()
1 0.166666666667 2 0.166666666667 3 0.166666666667 4 0.166666666667 5 0.166666666667 6 0.166666666667
pmf = Pmf()
pmf.Set('Bowl 1', 0.5)
pmf.Set('Bowl 2', 0.5)
P(Vanilla | Bowl 1) = 3/4
P(Vanilla | Bowl 1) = 2/4
pmf.Mult('Bowl 1', 0.75)
pmf.Mult('Bowl 2', 0.5)
pmf.Normalize()
print pmf.Prob('Bowl 1')
0.6
We representa box with dice of [4, 6, 8, 12, 20] sides.
from thinkbayes import Suite
# Start with equal priors
suite = Suite([4, 6, 8, 12, 20])
suite.Print()
4 0.2 6 0.2 8 0.2 12 0.2 20 0.2
We're going to add a Likelihood
method
from thinkbayes import Suite
class Dice(Suite):
def Likelihood(self, hypo, data):
"""Computes the likelihood of the data under the hypothesis.
hypo: integer number of sides on the die
data: integer die roll
"""
num_sides = hypo
outcome = data
if outcome > num_sides:
return 0
else:
return 1.0/num_sides
Let's build a suite that represents our dice:
suite = Dice([4, 6, 8, 12, 20])
And now let's say that we roll once and get a 6. What does this tells us about the probabilities for the various dice?
suite.Update(6)
print 'After one 6'
suite.Print()
After one 6 4 0.0 6 0.392156862745 8 0.294117647059 12 0.196078431373 20 0.117647058824
And now let's roll a few more dice and figure out how that informs us about the probability of the die being of a specific size:
for roll in [6, 4, 8, 7, 7, 2]:
suite.Update(roll)
suite.Print()
4 0.0 6 0.0 8 0.943248453672 12 0.0552061280613 20 0.0015454182665
%pylab inline
Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.kernel.zmq.pylab.backend_inline]. For more information, type 'help(pylab)'.
import thinkbayes
import myplot
class Train(thinkbayes.Suite):
"""Suite of hypotheses about the number of trains."""
def Likelihood(self, hypo, data):
"""Computes the likelihood of the data under the hypothesis.
hypo: integer hypothesis about the number of trains
data: integer serial number of the observed train
"""
num_trains = hypo
train_seen = data
if train_seen > num_trains:
return 0
else:
return 1.0/num_trains
Given the data, our mean estimate is: 597.344008084
hypos = xrange(100, 1001)
suite = Train(hypos)
suite.Update(321)
print 'Given the data, our mean estimate is:',
print suite.Mean()
myplot.Pmf(suite)
Given the data, our mean estimate is: 597.344008084
Let's now assume we're spotting a few more trains, and let's see how our posterior updates as we measure this data:
for train in [782, 423]:
suite.Update(train)
print 'Given the data, our mean estimate is:',
print suite.Mean()
myplot.Pmf(suite)
Given the data, our mean estimate is: 877.54322135
Estimation: based on the data (140h, 110t), what is the probability that the coin lands heads?
Hypothesis testing: what is the probability that the coin is fair?
class Euro(thinkbayes.Suite):
def Likelihood(self, hypo, data):
"""Computes the likelihood of the data under the hypothesis.
hypo: integer value of x, the probability of heads (0-100)
data: string 'H' or 'T'
"""
# Note: while we're writing the likelihood function, we
# *assume* the hypothesis is true. Don't forget this!
# We convert the hypothesis to a probability in the 0-1 range:
x = hypo/100.0
# Now, since the hypothesis is about Heads, if the data is H
# it means the hypothesis is True, so we can just return the hypothesis
if data == 'H':
return x
else:
return 1-x
suite = Euro(range(0, 101))
myplot.Pmf(suite)
suite.Update('H')
myplot.Pmf(suite)
suite.Update('H')
myplot.Pmf(suite)
suite.Update('T')
myplot.Pmf(suite)
for outocme in 'HHHHHHHHTTT':
suite.Update(outocme)
myplot.Pmf(suite)
suite = Euro(range(0, 101))
for outocme in 'H'*140 + 'T'*110:
suite.Update(outocme)
myplot.Pmf(suite)
What is the probability that the coin is fair? I.e., what is the prob. that x is 50%?
suite.Prob(50)
0.02097652612954468
%load euro2.py
class Euro(thinkbayes.Suite):
def Likelihood(self, hypo, data):
"""Computes the likelihood of the data under the hypothesis.
hypo: integer value of x, the probability of heads (0-100)
data: string 'H' or 'T'
"""
x = hypo / 100.0
heads, tails = data
like = x**heads * (1-x)**tails
return like
def AverageLikelihood(suite, data):
"""Computes the average likelihood over all hypothesis in suite.
Args:
suite: Suite of hypotheses
data: some representation of the observed data
Returns:
float
"""
total = 0
for hypo, prob in suite.Items():
like = suite.Likelihood(hypo, data)
total += prob * like
return total
fair = Euro()
fair.Set(50, 1)
bias = Euro()
for x in range(0, 101):
if x != 50:
bias.Set(x, 1)
bias.Normalize()
data = 140, 110
like_bias = AverageLikelihood(bias, data)
print 'like_bias', like_bias
like_fair = AverageLikelihood(fair, data)
print 'like_fair', like_fair
ratio = like_bias / like_fair
print 'Bayes factor', ratio
like_bias 2.57964902292e-76 like_fair 5.52714787526e-76 Bayes factor 0.466723359161
bias = Euro()
for x in range(40, 70):
if x != 50:
bias.Set(x, 1)
bias.Normalize()
like_bias = AverageLikelihood(bias, data)
print 'like_bias', like_bias
ratio = like_bias / like_fair
print 'Bayes factor', ratio
like_bias 8.89531486664e-76 Bayes factor 1.60938608255
import csv
import math
import numpy
import sys
import matplotlib
import myplot
def ReadScale(filename='sat_scale.csv', col=2):
"""Reads a CSV file of SAT scales (maps from raw score to standard score).
Args:
filename: string filename
col: which column to start with (0=Reading, 2=Math, 4=Writing)
Returns: thinkbayes.Interpolator object
"""
def ParseRange(s):
t = [int(x) for x in s.split('-')]
return 1.0 * sum(t) / len(t)
fp = open(filename)
reader = csv.reader(fp)
raws = []
scores = []
for t in reader:
try:
raw = int(t[col])
raws.append(raw)
score = ParseRange(t[col+1])
scores.append(score)
except:
pass
raws.sort()
scores.sort()
return thinkbayes.Interpolator(raws, scores)
def ReadRanks(filename='sat_ranks.csv'):
"""Reads a CSV file of SAT scores.
Args:
filename: string filename
Returns:
list of (score, freq) pairs
"""
fp = open(filename)
reader = csv.reader(fp)
res = []
for t in reader:
try:
score = int(t[0])
freq = int(t[1])
res.append((score, freq))
except ValueError:
pass
return res
def DivideValues(pmf, denom):
"""Divides the values in a Pmf by denom.
Returns a new Pmf.
"""
new = thinkbayes.Pmf()
denom = float(denom)
for val, prob in pmf.Items():
x = val / denom
new.Set(x, prob)
return new
class Exam(object):
"""Encapsulates information about an exam.
Contains the distribution of scaled scores and an
Interpolator that maps between scaled and raw scores.
"""
def __init__(self):
self.scale = ReadScale()
scores = ReadRanks()
score_pmf = thinkbayes.MakePmfFromDict(dict(scores))
self.raw = self.ReverseScale(score_pmf)
self.max_score = max(self.raw.Values())
self.prior = DivideValues(self.raw, denom=self.max_score)
def Lookup(self, raw):
"""Looks up a raw score and returns a scaled score."""
return self.scale.Lookup(raw)
def Reverse(self, score):
"""Looks up a scaled score and returns a raw score.
Since we ignore the penalty, negative scores round up to zero.
"""
raw = self.scale.Reverse(score)
return raw if raw > 0 else 0
def ReverseScale(self, pmf):
"""Applies the reverse scale to the values of a PMF.
Args:
pmf: Pmf object
scale: Interpolator object
Returns:
new Pmf
"""
new = thinkbayes.Pmf()
for val, prob in pmf.Items():
raw = self.Reverse(val)
new.Incr(raw, prob)
return new
class Sat(thinkbayes.Suite):
"""Represents the distribution of efficacy for a test-taker."""
def __init__(self, exam):
thinkbayes.Suite.__init__(self)
self.exam = exam
# start with the prior distribution
for p, prob in exam.prior.Items():
self.Set(p, prob)
def Likelihood(self, hypo, data):
"""Computes the likelihood of a test score, given efficacy."""
p = hypo
score = data
raw = self.exam.Reverse(score)
yes, no = raw, self.exam.max_score - raw
like = p**yes * (1-p)**no
return like
def MakeFigures(exam, alice, bob):
formats=['png']
myplot.Pmf(exam.prior, label='prior')
myplot.Save(root='sat_prior',
formats=formats,
xlabel='p',
ylabel='PMF')
myplot.Clf()
myplot.Pmfs([alice, bob])
myplot.Save(root='sat_posterior',
formats=formats,
xlabel='p',
ylabel='PMF')
def PmfProbGreater(pmf1, pmf2):
"""Probability that a value from pmf1 is more than a value from pmf2.
Args:
pmf1: Pmf object
pmf2: Pmf object
Returns:
float probability
"""
total = 0.0
for v1, p1 in pmf1.Items():
for v2, p2 in pmf2.Items():
if p1 > p2:
total += p1*p2
return total
exam = Exam()
alice = Sat(exam)
alice.name = 'alice'
alice.Update(780)
bob = Sat(exam)
bob.name = 'bob'
bob.Update(760)
print 'Prob Alice is "smarter":', PmfProbGreater(alice, bob)
Prob Alice is "smarter": 0.572030956616