from collections import Counter def is_anagram(word1, word2): """Checks whether the words are anagrams. word1: string word2: string returns: boolean """ return Counter(word1) == Counter(word2) is_anagram('tachymetric', 'mccarthyite') is_anagram('banana', 'peach') class Multiset(Counter): """A multiset is a set where elements can appear more than once.""" def is_subset(self, other): """Checks whether self is a subset of other. other: Multiset returns: boolean """ for char, count in self.items(): if other[char] < count: return False return True # map the <= operator to is_subset __le__ = is_subset def can_spell(word, tiles): """Checks whether a set of tiles can spell a word. word: string tiles: string returns: boolean """ return Multiset(word) <= Multiset(tiles) can_spell('SYZYGY', 'AGSYYYZ') class Pmf(Counter): """A Counter with probabilities.""" def normalize(self): """Normalizes the PMF so the probabilities add to 1.""" total = float(sum(self.values())) for key in self: self[key] /= total def __add__(self, other): """Adds two distributions. The result is the distribution of sums of values from the two distributions. other: Pmf returns: new Pmf """ pmf = Pmf() for key1, prob1 in self.items(): for key2, prob2 in other.items(): pmf[key1 + key2] += prob1 * prob2 return pmf def __hash__(self): """Returns an integer hash value.""" return id(self) def __eq__(self, other): return self is other def render(self): """Returns values and their probabilities, suitable for plotting.""" return zip(*sorted(self.items())) d6 = Pmf([1,2,3,4,5,6]) d6.normalize() d6.name = 'one die' print(d6) d6_twice = d6 + d6 d6_twice.name = 'two dice' for key, prob in d6_twice.items(): print(key, prob) # if we use the built-in sum we have to provide a Pmf additive identity value # pmf_ident = Pmf([0]) # d6_thrice = sum([d6]*3, pmf_ident) # with --numpy inline we get numpy.sum, which does not require an identity d6_thrice = sum([d6]*3) d6_thrice.name = 'three dice' import matplotlib.pyplot as pyplot for die in [d6, d6_twice, d6_thrice]: xs, ys = die.render() pyplot.plot(xs, ys, label=die.name, linewidth=3, alpha=0.5) pyplot.xlabel('Total') pyplot.ylabel('Probability') pyplot.legend() pyplot.show() class Suite(Pmf): """Map from hypothesis to probability.""" def bayesian_update(self, data): """Performs a Bayesian update. Note: called bayesian_update to avoid overriding dict.update data: result of a die roll """ for hypo in self: like = self.likelihood(data, hypo) self[hypo] *= like self.normalize() def make_die(num_sides): die = Pmf(range(1, num_sides+1)) die.name = 'd%d' % num_sides die.normalize() return die dice = [make_die(x) for x in [4, 6, 8, 12, 20]] print(dice) class DiceSuite(Suite): def likelihood(self, data, hypo): """Computes the likelihood of the data under the hypothesis. data: result of a die roll hypo: Die object """ return hypo[data] dice_suite = DiceSuite(dice) dice_suite.bayesian_update(6) for die, prob in sorted(dice_suite.items()): print die.name, prob dice_suite.bayesian_update(8) for die, prob in sorted(dice_suite.items()): print die.name, prob