#!/usr/bin/env python # coding: utf-8 # Solution to a problem posted at # # https://www.reddit.com/r/statistics/comments/4csjee/finding_pab_given_two_sets_of_data/ # # # Copyright 2016 Allen Downey # # MIT License: http://opensource.org/licenses/MIT # In[1]: from __future__ import print_function, division from numpy.random import choice from collections import Counter from collections import defaultdict # Roll six 6-sided dice: # In[2]: def roll(die): return choice(die, 6) die = [1,2,3,4,5,6] roll(die) # Count how many times each outcome occurs and score accordingly: # In[3]: def compute_score(outcome): counts = Counter(outcome) dd = defaultdict(list) [dd[v].append(k) for k, v in counts.items()] return len(dd[max(dd)]) compute_score([1,1,1,1,1,1]) # Run many times and accumulate scores: # In[4]: n = 100000 scores = [compute_score(roll(die)) for _ in range(n)] # Print the percentages of each score: # In[5]: for score, freq in sorted(Counter(scores).items()): print(score, 100*freq/n) # Or even better, just enumerate the possibilities. # In[6]: from itertools import product die = [1,2,3,4,5,6] counts = Counter(compute_score(list(outcome)) for outcome in product(*[die]*6)) # In[7]: n = sum(counts.values()) for score, freq in sorted(counts.items()): print(score, 100*freq/n) # In[ ]: