#!/usr/bin/env python
# coding: utf-8

# Solution to a problem posted at
# 
# https://www.reddit.com/r/statistics/comments/4csjee/finding_pab_given_two_sets_of_data/
# 
# 
# Copyright 2016 Allen Downey
# 
# MIT License: http://opensource.org/licenses/MIT

# In[1]:


from __future__ import print_function, division

from numpy.random import choice
from collections import Counter
from collections import defaultdict


# Roll six 6-sided dice:

# In[2]:


def roll(die, num_dice=6):
    """Roll `die` repeatedly and return the faces that came up.

    die: sequence of face values to sample from
    num_dice: how many rolls to make; defaults to 6, which matches the
        previously hard-coded behavior

    returns: NumPy array holding `num_dice` values drawn from `die`
    """
    # numpy.random.choice samples uniformly with replacement by default,
    # which is what independent rolls of a fair die require.
    return choice(die, num_dice)

# Faces of a standard six-sided die.
die = list(range(1, 7))
# Sample roll; the result is not stored.
roll(die)


# Count how many times each face occurs; the score is the number of faces tied for the highest count:

# In[3]:


def compute_score(outcome):
    """Return how many distinct values are tied for most frequent.

    outcome: iterable of hashable values (e.g. the faces of one roll)

    returns: int, the number of distinct values whose count equals the
        highest count in `outcome`; 0 if `outcome` is empty
    """
    counts = Counter(outcome)
    # Test the Counter rather than `outcome` itself: a NumPy array has no
    # unambiguous truth value, and max() below would fail on no counts.
    if not counts:
        return 0
    highest = max(counts.values())
    return sum(1 for freq in counts.values() if freq == highest)

# Sanity check: six of a kind has a single most-frequent face.
compute_score([1] * 6)


# Run many times and accumulate scores:

# In[4]:


# Number of simulated rolls.
n = 10 ** 5
# Score one fresh roll per trial.
scores = list(map(compute_score, (roll(die) for _ in range(n))))


# Print the percentages of each score:

# In[5]:


# Tally the simulated scores, then report each score's share of the n
# trials as a percentage, in ascending order of score.
tally = Counter(scores)
for score in sorted(tally):
    freq = tally[score]
    print(score, 100 * freq / n)


# Or, even better, enumerate all 6**6 possible outcomes and count the scores exactly.

# In[6]:


from itertools import product

# Faces of a standard six-sided die.
die = list(range(1, 7))
# Score every ordered sequence of six faces and count how often each
# score occurs.
counts = Counter(
    compute_score(list(faces)) for faces in product(die, repeat=6)
)


# In[7]:


# Total number of enumerated outcomes.
n = sum(counts.values())
# Report each score's share of all outcomes as a percentage, in
# ascending order of score.
for score in sorted(counts):
    freq = counts[score]
    print(score, 100 * freq / n)


# In[ ]: