## simulate 100 trials of flipping a coin 100 times

num_trials = 100
num_flips = 100

head_counts = []

target_number = 42
target_count = 0.

## for each trial
for t in xrange(num_trials):
    
    ## simulate a single trial of 100 flips
    num_heads = 0
    for f in xrange(num_flips):
        r = random.randint(0,1)
        
        ## if the number of 0, record it as a head
        if (r == 0):
            num_heads += 1

    head_counts.append(num_heads)
    
    if (num_heads == target_number):
        target_count += 1

target_prob = target_count / num_trials
print "I saw %d %d of %d times for a probability of %0.5f" % (target_number, target_count, num_trials, target_prob)


## print the number of times we saw heads in the first 50 trials
print head_counts[0:50]


## Plot the histogram
#import matplotlib.pyplot as plt
plt.figure()
plt.hist(head_counts)
plt.show()


## Plot the histogram using integer values by creating more bins
plt.figure()
plt.hist(head_counts, bins=range(num_flips))
plt.title("Using integer values")
plt.show()


## Plot the density function, notice bars sum to exactly 1 
plt.figure()
plt.hist(head_counts, bins=range(num_flips), normed=True)
plt.show()


## Compute mean & stdev of distribution
mean = (sum(head_counts) + 0.) / num_trials
sumdiff = 0.0
for count in head_counts:
    diff = (count - mean) ** 2
    sumdiff += diff

sumdiff /= num_trials
stdev = math.sqrt(sumdiff)

print "The average value was %0.2f +/- %0.2f" % (mean, stdev)


## Compute the probability of seeing 42 heads
prob_heads = .5
prob_42 = math.factorial(num_flips) / (math.factorial(42) * math.factorial(num_flips-42)) * (prob_heads**42) * ((1-prob_heads)**(num_flips-42))
print "The probability of seeing 42 heads in %d trials is %.05f" % (num_trials, prob_42)


## evaluate the probability density function for the range [0, num_flips]
prob_dist = []
for k in xrange(num_flips+1):
    prob_k = math.factorial(num_flips) / (math.factorial(k) * math.factorial(num_flips-k)) * (prob_heads**k) * ((1-prob_heads)**(num_flips-k))
    prob_dist.append(prob_k)

plt.figure()
plt.plot(prob_dist, color='red', linewidth=4)
plt.show()


## overlay the analytic distribution over the data
plt.figure()
plt.hist(head_counts, bins=range(num_flips), normed=True)
plt.plot(prob_dist, color='red', linewidth=4)
plt.show()


expected_mean  = num_flips * prob_heads
expected_stdev = math.sqrt(num_flips * prob_heads * (1 - prob_heads)) 

print "The expected frequency is %.02f +/- %.02f" % (expected_mean, expected_stdev)
print "The observed frequency was %0.2f +/- %0.2f" % (mean, stdev)


gaus_prob_dist = []
pi = 3.1415926

for k in xrange(num_flips+1):
    var = expected_stdev**2
    num = math.exp(-(float(k)-expected_mean)**2/(2*var))
    denom = math.sqrt(2*pi*var)
    gaus_prob = num/denom
    gaus_prob_dist.append(gaus_prob)

plt.figure()
plt.plot(gaus_prob_dist, color='green', linewidth=4, linestyle='dashed')
plt.show()


## overlay the analytic distributions over the data
plt.figure()
plt.hist(head_counts, bins=range(num_flips), normed=True)
plt.plot(prob_dist, color='red', linewidth=4)
plt.plot(gaus_prob_dist, color='green', linewidth=4, linestyle='dashed')
plt.show()