## simulate 100 trials of flipping a coin 100 times num_trials = 100 num_flips = 100 head_counts = [] target_number = 42 target_count = 0. ## for each trial for t in xrange(num_trials): ## simulate a single trial of 100 flips num_heads = 0 for f in xrange(num_flips): r = random.randint(0,1) ## if the number of 0, record it as a head if (r == 0): num_heads += 1 head_counts.append(num_heads) if (num_heads == target_number): target_count += 1 target_prob = target_count / num_trials print "I saw %d %d of %d times for a probability of %0.5f" % (target_number, target_count, num_trials, target_prob) ## print the number of times we saw heads in the first 50 trials print head_counts[0:50] ## Plot the histogram #import matplotlib.pyplot as plt plt.figure() plt.hist(head_counts) plt.show() ## Plot the histogram using integer values by creating more bins plt.figure() plt.hist(head_counts, bins=range(num_flips)) plt.title("Using integer values") plt.show() ## Plot the density function, notice bars sum to exactly 1 plt.figure() plt.hist(head_counts, bins=range(num_flips), normed=True) plt.show() ## Compute mean & stdev of distribution mean = (sum(head_counts) + 0.) / num_trials sumdiff = 0.0 for count in head_counts: diff = (count - mean) ** 2 sumdiff += diff sumdiff /= num_trials stdev = math.sqrt(sumdiff) print "The average value was %0.2f +/- %0.2f" % (mean, stdev) ## Compute the probability of seeing 42 heads prob_heads = .5 prob_42 = math.factorial(num_flips) / (math.factorial(42) * math.factorial(num_flips-42)) * (prob_heads**42) * ((1-prob_heads)**(num_flips-42)) print "The probability of seeing 42 heads in %d trials is %.05f" % (num_trials, prob_42) ## evaluate the probability density function for the range [0, num_flips] prob_dist = [] for k in xrange(num_flips+1): prob_k = math.factorial(num_flips) / (math.factorial(k) * math.factorial(num_flips-k)) * (prob_heads**k) * ((1-prob_heads)**(num_flips-k)) prob_dist.append(prob_k) plt.figure() plt.plot(prob_dist, color='red', linewidth=4) plt.show() ## overlay the analytic distribution over the data plt.figure() plt.hist(head_counts, bins=range(num_flips), normed=True) plt.plot(prob_dist, color='red', linewidth=4) plt.show() expected_mean = num_flips * prob_heads expected_stdev = math.sqrt(num_flips * prob_heads * (1 - prob_heads)) print "The expected frequency is %.02f +/- %.02f" % (expected_mean, expected_stdev) print "The observed frequency was %0.2f +/- %0.2f" % (mean, stdev) gaus_prob_dist = [] pi = 3.1415926 for k in xrange(num_flips+1): var = expected_stdev**2 num = math.exp(-(float(k)-expected_mean)**2/(2*var)) denom = math.sqrt(2*pi*var) gaus_prob = num/denom gaus_prob_dist.append(gaus_prob) plt.figure() plt.plot(gaus_prob_dist, color='green', linewidth=4, linestyle='dashed') plt.show() ## overlay the analytic distributions over the data plt.figure() plt.hist(head_counts, bins=range(num_flips), normed=True) plt.plot(prob_dist, color='red', linewidth=4) plt.plot(gaus_prob_dist, color='green', linewidth=4, linestyle='dashed') plt.show()