#!/usr/bin/env python
# coding: utf-8

# ![alt text](https://github.com/callysto/callysto-sample-notebooks/blob/master/notebooks/images/Callysto_Notebook-Banner_Top_06.06.18.jpg?raw=true)

# In[1]:


#Running this cell displays a button to toggle hidden code
#From: http://chris-said.io/2016/02/13/how-to-make-polished-jupyter-presentations-with-optional-code-visibility/
#NOTE(review): the markup inside the string below is blank in this export - confirm against the original notebook.
from IPython.display import HTML

HTML(''' ''')


# # Central Limit Theorem Experiments
#
# **Please note: If you have hidden the code using the toggle in the cell above, the cells will appear as an empty line. However, they can still be run by pressing shift-enter with the empty cell selected.**

# ### Experiment 1(a):
#
# Toss a fair coin. If it lands heads, you win 100 dollars, and if it lands tails, you win 0 dollars. Run this experiment X times, where X is chosen by the user. We get a graph of the results, and discover that we get roughly a mean of 50 dollars and a standard deviation of 50 dollars.
#
# Note: In this plot and all others, the mean will be displayed as a vertical red line, and one standard deviation away from the mean on both sides will be displayed as a horizontal red line.

# In[2]:


#EXPERIMENT 1A

#SETUP
import numpy as np
import plotly
import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode(connected=False)
get_ipython().run_line_magic('matplotlib', 'inline')


#SHARED HELPERS (used by every experiment cell below, so this cell must be run first)
def _ask_int(prompt, minimum=None):
    """Print ``prompt`` and read a whole number from the user.

    The question is repeated until the reply parses as an integer and,
    when ``minimum`` is given, is at least ``minimum``. This keeps a typo
    or a zero-sized experiment from crashing the cell further down.

    Args:
        prompt: Text printed before each read.
        minimum: Smallest accepted value, or None to accept any integer.

    Returns:
        The integer entered by the user.
    """
    while True:
        print(prompt)
        reply = input()
        try:
            value = int(reply)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if minimum is not None and value < minimum:
            print(f"Please enter a whole number that is at least {minimum}.")
            continue
        return value


def _even_bins(values, count=15):
    """Return histogram ``xbins`` splitting the range of ``values`` into ``count`` bins.

    The end is pushed one bin past the maximum so the largest observation
    falls inside the last bin.

    Args:
        values: Non-empty sequence of numbers to be binned.
        count: Number of equal-width bins between the minimum and maximum.

    Returns:
        A dict with ``start``, ``end`` and ``size`` keys.
    """
    lowest = min(values)
    highest = max(values)
    width = (highest - lowest) / count
    if width == 0:
        #Every observation is identical; a zero-width bin is not usable, so fall back to a unit-wide bin.
        width = 1
    return dict(start=lowest, end=highest + width, size=width)


def _histogram_layout(title, xaxis, mean, sd):
    """Build the layout shared by all histograms in this notebook.

    The layout carries two red line shapes: a vertical line at ``mean``
    spanning the full plot height, and a thick horizontal line at y = 0
    running from ``mean - sd`` to ``mean + sd``.

    Args:
        title: Plot title.
        xaxis: Dict of x-axis settings (title, and optionally range/fixedrange).
        mean: Mean of the plotted data.
        sd: Standard deviation of the plotted data.

    Returns:
        A ``go.Layout`` ready to be passed to ``go.Figure``.
    """
    return go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis=dict(
            title='Count',
            fixedrange=False,
        ),
        shapes=[
            #Vertical marker for the mean ('paper' y-coordinates: 0 is the bottom, 1 is the top):
            {
                'type': 'line',
                'x0': mean,
                'y0': 0,
                'x1': mean,
                'y1': 1,
                'xref': 'x',
                'yref': 'paper',
                'line': {
                    'color': '#f44242',
                    'width': 3,
                },
            },
            #Horizontal marker covering one standard deviation either side of the mean:
            {
                'type': 'line',
                'x0': mean - sd,
                'y0': 0,
                'x1': mean + sd,
                'y1': 0,
                'line': {
                    'color': '#f44242',
                    'width': 10,
                },
            },
        ],
    )


#Takes input from the user for the value of X:
X = _ask_int("How many times would you like to run the experiment? ", minimum=1)
X1 = X

#Performs the experiment X times in a single draw.
#100 represents heads, 0 represents tails:
win_data = np.random.choice((100, 0), size=X).tolist()

#Calculates the mean:
mean = np.mean(win_data)

#Calculates the standard deviation:
sd = np.std(win_data)

#Graphs a histogram of the data:
data = [go.Histogram(x=win_data, xbins=dict(start=-10, end=110, size=10))]

title = "Experiment 1(a) Winnings (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(
    title,
    dict(title='Winnings', range=[0, 110], fixedrange=True),
    mean,
    sd,
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Experiment 1 Data')


# ### Experiment 1(b)
#
# Toss a fair coin one hundred times, getting 1 dollar for each head and 0 dollars for each tail. Run this 100-trial experiment X times, where X is chosen by the user. We get a graph of the results, and discover that we get roughly a mean of 50 dollars and a standard deviation of 5 dollars.

# In[3]:


#EXPERIMENT 1B

#Takes input from the user for the value of X:
X = _ask_int("How many times would you like to run the experiment? ", minimum=1)
X2 = X

#Performs the experiment X times (the value just entered, not the one from experiment 1(a)).
#Each run flips 100 coins, where 1 represents heads and 0 represents tails,
#and records the total winnings of that run:
win_data = [int(np.random.choice((1, 0), size=100).sum()) for _ in range(X)]

#Calculates the mean:
mean = np.mean(win_data)

#Calculates the standard deviation:
sd = np.std(win_data)

#Graphs a histogram of the data:
data = [go.Histogram(x=win_data, xbins=dict(start=-10, end=110, size=5))]

title = "Experiment 1(b) Winnings (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(
    title,
    dict(title='Winnings', range=[0, 110], fixedrange=True),
    mean,
    sd,
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Experiment 1 Data')


# ### Experiment 2(a)
#
# For experiments 2(a) and 2(b), we'll re-run the above experiments, and this time you can specify the value of N.
#
# Flip X coins (with the same X from experiment 1(a)), and on each coin flip, you win N dollars or win 0 dollars.

# In[4]:


#EXPERIMENT 2A

#Takes input from the user for the value of N.
#N is also the number of flips per run in experiment 2(b), so it must be at least 1:
N = _ask_int("How much money will a winning flip be worth? ", minimum=1)

print(f"The experiment will be run {X1} times, as in experiment 1(a).")

#Re-uses the run count from experiment 1(a), as announced above:
X = X1

#Performs the experiment X times in a single draw.
#N represents heads, 0 represents tails:
win_data = np.random.choice((N, 0), size=X).tolist()

#Calculates the mean:
mean = np.mean(win_data)

#Calculates the standard deviation:
sd = np.std(win_data)

#Graphs a histogram of the data:
data = [go.Histogram(x=win_data, xbins=_even_bins(win_data))]

title = "Experiment 2(a) Winnings (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(title, dict(title='Winnings'), mean, sd)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Experiment 1 Data')


# ### Experiment 2(b)
#
# Toss a fair coin N times, getting 1 dollar for each head and 0 dollars for each tail. Run this N-trial experiment X times, where X is the same as specified in experiment 1(b).

# In[5]:


#EXPERIMENT 2B

print(f"The experiment will be run {X2} times, as in experiment 1(b).")

#Re-uses the run count from experiment 1(b):
X = X2

#Performs the experiment X times.
#Each run flips N coins, where 1 represents heads and 0 represents tails,
#and records the total winnings of that run:
win_data = [int(np.random.choice((1, 0), size=N).sum()) for _ in range(X)]

#Calculates the mean:
mean = np.mean(win_data)

#Calculates the standard deviation:
sd = np.std(win_data)

#Graphs a histogram of the data:
data = [go.Histogram(x=win_data, xbins=_even_bins(win_data))]

title = "Experiment 2(b) Winnings (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(title, dict(title='Winnings'), mean, sd)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Experiment 1 Data')


# ### Experiment 3(a)
# Toss a 6-sided die. In this experiment, we toss X of these dice, where X is specified by the user. We determine the mean and standard deviation of this experiment.

# In[6]:


#EXPERIMENT 3A

#Takes input from the user for the value of X:
X = _ask_int("How many 6-sided dice would you like to roll? ", minimum=1)
X3 = X

#Rolls X dice in a single draw:
win_data = np.random.choice((1, 2, 3, 4, 5, 6), size=X).tolist()

#Calculates the mean:
mean = np.mean(win_data)

#Calculates the standard deviation:
sd = np.std(win_data)

#Graphs a histogram of the data.
#Bins are one unit wide and offset by .5 so each die face sits in the middle of its own bar:
data = [go.Histogram(
    x=win_data,
    xbins=dict(
        start=min(win_data) - .5,
        end=(max(win_data) + (max(win_data) - min(win_data))/15) + .5,
        size=1,
    ),
)]

title = "Experiment 3(a) Results (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(title, dict(title='Value'), mean, sd)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Experiment 1 Data')


# ### Experiment 3(b)
# Toss a 6-sided die. In this experiment, we toss 4 of these dice, and mark the average. We do this X times, where X is specified by the user. We determine the mean and standard deviation of this experiment, showing that the standard deviation reduces by a factor of sqrt(4).

# In[7]:


#EXPERIMENT 3B

#Takes input from the user for the value of X:
X = _ask_int("How many experiments would you like to run? ", minimum=1)
X4 = X

#Runs X experiments. Each one rolls 4 dice and stores the average of the 4 rolls:
mean_data = [np.random.choice((1, 2, 3, 4, 5, 6), size=4).mean() for _ in range(X)]

#Calculates the mean:
mean = np.mean(mean_data)

#Calculates the standard deviation:
sd = np.std(mean_data)

#Graphs a histogram of the data:
data = [go.Histogram(x=mean_data, xbins=_even_bins(mean_data))]

title = "Experiment 3(b) Results (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(title, dict(title='Value'), mean, sd)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Experiment 1 Data')


# ### Experiment 4
# Re-run the above experiment, except we replace 4 with N. What happens to the standard deviations as N gets larger?

# In[8]:


#EXPERIMENT 4

#Takes input from the user for the value of N:
N = _ask_int("How many 6-sided dice would you like to roll for each experiment? ", minimum=1)

print(f"The experiment will be run {X4} times, as in experiment 3(b).")

#Re-uses the run count from experiment 3(b):
X = X4

#Runs X experiments. Each one rolls N dice and stores the average of the N rolls:
mean_data = [np.random.choice((1, 2, 3, 4, 5, 6), size=N).mean() for _ in range(X)]

#Calculates the mean:
mean = np.mean(mean_data)

#Calculates the standard deviation:
sd = np.std(mean_data)

print()
print("The mean of the average rolls was: {:.3f}".format(mean))
print("The standard deviation was: {:.3f}".format(sd))


# ### Experiment 5
#
# Toss a 6-sided die, but this time, the six values are chosen by the user. (For example, it can be 1,2,3,4,5,10000). The user will specify N (the number of dice to be rolled for an individual trial) and X (the number of trials). The average of each of the X trials will be plotted.
#
# As a starting point, select the dice numbers to be 1,2,3,4,5,10, then select N=100 and X=10000. See how "normal" the result is.
#
# Experiment with different values to reproduce the Central Limit Theorem!

# In[23]:


#EXPERIMENT 5

#Choosing values for the die:
print("Choose what you want the values on the dice to be:")
print()
V1 = _ask_int("First value: ")
V2 = _ask_int("Second value: ")
V3 = _ask_int("Third value: ")
V4 = _ask_int("Fourth value: ")
V5 = _ask_int("Fifth value: ")
V6 = _ask_int("Sixth value: ")

values = [V1, V2, V3, V4, V5, V6]
print()
print("The values you have chosen are:")
print(values)

#Asking for the number of times the dice will be rolled per trial:
print()
N = _ask_int("Please choose a value for N - the number of dice we will be rolling for an individual trial.", minimum=1)

#Asking for the number of trials:
print()
X = _ask_int("Please choose a value for X - the number of trials for which we will roll N dice.", minimum=1)

#Performing all trials. Each trial rolls N dice and stores the mean of the rolls:
sample_means = [np.random.choice(values, size=N).mean() for _ in range(X)]

#Plotting the sample means:
data = [go.Histogram(x=sample_means)]

mean = np.mean(sample_means)
sd = np.std(sample_means)

title = "Sample Means (Mean: {0:.2f}, Standard deviation: {1:.2f})".format(mean, sd)

layout = _histogram_layout(title, dict(title='Value', fixedrange=True), mean, sd)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Sample')


# #### ![alt text](https://github.com/callysto/callysto-sample-notebooks/blob/master/notebooks/images/Callysto_Notebook-Banners_Bottom_06.06.18.jpg?raw=true)