from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# Comparison operators evaluate to a bool (True or False).
3 > 1
type(3 > 1)
3 < 1
True
3 == 3
# A single `=` is assignment, not comparison. `3 = 3` is a SyntaxError
# (a literal cannot be an assignment target) and would stop the whole file
# from parsing, so the example is kept only as a comment.
# 3 = 3
x = 14
y = 3
x > 10
# Chained comparison: equivalent to (12 < x) and (x < 18).
12 < x < 18
12 < x
x < 18
# x - y is 11, so the left-hand comparison already fails.
12 < x-y < 18
# `and` needs both sides to be True; y > 5 is False here.
x > 10 and y > 5
# Build an array of strings to compare against.
# NOTE(review): make_array comes from the datascience star-import; presumably
# it returns a NumPy array, which is what makes the comparisons below
# element-wise - confirm against the datascience docs.
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
# Compare every entry of the array with 'dog'.
pets == 'dog'
# Adding booleans treats True as 1 and False as 0, so the next three lines
# all compute the number of 'dog' entries.
0 + 1 + 0 + 0 + 1 + 0
sum(make_array(False, True, False, False, True, False))
sum(pets == 'dog')
# NumPy's own way of counting the True entries.
np.count_nonzero(pets == 'dog')
# Strings compare alphabetically: >= includes 'cat' itself, > excludes it.
pets >= 'cat'
sum(pets > 'cat')
# where
# A one-column table holding the semester numbers 1 through 8.
terms = Table().with_column('Semester', np.arange(1, 9))
terms
# Keep only the rows whose 'Semester' value satisfies the predicate.
terms.where('Semester', are.above(6))
# The predicate can be stored in a name and called like a function.
is_senior = are.above(6)
is_senior(7)
def also_is_senior(x):
    """Return whether ``x`` is strictly greater than 6.

    Args:
        x: A semester count (any value comparable with an int).

    Returns:
        The result of ``x > 6``.
    """
    return x > 6
# Call the plain function on a single value.
also_is_senior(7)
# Run the function on each value of the 'Semester' column.
terms.apply(also_is_senior, 'Semester')
# Three ways to hand `where` a condition for the 'Semester' column: a
# predicate built inline, a stored predicate, and an ordinary one-argument
# function.
terms.where('Semester', are.above(6))
terms.where('Semester', is_senior)
terms.where('Semester', also_is_senior)
# `where` called with a single argument: the result of `apply`.
# NOTE(review): presumably an array of booleans selecting rows - confirm
# against the datascience Table.where documentation.
terms.where(terms.apply(also_is_senior, 'Semester'))
terms.apply(also_is_senior, 'Semester')
def year_from_semesters(x):
    """Map a semester count to a class-year label.

    Args:
        x: Number of semesters (int or float).

    Returns:
        'Not a valid input' when ``x <= 0``; 'First Year' for values up to 2;
        'Sophomore' up to 4; 'Junior' up to 6; 'Senior' up to 8; and "NA"
        for anything above 8.
    """
    # Branches are checked in increasing order, so each one only sees values
    # that are larger than the previous upper bound.
    if x <= 0:
        return 'Not a valid input'
    elif x <= 2:
        return 'First Year'
    elif x <= 4:
        return 'Sophomore'
    elif x <= 6:
        return 'Junior'
    elif x <= 8:
        return 'Senior'
    else:
        return "NA"
# Spot-check three branches: a non-positive input, an in-range input
# (5 -> 'Junior'), and an input above 8.
year_from_semesters(-15.6)
year_from_semesters(5)
year_from_semesters(9001)
# Add a 'Year' column by applying the function to every 'Semester' value.
terms.with_column('Year', terms.apply(year_from_semesters, 'Semester'))
# Two integer ranges: 1..5 and 6..10.
first = np.arange(1, 6)
second = np.arange(6, 11)
first
# np.append returns a new array; these two results are not stored anywhere.
np.append(first, 30)
np.append(first, second)
# Show `first` again: the appends above did not modify it.
first
# Assign the result to keep the combined array.
first_second = np.append(first, second)
first_second
# The two labels to draw from.
two_groups = make_array('treatment', 'control')
two_groups
# Each call makes one random draw, so repeated calls can differ.
np.random.choice(two_groups)
np.random.choice(two_groups)
# A second argument requests that many draws (10 here).
np.random.choice(two_groups, 10)
# Each of these lines draws its own fresh sample, so the two counts come
# from different samples and need not add up to 10.
sum(np.random.choice(two_groups, 10) == 'treatment')
sum(np.random.choice(two_groups, 10) == 'control')
# Store a single sample so both counts describe the same 10 draws.
outcomes = np.random.choice(two_groups, 10)
outcomes
sum(outcomes == 'treatment')
sum(outcomes == 'control')
# A coin is modelled as a plain list of its two faces.
coin = ['heads', 'tails']
np.random.choice(coin)
# Simulate 100 tosses and count how many came up heads.
tosses = np.random.choice(coin, 100)
tosses
sum(tosses == 'heads')
# Start from an empty collection of results.
outcomes = make_array()
outcomes
# This append is not assigned, so `outcomes` is unchanged when shown next.
np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes
# Reassigning the name keeps the newly appended count.
outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes
# For Statements
# The loop body runs once for each value in np.arange(4): 0, 1, 2, 3.
for yale in np.arange(4):
    print(yale**2)
# The loop above prints the same values as writing each step out by hand:
print(0**2)
print(1**2)
print(2**2)
print(3**2)
# Repeat the 100-toss experiment 7 times, collecting the number of heads
# from each repetition.
outcomes = make_array()
for i in np.arange(7):
    # np.append returns a new array, so the name must be reassigned.
    outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes
# Repeat the 100-toss experiment 10,000 times, collecting the number of
# heads from each repetition.
outcomes = make_array()
for i in np.arange(10000):
    # np.append returns a new array, so the name must be reassigned.
    outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
# Histogram of the simulated head counts; bin edges sit on the half-integers
# 25.5, 26.5, ..., 74.5.
Table().with_column(
    'Heads in 100 Tosses', outcomes
).hist(bins=np.arange(25.5, 75.5))
# Now you get to try. Instead of tossing a coin 100 times, suppose you roll TWO 6-sided dice and consider the sum of their outcomes.
# What is the estimated probability that the sum is less than or equal to 5? Use 10,000 runs of your experiment to estimate this.
# Hints:
# Outcomes of a single six-sided die: 1 through 6.
roll = np.arange(1, 7)
roll
# Two rolls
np.random.choice(roll, 2)
# Sum of two rolls
sum(np.random.choice(roll, 2))
# Set up iterations: for each of 10,000 runs, record whether the sum of two
# rolls was less than or equal to 5.
outcomes = make_array()
for i in np.arange(10000):
    total = sum(np.random.choice(roll, 2))
    outcomes = np.append(outcomes, total <= 5)
# After the loop: the fraction of runs in which the condition held is the
# estimated probability.
outcomes_average = sum(outcomes)/len(outcomes)
outcomes_average
#Another option
# Draw all 10,000 first rolls and all 10,000 second rolls up front instead
# of looping.
roll1 = np.random.choice(roll,10000)
roll2 = np.random.choice(roll,10000)
# Element-wise sum: entry i is the total of the i-th pair of rolls.
total = roll1+roll2
# True where the total is 5 or less (despite the name, 5 itself is included).
outcomes_under_five = total <= 5
# Fraction of True entries = estimated probability.
sum(outcomes_under_five)/len(total)