In [ ]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Comparison

In [ ]:
3 > 1
In [ ]:
type(3 > 1)
In [ ]:
3 < 1
In [ ]:
True
In [ ]:
3 == 3
In [ ]:
# NOTE: this cell raises a SyntaxError - `=` is assignment, and a literal
# cannot be assigned to. Presumably kept on purpose as a contrast with
# `3 == 3`; use `==` to compare two values.
3 = 3
In [ ]:
x = 14
y = 3
In [ ]:
x > 10
In [ ]:
12 < x < 18
In [ ]:
12 < x
In [ ]:
x < 18
In [ ]:
12 < x-y < 18
In [ ]:
x > 10 and y > 5
In [ ]:
 

Comparisons with arrays

In [ ]:
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
In [ ]:
pets == 'dog'
In [ ]:
0 + 1 + 0 + 0 + 1 + 0
In [ ]:
sum(make_array(False, True, False, False, True, False))
In [ ]:
sum(pets == 'dog')
In [ ]:
np.count_nonzero(pets == 'dog')
In [ ]:
pets >= 'cat'
In [ ]:
sum(pets > 'cat')
In [ ]:
 
In [ ]:
 

Predicates and advanced where

In [ ]:
terms = Table().with_column('Semester', np.arange(1, 9))
terms
In [ ]:
terms.where('Semester', are.above(6))
In [ ]:
is_senior = are.above(6)
In [ ]:
is_senior(7)
In [ ]:
def also_is_senior(x):
    """Report whether a semester number is past the sixth semester.

    Returns the result of the comparison ``x > 6``: True for values
    strictly greater than 6, False otherwise.
    """
    past_sixth_semester = x > 6
    return past_sixth_semester
In [ ]:
also_is_senior(7)
In [ ]:
terms.apply(also_is_senior, 'Semester')
In [ ]:
terms.where('Semester', are.above(6))
In [ ]:
terms.where('Semester', is_senior)
In [ ]:
terms.where('Semester', also_is_senior)
In [ ]:
terms.where(terms.apply(also_is_senior, 'Semester'))
In [ ]:
terms.apply(also_is_senior, 'Semester')

Conditional Statements

In [ ]:
def year_from_semesters(x):
    """Translate a semester number into a class-year label.

    Returns 'Not a valid input' when x is zero or negative, one of
    'First Year', 'Sophomore', 'Junior' or 'Senior' when x falls in
    (0, 2], (2, 4], (4, 6] or (6, 8] respectively, and "NA" for
    anything else (for example values above 8).
    """
    # Guard clause: zero and negative semester numbers are rejected first.
    if x <= 0:
        return 'Not a valid input'

    # Inclusive upper bound of each class year, in ascending order, so the
    # first cutoff that x does not exceed determines the label.
    year_cutoffs = (
        (2, 'First Year'),
        (4, 'Sophomore'),
        (6, 'Junior'),
        (8, 'Senior'),
    )
    for last_semester, label in year_cutoffs:
        if x <= last_semester:
            return label

    # No cutoff matched: x is beyond the eighth semester (or not comparable).
    return "NA"
In [ ]:
year_from_semesters(-15.6)
In [ ]:
year_from_semesters(5)
In [ ]:
year_from_semesters(9001)
In [ ]:
terms.with_column('Year', terms.apply(year_from_semesters, 'Semester'))
In [ ]:
 

Appending Arrays

In [ ]:
first = np.arange(1, 6)
second = np.arange(6, 11)
In [ ]:
first
In [ ]:
np.append(first, 30)
In [ ]:
np.append(first, second)
In [ ]:
first
In [ ]:
first_second = np.append(first, second)
first_second
In [ ]:
 

Random Selection

In [ ]:
two_groups = make_array('treatment', 'control')
two_groups
In [ ]:
np.random.choice(two_groups)
In [ ]:
np.random.choice(two_groups)
In [ ]:
np.random.choice(two_groups, 10)
In [ ]:
sum(np.random.choice(two_groups, 10) == 'treatment')
In [ ]:
sum(np.random.choice(two_groups, 10) == 'control')
In [ ]:
outcomes = np.random.choice(two_groups, 10)
In [ ]:
outcomes
In [ ]:
sum(outcomes == 'treatment')
In [ ]:
sum(outcomes == 'control')

A simple experiment

In [ ]:
coin = ['heads', 'tails']
In [ ]:
np.random.choice(coin)
In [ ]:
tosses = np.random.choice(coin, 100)
tosses
In [ ]:
sum(tosses == 'heads')
In [ ]:
outcomes = make_array()
In [ ]:
outcomes
In [ ]:
# np.append returns a new array rather than changing its argument; the
# result is not assigned here, so `outcomes` itself is left unchanged.
np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
In [ ]:
outcomes
In [ ]:
outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes
In [ ]:
 

For Statements

In [ ]:
# The loop body runs once for each value in np.arange(4), i.e. 0, 1, 2, 3,
# with the loop variable (here named `yale`) bound to that value.
for yale in np.arange(4):
    print(yale**2)

# The same four prints written out by hand, one per loop iteration.
print(0**2)
print(1**2)
print(2**2)
print(3**2)
In [ ]:
# Reset the results array, then add one entry per experiment.
outcomes = make_array()

# Each pass simulates 100 coin tosses and appends the number of heads;
# the reassignment is needed because np.append returns a new array.
for i in np.arange(7):
    outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes
In [ ]:
# Reset the results array before the large simulation.
outcomes = make_array()

# Repeat the 100-toss experiment 10,000 times, recording the number of
# heads from each repetition in `outcomes`.
for i in np.arange(10000):
    outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
In [ ]:
# Bin edges at 25.5, 26.5, ..., 74.5 give width-1 bins centered on the
# whole-number head counts 26 through 74.
Table().with_column(
    'Heads in 100 Tosses', outcomes
).hist(bins = np.arange(25.5, 75.5))
In [ ]:
 
In [ ]:
 
In [ ]:
 

Now you get to try. Instead of tossing a coin 100 times, suppose you roll TWO 6-sided dice and consider the sum of their outcomes.
What is the estimated probability that the sum is less than or equal to 5? Use 10,000 runs of your experiment to estimate this. Hints:

  1. You will need to define the possible outcomes of 1,...,6
  2. Then use the possible outcomes with np.random.choice twice - once for each roll.
In [ ]:

In [ ]:
#Outcomes
roll = np.arange(1,7)
roll
In [ ]:
#Two rolls
np.random.choice(roll,2)
In [ ]:
#Sum of two rolls
sum(np.random.choice(roll,2))
In [ ]:
# Run 10,000 experiments; each one rolls two dice and records whether
# the sum of the two rolls is at most 5.
outcomes = make_array()

for i in np.arange(10000):
    total = sum(np.random.choice(roll,2))
    # Append the True/False result; each True counts as 1 in the sum below.
    outcomes = np.append(outcomes, total <= 5)
    
# Fraction of experiments with a sum of 5 or less = estimated probability.
outcomes_average = sum(outcomes)/len(outcomes)
outcomes_average
In [ ]:
# Another option: draw all 10,000 first rolls and all 10,000 second rolls
# up front, then work on whole arrays instead of looping.
roll1 = np.random.choice(roll,10000)
roll2 = np.random.choice(roll,10000)
# Element-wise sum: one total per pair of rolls.
total = roll1+roll2
# True where the sum is 5 or less (despite the name, a sum of exactly 5 counts).
outcomes_under_five = total <= 5
# Proportion of the 10,000 simulated pairs whose sum is at most 5.
sum(outcomes_under_five)/len(total)
In [ ]:
 
In [ ]: