In [ ]:

from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Comparison¶

In [ ]:

3 > 1

In [ ]:

type(3 > 1)

In [ ]:

3 < 1

In [ ]:

True

In [ ]:

3 == 3

In [ ]:

# NOTE(review): a single `=` is assignment, and a literal cannot be assigned
# to, so this cell raises a SyntaxError. Presumably intentional, to contrast
# with the `3 == 3` equality test in the previous cell -- confirm.
3 = 3

In [ ]:

x = 14
y = 3

In [ ]:

x > 10

In [ ]:

12 < x < 18

In [ ]:

12 < x

In [ ]:

x < 18

In [ ]:

12 < x-y < 18

In [ ]:

x > 10 and y > 5

In [ ]:

Comparisons with arrays¶

In [ ]:

pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')

In [ ]:

pets == 'dog'

In [ ]:

0 + 1 + 0 + 0 + 1 + 0

In [ ]:

sum(make_array(False, True, False, False, True, False))

In [ ]:

sum(pets == 'dog')

In [ ]:

np.count_nonzero(pets == 'dog')

In [ ]:

pets >= 'cat'

In [ ]:

sum(pets > 'cat')

In [ ]:

Predicates and advanced `where`¶

In [ ]:

terms = Table().with_column('Semester', np.arange(1, 9))
terms

In [ ]:

terms.where('Semester', are.above(6))

In [ ]:

is_senior = are.above(6)

In [ ]:

is_senior(7)

In [ ]:

def also_is_senior(x):
    """Tell whether a semester number is past the sixth semester.

    Hand-written counterpart of the ``are.above(6)`` predicate: the result
    is truthy only when ``x`` is strictly greater than 6.
    """
    last_junior_semester = 6
    return last_junior_semester < x

In [ ]:

also_is_senior(7)

In [ ]:

terms.apply(also_is_senior, 'Semester')

In [ ]:

terms.where('Semester', are.above(6))

In [ ]:

terms.where('Semester', is_senior)

In [ ]:

terms.where('Semester', also_is_senior)

In [ ]:

terms.where(terms.apply(also_is_senior, 'Semester'))

In [ ]:

terms.apply(also_is_senior, 'Semester')

Conditional Statements¶

In [ ]:

def year_from_semesters(x):
    """Map a semester count to the name of the corresponding class year.

    Values of 0 or less give 'Not a valid input'. Otherwise the label is
    chosen by the first two-semester bracket whose upper bound is at least
    ``x``: up to 2 is 'First Year', up to 4 'Sophomore', up to 6 'Junior',
    up to 8 'Senior'. Anything beyond 8 gives "NA".
    """
    if x <= 0:
        return 'Not a valid input'

    # (inclusive upper bound, label) pairs, checked in ascending order.
    brackets = (
        (2, 'First Year'),
        (4, 'Sophomore'),
        (6, 'Junior'),
        (8, 'Senior'),
    )
    for upper_bound, label in brackets:
        if x <= upper_bound:
            return label
    return "NA"

In [ ]:

year_from_semesters(-15.6)

In [ ]:

year_from_semesters(5)

In [ ]:

year_from_semesters(9001)

In [ ]:

terms.with_column('Year', terms.apply(year_from_semesters, 'Semester'))

In [ ]:

Appending Arrays¶

In [ ]:

first = np.arange(1, 6)
second = np.arange(6, 11)

In [ ]:

first

In [ ]:

np.append(first, 30)

In [ ]:

np.append(first, second)

In [ ]:

first

In [ ]:

first_second = np.append(first, second)
first_second

In [ ]:

Random Selection¶

In [ ]:

two_groups = make_array('treatment', 'control')
two_groups

In [ ]:

np.random.choice(two_groups)

In [ ]:

np.random.choice(two_groups)

In [ ]:

np.random.choice(two_groups, 10)

In [ ]:

sum(np.random.choice(two_groups, 10) == 'treatment')

In [ ]:

sum(np.random.choice(two_groups, 10) == 'control')

In [ ]:

outcomes = np.random.choice(two_groups, 10)

In [ ]:

outcomes

In [ ]:

sum(outcomes == 'treatment')

In [ ]:

sum(outcomes == 'control')

A simple experiment¶

In [ ]:

coin = ['heads', 'tails']

In [ ]:

np.random.choice(coin)

In [ ]:

tosses = np.random.choice(coin, 100)
tosses

In [ ]:

sum(tosses == 'heads')

In [ ]:

outcomes = make_array()

In [ ]:

outcomes

In [ ]:

np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))

In [ ]:

outcomes

In [ ]:

outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes

In [ ]:

`For` Statements¶

In [ ]:

# Each pass binds `yale` to the next value of np.arange(4) (0, 1, 2, 3)
# and prints its square. The loop variable's name is arbitrary.
for yale in np.arange(4):
    print(yale**2)

# The loop above is shorthand for these four statements written out by hand.
print(0**2)
print(1**2)
print(2**2)
print(3**2)

In [ ]:

# Repeat the experiment "toss the coin 100 times and count the heads"
# 7 times, collecting one heads count per repetition in `outcomes`.
outcomes = make_array()

for i in np.arange(7):
    # np.append returns a new array rather than modifying its argument,
    # so the result has to be assigned back to `outcomes`.
    outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))
outcomes

In [ ]:

# Same experiment as the 7-repetition version, now repeated 10,000 times.
# Start from an empty array so counts from earlier cells are discarded.
outcomes = make_array()

for i in np.arange(10000):
    # One repetition: 100 random tosses, count how many came up 'heads',
    # and append that count to the running collection.
    outcomes = np.append(outcomes, sum(np.random.choice(coin, 100) == 'heads'))

In [ ]:

# Histogram of the simulated heads counts. The bin edges run over the
# half-integers 25.5, 26.5, ..., 74.5, so every whole-number count from
# 26 to 74 falls in a bin of its own.
Table().with_column(
    'Heads in 100 Tosses', outcomes
).hist(bins = np.arange(25.5, 75.5))

In [ ]:

Now you get to try. Instead of tossing a coin 100 times, suppose you roll TWO 6-sided dice and consider the sum of their outcomes.
What is the estimated probability that the sum is less than or equal to 5? Use 10,000 runs of your experiment to estimate this. Hints:

- You will need to define the possible outcomes 1, ..., 6.
- Then use the possible outcomes with np.random.choice twice - once for each roll.

In [ ]:

#Outcomes
roll = np.arange(1,7)
roll

In [ ]:

#Two rolls
np.random.choice(roll,2)

In [ ]:

#Sum of two rolls
sum(np.random.choice(roll,2))

In [ ]:

# Simulate 10,000 rolls of two dice, recording for each roll whether the
# sum was 5 or less.
outcomes = make_array()

for i in np.arange(10000):
    # Two draws from `roll` (the faces 1..6), added together.
    total = sum(np.random.choice(roll,2))
    # Append the result of the comparison `total <= 5` for this roll.
    outcomes = np.append(outcomes, total <= 5)
    
# Fraction of rolls whose sum was <= 5: the estimated probability.
outcomes_average = sum(outcomes)/len(outcomes)
outcomes_average

In [ ]:

# Another option: draw all 10,000 first rolls and all 10,000 second rolls
# at once, then work with whole arrays instead of looping.
roll1 = np.random.choice(roll,10000)
roll2 = np.random.choice(roll,10000)
# Element-wise sum: total[k] is the sum of the k-th pair of rolls.
total = roll1+roll2
# True where the sum is 5 or less. Note that the comparison includes 5
# itself, despite the "under_five" in the variable name.
outcomes_under_five = total <= 5
# Proportion of True values: the estimated probability.
sum(outcomes_under_five)/len(total)

In [ ]:

Comparison¶

Comparisons with arrays¶

Predicates and advanced where¶

Conditional Statements¶

Appending Arrays¶

Random Selection¶

A simple experiment¶

For Statements¶

Predicates and advanced `where`¶

`For` Statements¶