In [ ]:

from datascience import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Decisions

In [ ]:

# Build a synthetic class of n students matching the problem's proportions:
# 60% second years (half of them declared) and 40% third years (80% declared).
# np.array(list) converts a Python list to an array; every label here is a
# string, so each column holds a single type.

n = 100
second = round(n * 0.6)
third = n - second  # complement, so the two year groups always total n

# Only the declared count is rounded; the undeclared count is the remainder.
# That keeps 'Major' exactly as long as 'Year' even when a product above is
# not a whole number (two independent round() calls need not sum to the group).
second_declared = round(second * 0.5)
third_declared = round(third * 0.8)

year = np.array(['Second'] * second + ['Third'] * third)
major = np.array(
    ['Declared'] * second_declared + ['Undeclared'] * (second - second_declared) +
    ['Declared'] * third_declared + ['Undeclared'] * (third - third_declared)
)

students = Table().with_columns(
    'Year', year,
    'Major', major
)
students.show(3)

In [ ]:

# Cross-tabulate the students by 'Year' and 'Major'; the cells below read
# their counts (30, 32, 62) off this table.
students.pivot('Major', 'Year')

In [ ]:

# Chance of third year, given that they have declared
# P(third year | declared) = 32/(30+32)
# i.e. the 32 declared third years out of all 30 + 32 = 62 declared students

32 / 62

In [ ]:

# P(second year | declared) = 30/(30+32)
# i.e. the 30 declared second years out of the same 62 declared students

30 / 62

Bayes' Rule

In [ ]:

# Proportion of 2nd Years among students who are Declared, by Bayes' Rule:
#   P(2nd) * P(Declared | 2nd)
#   / [P(2nd) * P(Declared | 2nd) + P(3rd) * P(Declared | 3rd)]
# with P(2nd) = 0.6, P(Declared | 2nd) = 0.5, P(3rd) = 0.4, P(Declared | 3rd) = 0.8
(0.6 * 0.5) / ((0.6 * 0.5) + (0.4 * 0.8))

In [ ]:

# Proportion of 3rd Years among students who are Declared, by Bayes' Rule:
#   P(3rd) * P(Declared | 3rd)
#   / [P(2nd) * P(Declared | 2nd) + P(3rd) * P(Declared | 3rd)]
# with P(2nd) = 0.6, P(Declared | 2nd) = 0.5, P(3rd) = 0.4, P(Declared | 3rd) = 0.8
(0.4 * 0.8) / ((0.6 * 0.5) + (0.4 * 0.8))

Interpretation by Physicians of Clinical Laboratory Results (1978)

In [ ]:

# P(Disease | Test +) by Bayes' Rule, with prior P(Disease) = 0.001
# (so P(No disease) = 0.999) and false positive rate P(Test + | No disease) = 0.05.
# The problem did not give the true positive rate (it was assumed to be 100%).
(0.001 * 1) / ((0.001 * 1) + (0.999 * 0.05))

In [ ]:

# Replay the 1978 problem as head counts: a population of n people with a
# disease prevalence of 0.001, a test that flags everyone who has the disease,
# and a 0.05 false positive rate among those who do not.
n = 10000
disease = round(n * 0.001)
no_disease = n - disease  # complement, so the two groups always total n

# Only the false positives are rounded; the true negatives are the remainder.
# That keeps 'Test Result' exactly as long as 'Status' (two independent
# round() calls need not sum to no_disease).
false_positives = round(no_disease * 0.05)
true_negatives = no_disease - false_positives

status = np.array(['Disease'] * disease + ['No disease'] * no_disease)
result = np.array(
    ['Test +'] * disease +
    ['Test +'] * false_positives +
    ['Test -'] * true_negatives
)

persons = Table().with_columns(
    'Status', status,
    'Test Result', result
)
persons.show(3)

In [ ]:

# Cross-tabulate 'Status' against 'Test Result'.
# Among the Test + column, more people do *not* have the disease than do.
persons.pivot('Test Result', 'Status')

In [ ]:

# Number with "No disease": 500 who test positive + 9490 who test negative
500 + 9490

In [ ]:

# P(Disease | Test +) = 10 / (10 + 500):
# the 10 people with the disease out of all 510 people who test positive
10/510

Using Subjective Prior Probabilities

In [ ]:

# P(Disease | Test +) with a subjective prior P(Disease) = 0.1.
# The "no disease" weight is the complement of the prior (1 - 0.1 = 0.9), not
# the 0.999 that belonged to the 0.001 prior. The true positive rate is taken
# as 1 and the false positive rate as 0.05, as in the cells above.
prior_disease = 0.1
posterior_disease = (prior_disease * 1) / ((prior_disease * 1) + ((1 - prior_disease) * 0.05))
posterior_disease

In [ ]:

# P(Disease | Test +) with a subjective prior P(Disease) = 0.5,
# so P(No disease) is also 0.5; the false positive rate is still 0.05
(0.5 * 1) / ((0.5 * 1) + (0.5 * 0.05))

In [ ]:

def create_population(prior_disease_prob, n, false_positive_rate=0.05):
    """Simulate a tested population and cross-tabulate status against result.

    Everyone who has the disease tests positive (true positive rate of 1).
    Among those without it, a `false_positive_rate` share test positive and
    the rest test negative.

    Parameters
    ----------
    prior_disease_prob : float
        Prior probability of disease, i.e. the share of the population that
        has it.
    n : int
        Population size.
    false_positive_rate : float, optional
        P(Test + | No disease). Defaults to 0.05, the rate used throughout
        this notebook.

    Returns
    -------
    The table produced by pivoting 'Test Result' against 'Status' for the
    simulated population.
    """
    total = round(n)  # list repetition below needs whole-number counts
    disease = round(n * prior_disease_prob)
    # Complements are taken by subtraction rather than rounded separately:
    # two independent round() calls need not sum to the whole (n = 5 with a
    # prior of 0.5 would give 2 + 2 people), which would shrink the population
    # or leave the two columns with different lengths.
    no_disease = total - disease

    false_positives = round(no_disease * false_positive_rate)
    true_negatives = no_disease - false_positives

    status = np.array(['Disease'] * disease + ['No disease'] * no_disease)
    result = np.array(
        ['Test +'] * disease +
        ['Test +'] * false_positives +
        ['Test -'] * true_negatives
    )

    t = Table().with_columns(
        'Status', status,
        'Test Result', result
    )
    return t.pivot('Test Result', 'Status')

In [ ]:

# Simulate 10,000 people under the subjective prior P(Disease) = 0.5
create_population(0.5, 10000)

In [ ]:

# P(Disease | Test +) from the simulated counts above:
# 5000 people with the disease out of 5000 + 250 who test positive
5000 / (5000 + 250)

In [ ]: