#!/usr/bin/env python
# coding: utf-8

# We shall work on several concepts of statistics in this notebook, always in the context of data coming from independent Bernoulli trials with an unknown parameter $p$.

# Assume that we have a coin-flipping experiment with a coin, chosen from a pool of coins, whose success probability is $p$. Let us consider $1$ as success and $0$ as failure. The law which determines $p$ has a density $\pi_0(p)$. Let us then flip the coin $k$ times with results $x_1,\ldots,x_k$. What is the probability law for $p$ given $x_1,\ldots,x_k$?

# In[122]:


import numpy

p0 = 0.8  # numpy.random.uniform(0,1,1)

def pi0(p):
    # prior density of p: uniform on [0,1]
    return 1

k = 100
x = numpy.random.binomial(1, p0, k)  # k Bernoulli trials with success probability p0
y = numpy.cumsum(x)                  # running number of successes
print(p0)
print(x)
#print(y)


# # Bayesian Approach
# 
# This is a typical question about conditional distributions. We consider the random variables $X_1,\ldots,X_k$, which are Bernoulli distributed with success parameter $p$. Then
# $$
# P(X_1=x_1,\ldots,X_k=x_k|p) = p^{(x_1+\ldots+x_k)}(1-p)^{(k-(x_1+\ldots+x_k))}
# $$
# holds true. Bayes' theorem says
# $$
# P(p|x_1,\ldots,x_k)=\frac{p^{(x_1+\ldots+x_k)}(1-p)^{(k-(x_1+\ldots+x_k))}\pi_0(p)}{\int_0^1p^{(x_1+\ldots+x_k)}(1-p)^{(k-(x_1+\ldots+x_k))}\pi_0(p)dp} \, .
# $$

# In[123]:


import matplotlib.pyplot as plt

for i in range(k):
    p = numpy.linspace(0, 1, 1000)
    # unnormalized posterior density of p after i+1 flips with y[i] successes
    l = p**y[i]*(1-p)**(i+1-y[i])*pi0(p)
    l = l/numpy.sum(l)*1000          # normalize on the grid of 1000 points
    plt.plot(p, l)
    plt.scatter(numpy.array([p0]), numpy.array([0]))
    plt.show()
    print(numpy.mean(l*p))           # posterior mean of p


# # Maximum Likelihood Estimator
# 
# We now plot the sequence of likelihood functions; the red point marks the maximum likelihood estimator, i.e. the argument at which the likelihood function attains its maximum.

# In[124]:


for i in range(k):
    p = numpy.linspace(0, 1, 1000)
    l = p**y[i]*(1-p)**(i+1-y[i])
    plt.plot(p, l)
    plt.scatter(numpy.array([y[i]/(i+1)]), numpy.array([0]), color='r')
    plt.show()


# # Tests
# 
# In the sequel we test whether the null hypothesis $p_0$ or the alternative $p_1$ holds true: we first plot the likelihood for different values of $p$ given the data (Bayesian approach!), and we also plot the likelihood ratio (actually its logarithm) as a function of the data under the null and the alternative hypothesis.

# In[125]:


p0 = 0.5
p1 = 0.7
for i in range(k):
    p = numpy.linspace(0, 1, 1000)
    l = p**y[i]*(1-p)**(i+1-y[i])
    l = l/numpy.mean(l)
    plt.plot(p, l)
    plt.scatter(numpy.array([p0]), numpy.array([0]), color='r')
    plt.scatter(numpy.array([p1]), numpy.array([0]), color='g')
    print('Likelihood-Ratio: ', numpy.log(p0**y[i]*(1-p0)**(i+1-y[i])/(p1**y[i]*(1-p1)**(i+1-y[i]))))
    plt.show()


# In[126]:


from scipy import special

p0 = 0.5
p1 = 0.7
k = 100
for i in range(1, k):
    cs = numpy.linspace(0, i, i+1)       # possible success counts 0,...,i
    test = numpy.ones(i+1)
    helper = numpy.zeros(i+1)
    for l in range(i+1):
        # log-likelihood ratio of H0 against H1 for cs[l] successes out of i flips
        test[l] = numpy.log(p0**cs[l]*(1-p0)**(i-cs[l])/(p1**cs[l]*(1-p1)**(i-cs[l])))
        # binomial pmf of cs[l] successes out of i flips under the null hypothesis
        helper[l] = special.binom(i, l)*p0**cs[l]*(1-p0)**(i-cs[l])
    cdf = numpy.cumsum(helper)                     # CDF of the success count under the null
    threshold = test[numpy.argmax(cdf > 0.95)]     # LLR value at the 95% quantile under the null
    plt.plot(cs, test, color='blue')
    plt.plot(cs, numpy.ones(i+1)*threshold)
    plt.scatter(numpy.array([y[i-1]]), numpy.array([0]), color='r')
    plt.show()


# # $p$-value
# 
# We shall also calculate the $p$-value, i.e. the probability with which values larger than or equal to the actual observation would appear under the null hypothesis. It measures how 'extreme' an observation is, given that the null hypothesis is true.
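# As a minimal cross-check sketch (not part of the original computation), the one-sided tail probability $P(\#\text{successes} \ge y_{\mathrm{obs}})$ under the null hypothesis can also be read off the binomial survival function in `scipy.stats`; the flip count `n_flips` and the observation `y_obs` below are hypothetical placeholders. The next cell computes the same quantity by hand from the binomial pmf.

# In[ ]:


from scipy import stats

p_null = 0.5    # null hypothesis value of p, as in the cells above
n_flips = 20    # hypothetical number of flips
y_obs = 14      # hypothetical observed number of successes

# P(X >= y_obs) = P(X > y_obs - 1) for X ~ Binomial(n_flips, p_null)
p_value = stats.binom.sf(y_obs - 1, n_flips, p_null)
print("p-value:", p_value)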
# In[127]:


from scipy import special

p0 = 0.5
p1 = 0.7
k = 100
for i in range(1, k):
    cs = numpy.linspace(0, i, i+1)       # possible success counts 0,...,i
    c0 = numpy.zeros(i+1)
    for l in range(i+1):
        # binomial pmf of cs[l] successes out of i flips under the null hypothesis
        c0[l] = special.binom(i, l)*p0**(cs[l])*(1-p0)**(i-cs[l])
    plt.plot(cs, c0, color='blue')
    plt.scatter(numpy.array([y[i-1]]), numpy.array([0]), color='r')
    # p-value: probability of at least y[i-1] successes under the null hypothesis
    print(numpy.sum(c0[y[i-1]:]))
    plt.show()


# In[117]:


numpy.mean(x)


# # $t$-test
# 
# We collect two samples of normally distributed data in an unpaired way. We test whether the difference of expectations is $0$ (null hypothesis).

# In[128]:


## Import the packages
import numpy
from scipy import stats

## Define 2 random distributions
# Sample size
N = 10
# Gaussian distributed data with mean = 1 and var = 1
a = numpy.random.randn(N) + 1
# Gaussian distributed data with mean = 0 and var = 1
b = numpy.random.randn(N)


# In[129]:


# Calculate the empirical variances with unbiased estimators
var_a = a.var(ddof=1)
var_b = b.var(ddof=1)

# Pooled standard deviation (equal sample sizes)
s = numpy.sqrt((var_a + var_b)/2)

# Calculate the t-statistic
t = (a.mean() - b.mean())/(s*numpy.sqrt(2/N))

# Compare with the critical t-value
# Degrees of freedom
df = 2*N - 2

# One-sided tail probability from the t-distribution; the two-sided p-value is 2*p
p = 1 - stats.t.cdf(t, df=df)

print("t = " + str(t))
print("p = " + str(2*p))
# Comparing the t statistic with the critical t value (computed internally) typically yields
# a small p-value (in one run about 0.0005), so we reject the null hypothesis: the difference
# between the two sample means is statistically significant.

## Cross-checking with the internal scipy function
t2, p2 = stats.ttest_ind(a, b)
print("t = " + str(t2))
print("p = " + str(p2))

x = numpy.linspace(-5, 5, 1000)
plt.plot(x, stats.t.pdf(x, df=df))
plt.scatter(numpy.array([t]), numpy.array([0]), color='red')
plt.show()


# # Bayesian Approach in a Gaussian world -- the Kalman filter

# In[10]:


import matplotlib.pyplot as plt
import numpy

mu = 1
sigmaX = 1
sigmaZ = 1
n = 100
X = numpy.random.normal(mu, sigmaX, 1)*numpy.ones(n)   # one draw of the hidden state, held constant
Z = numpy.random.normal(0, sigmaZ, n)                  # observation noise
Y = X + Z                                              # noisy observations
Xhat = numpy.ones(n)
for i in range(n):
    x = numpy.linspace(-10, 10, 1000)
    # unnormalized posterior density: Gaussian likelihood of Y[i] times Gaussian prior N(mu, sigmaX^2)
    l = numpy.exp(-0.5*(Y[i]-x)**2/sigmaZ**2)*numpy.exp(-0.5*(x-mu)**2/sigmaX**2)
    l = l/numpy.sum(l)*1000/20       # normalize with respect to the grid spacing 20/1000
    plt.plot(x, l)
    plt.scatter(numpy.array([X[0]]), numpy.array([0]))
    plt.show()
    Xhat[i] = numpy.sum(l*x)*20/1000     # posterior mean as a Riemann sum
    # conjugate Gaussian update of the prior for the next step
    helper = (mu/sigmaX**2 + Y[i]/sigmaZ**2)
    sigmaX = numpy.sqrt(1/(1/sigmaX**2 + 1/sigmaZ**2))
    mu = sigmaX**2*helper


# In[11]:


plt.plot(X)
plt.plot(Y)
plt.plot(Xhat)
plt.show()


# In[ ]:
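# The loop above computes the posterior on a grid, but in this Gaussian-conjugate setting the update is available in closed form: with prior $N(\mu,\sigma_X^2)$ and observation $Y = X + Z$, $Z\sim N(0,\sigma_Z^2)$, the posterior is $N(\mu',\sigma_X'^2)$ with $1/\sigma_X'^2 = 1/\sigma_X^2 + 1/\sigma_Z^2$ and $\mu' = \sigma_X'^2(\mu/\sigma_X^2 + Y/\sigma_Z^2)$, which is exactly the update at the end of the loop. The following standalone sketch (an addition, with a hypothetical helper `gaussian_posterior_update` and a fixed random seed) iterates this closed-form recursion without the grid.

# In[ ]:


import numpy

def gaussian_posterior_update(mu, sigmaX, Y, sigmaZ):
    # One conjugate update: prior N(mu, sigmaX^2), observation Y = X + Z with Z ~ N(0, sigmaZ^2)
    sigma_post = numpy.sqrt(1/(1/sigmaX**2 + 1/sigmaZ**2))
    mu_post = sigma_post**2*(mu/sigmaX**2 + Y/sigmaZ**2)
    return mu_post, sigma_post

rng = numpy.random.default_rng(0)
X_true = rng.normal(1, 1)                  # hidden state, drawn once and held fixed
mu, sigmaX, sigmaZ = 1, 1, 1
for _ in range(100):
    Y = X_true + rng.normal(0, sigmaZ)     # noisy observation of the hidden state
    mu, sigmaX = gaussian_posterior_update(mu, sigmaX, Y, sigmaZ)
print("true X:", X_true, " posterior mean:", mu, " posterior std:", sigmaX)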