#!/usr/bin/env python
# coding: utf-8
# # Introduction to machine learning
#
# *Maxime Sangnier*
#
# Fall, 2023
#
# ## Practical session 1: discriminant analysis, logistic regression and boosting
# # Table of contents
# 1. [Discriminant analysis](#part1)
# - [Linear discriminant analysis](#part1sec1)
# - [Quadratic discriminant analysis](#part1sec2)
# - [Fisher discriminant analysis](#part1sec3)
# 1. [Logistic regression](#part2)
# 1. [Adaboost](#part3)
#
# In[1]:
from mllab import *
# # Discriminant analysis
# ## Linear discriminant analysis
# >The `covariance` function makes it possible to build a $2 \times 2$ covariance matrix based on spreads $\sigma_1$ and $\sigma_2$, and the angle $\theta$.
# In[2]:
get_ipython().run_line_magic('pinfo', 'covariance')
# >Based on the Cholesky decomposition of a $2 \times 2$ covariance matrix $\Sigma$, write a function that generates a multivariate Gaussian $n$-sample of mean $\mu \in \mathbb R^2$ and covariance $\Sigma$.
# The corresponding numpy array should be of size $(n, 2)$.
#
# >Compute the mean and the empirical covariance of the sample using Numpy routines.
# In[ ]:
# Answer
def gaussian_sample(mu=[0, 0], sigma1=1., sigma2=1., theta=0., n=50):
# Todo
# End todo
return X
# Todo
# End todo
# >Generate two multivariate Gaussian samples of size $n_1 = n_2 = 50$ with different means and equal covariance matrices.
# Plot both samples with different markers by using the function `plotXY`.
# In[ ]:
# Answer
# >Write a function that creates and returns two Numpy arrays `X` and `y` such that `X` is the data matrix (of size $n \times 2$) and `y` is the vector of labels (of size $n$, with values $\pm 1$) corresponding to i.i.d. copies of $(X, Y)$ distributed such that:
# $$
# \begin{cases}
# \forall i \in \{\pm 1\}: X~|~Y=i \sim \mathcal N (\mu_i, \Sigma_i)\\
# \mathbb P(Y=1) = \pi, \mathbb P(y=-1)=1-\pi.
# \end{cases}
# $$
# >
# >Create a dataset of size $n=100$.
# In[ ]:
# Answer
def sample_classif(weight=0.5,
param1=dict(mu=[0, 0], sigma1=1., sigma2=1.),
param2=dict(mu=[0, 0], sigma1=1., sigma2=1.),
n=50):
Y = 2 * np.random.binomial(n=1, p=weight, size=n) - 1 # Labels 1 or -1
# Todo
# End todo
return X, Y
# Todo
# End todo
# >Based on the following code, implement a linear discriminant classifier, taking as parameters an $n \times 2$ Numpy array as data and a size-$n$ array of labels.
# In[ ]:
# Answer
from sklearn.base import BaseEstimator
from sklearn.discriminant_analysis import LinearClassifierMixin
class LDA(BaseEstimator, LinearClassifierMixin):
"""
LDA classifier for two classes.
"""
def __init__(self, bias=False):
"""
bias: default normalization (False) of the covariance matrix
estimator is by ``(N - 2)``, where ``N`` is the number of
observations given (unbiased estimate). If `bias` is True,
then normalization is by ``N``.
"""
self.bias = bias
self.yvalues_ = None
self.coef_ = None
self.intercept_ = None
def fit(self, X, y,):
self.yvalues_ = np.unique(y)
assert self.yvalues_.size==2
# Estimate covariance matrix and means
n_pos, n = np.sum(y == self.yvalues_[1]), X.shape[0] # Number of positive labels and size of the dataset
# Todo
# End todo
# Compute direction and intercept
# Todo
# End todo
return self
def decision_function(self, X):
# Compute decisions
# Todo
# End todo
return decisions
def predict(self, X):
# Compute predictions
predictions = np.ones(X.shape[0]) * self.yvalues_[0] # Negative label
# Todo
# End todo
return predictions
# >Fit a linear discriminant classifier on the data `X,y`.
# Plot the data along with the classifier frontiere (use the function `plot_frontiere`).
# In[ ]:
# Answer
# >Compare the result of [scikit-learn LDA](http://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html#sklearn.discriminant_analysis.LinearDiscriminantAnalysis) (decision function and frontiere).
# In[ ]:
# Answer
# ## Quadratic discriminant analysis
# >Analyze the behavior of LDA and [QDA](http://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.html#sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis) when it is faced to anisotropic Gaussian samples (in particular, check if the frontiere is the bisector of the line segment for which the extremities are both class centers), and then to Gaussian samples with different covariance matrices (you can use `plot_frontiere` with a list of classifiers).
# In[ ]:
# Answer
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
qda = QuadraticDiscriminantAnalysis()
# Gassian parameters
mu1 = [0, 0]
mu2 = [5, 3]
plt.figure(figsize=(20, 20))
for ifig, (param1, param2) in enumerate([(dict(mu=mu1, sigma1=1, sigma2=1, theta=0), dict(mu=mu2, sigma1=1, sigma2=1, theta=0)),
(dict(mu=mu1, sigma1=1, sigma2=5, theta=0), dict(mu=mu2, sigma1=1, sigma2=5, theta=0)),
(dict(mu=mu1, sigma1=1, sigma2=5, theta=np.pi/6), dict(mu=mu2, sigma1=1, sigma2=5, theta=np.pi/6)),
(dict(mu=mu1, sigma1=1, sigma2=5, theta=0), dict(mu=mu2, sigma1=5, sigma2=1, theta=0)),
(dict(mu=mu1, sigma1=1, sigma2=5, theta=0), dict(mu=mu2, sigma1=5, sigma2=1, theta=np.pi/3))]):
# Dataset
# Todo
# End todo
# Discriminant analysis
# Todo
# End todo
# Class means
# Todo
# End todo
# Plot frontieres and class means
# Todo
# End todo
# ## Fisher discriminant analysis
# >Implement the Fisher discriminant analysis based on the following code.
# In practice, what is the difference between LDA and FisherDA?
# In[ ]:
# Answer
class FisherDA(BaseEstimator, LinearClassifierMixin):
"""
Fisher discriminant analysis for two classes.
"""
def __init__(self):
self.coef_ = None
self.intercept_ = None
def fit(self, X, y):
# Estimate prior, covariance matrix and means
# To do
# End todo
# Compute direction and intercept
# Todo
# End todo
self.intercept_ = 0
ypred = self.decision_function(X)
ind = np.argsort(ypred)
err = np.cumsum(y[ind]) + np.sum(y==y.min())
#plt.figure()
#plt.plot(ypred[ind], err) # Error
iintercept = np.argmin(err)
if iintercept < y.size-1:
self.intercept_ = -0.5*(ypred[ind[iintercept]] + ypred[ind[iintercept+1]])
else:
self.intercept_ = -ypred[ind[iintercept]]
return self
def decision_function(self, X):
# Compute decisions
# Todo
# End todo
return decisions
def predict(self, X):
# Compute predictions
# Todo
# End todo
return predictions
# In[ ]:
# Answer
# # Logistic regression
# >We consider that
# $$
# X|Y=1 \sim \mathcal N(0, I)
# \qquad \text{and} \qquad
# X|Y=-1 \sim 0.5 \mathcal N\left(\begin{pmatrix} 5 \\ 3 \end{pmatrix}, I\right) + 0.5 \mathcal N\left(\begin{pmatrix} 8 \\ 9 \end{pmatrix}, I\right)
# \quad \text{(non-Gaussian class)}.
# $$
# Compare LDA and [logistic regression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html).
# In[ ]:
# Answer
# >What about with this dataset (class $-1$ is Gaussian but with an outlier)?
# In[13]:
# Dataset
X, Y = sample_classif(weight=.5,
param1=dict(mu=[0, 0], sigma1=1, sigma2=1, theta=0),
param2=dict(mu=[5, 3], sigma1=1, sigma2=1, theta=0),
n=100)
X[np.argmin(Y)] = np.random.randn(2) + 20
# In[ ]:
# Answer
# # Adaboost
# >We consider the dataset defined below.
# In[15]:
# Dataset
X, Y = sample_classif(weight=.3,
param1=dict(mu=[0, 0], sigma1=10, sigma2=1, theta=np.pi/6),
param2=dict(mu=[0, 0], sigma1=1, sigma2=1, theta=0),
n=100)
X_ng, Y_ng = sample_classif(weight=0.5,
param1=dict(mu=[5, 3], sigma1=3, sigma2=10, theta=np.pi/6),
param2=dict(mu=[-5, -2], sigma1=3, sigma2=10, theta=np.pi/10),
n=X.shape[0])
X[Y==-1] = X_ng[:np.sum(Y==-1)]
plotXY(X, Y)
# >Fit an [Adaboost](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html) classifier with $100$ weak learners and the algorithm SAMME.
# Map the classifier regions on a figure.
# In[ ]:
# Answer
# >Plot on a new figure the estimator errors (attribute `estimator_errors_`).
# What do you observe?
# In[ ]:
# Answer
# >Load the [dataset digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits).
# How many observations, covariates and classes has it?
# [Split](http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) the dataset into two equally sized subsets (one for training, the other for testin, i.e. estimating the true error).
# In[ ]:
# Answer
# >Plot the train and test errors of both algorithms SAMME and SAMME.R with respect to the number of iterations (from 1 to 200) for the dataset digits.
# For this purpose, use [`DecisionTreeClassifier(max_depth=5)`](http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html) as base learner.
# In[ ]:
# Answer