#!/usr/bin/env python
# coding: utf-8
# # Decision Trees ::
#
# **Class is implemented only using numpy and visualization are shown using matplotlib. Code for class of decision tree is given in trees.py file**
#
# Table of Contents
#
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
# ## Import classifier and regressor from given file ***trees.py***
# In[2]:
from trees import ClassificationTree, RegressionTree
# ## For dataset loading and splitting, we need sklearn (Optional, if you have your own data)
# In[4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
# # Classification Tree
# ## Iris Dataset
# Loading and splitting for training and testing
# In[5]:
# Load the iris dataset and split it into train/test sets.
data = datasets.load_iris()
X = data.data
y = data.target
feature_names = data.feature_names  # Optional: only used for nicer node labels in the tree plots
# random_state pins the split so the accuracies printed below are reproducible
# from run to run (previously the split — and hence the results — changed every run).
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=42)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# ### Fitting a model (displaying the tree building)
# In[6]:
# Fit a full-depth classification tree on the iris training split,
# printing the per-node construction details while building (verbose=2).
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=2, feature_names=feature_names)

# ### Plotting the resulting tree
# In[7]:
plt.figure(figsize=(15, 8))
clf.plotTree(show=True)

# ### Plotting the tree with differently coloured branches
# In[8]:
plt.figure(figsize=(15, 8))
clf.plotTree(show=True, DiffBranchColor=True)

# ### Predicting and computing accuracy
# In[9]:
pred_train = clf.predict(Xt)
pred_test = clf.predict(Xs)
print('Training Accuracy: ', np.mean(pred_train == yt))
print('Testing Accuracy: ', np.mean(pred_test == ys))
# ## Iris data with smaller tree
# In[10]:
# A depth-limited tree (max_depth=2): less expressive, but far easier to read.
clf = ClassificationTree(max_depth=2)
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)

plt.figure(figsize=(7, 6))  # a compact figure is enough for a depth-2 tree
clf.plotTree(show=True)

ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))
# ## Breast Cancer data
# In[11]:
# Load the breast-cancer dataset and split it into train/test sets.
data = datasets.load_breast_cancer()
X = data.data
y = data.target
feature_names = data.feature_names  # Optional: only used for nicer node labels in the tree plots
# random_state pins the split so the accuracies printed below are reproducible
# from run to run (previously the split — and hence the results — changed every run).
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=42)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# ### Fitting model with displaying the details of tree in process (verbose=2)
# In[12]:
# Fit once with full per-node details printed during construction (verbose=2).
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=2, feature_names=feature_names)

# ### Fitting model with displaying the progress only (verbose=1)
# In[13]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)

# ### Plotting the decision tree (uncomment to draw — the full tree is large)
# plt.figure(figsize=(15,8))
# clf.plotTree(show=True)

# ### Predicting and computing accuracy
# In[15]:
pred_train = clf.predict(Xt)
pred_test = clf.predict(Xs)
print('Training Accuracy: ', np.mean(pred_train == yt))
print('Testing Accuracy: ', np.mean(pred_test == ys))
# **It's overfitting, try with smaller trees by decreasing the max_depth of the classifier**
# # Regression Tree
# ## Boston House price
# In[16]:
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2
# (the dataset has documented ethical problems). On older versions we keep the
# original behaviour; on modern versions we fall back to the California housing
# dataset, scikit-learn's recommended regression replacement.
try:
    data = datasets.load_boston()
except AttributeError:
    print('load_boston is unavailable in this scikit-learn version; '
          'using fetch_california_housing instead.')
    data = datasets.fetch_california_housing()
X = data.data
y = data.target
feature_names = data.feature_names  # Optional: only used for nicer node labels in the tree plots
# random_state pins the split so the MSE values printed below are reproducible.
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=42)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# In[17]:
# Fit a full-depth regression tree on the training split, showing progress.
reg = RegressionTree()
reg.fit(Xt, yt, verbose=1, feature_names=feature_names)

# In[19]:
plt.figure(figsize=(15, 15))
reg.plotTree(show=True, scale=True, showtitle=False,
             showDirection=False, DiffBranchColor=True)

# In[20]:
# Mean-squared error on the train and test splits.
ytp = reg.predict(Xt)
ysp = reg.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt) ** 2))
print('Testing MSE: ', np.mean((ysp - ys) ** 2))
# ## Boston Data with smaller tree
# In[21]:
# A shallow regression tree (max_depth=3) trades accuracy for readability.
rgr = RegressionTree(max_depth=3)
rgr.fit(Xt, yt, verbose=1, feature_names=feature_names)

# In[22]:
plt.figure(figsize=(15, 8))
rgr.plotTree(show=True, scale=True, showtitle=True,
             showDirection=False, DiffBranchColor=True)

# Mean-squared error on the train and test splits.
ytp = rgr.predict(Xt)
ysp = rgr.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt) ** 2))
print('Testing MSE: ', np.mean((ysp - ys) ** 2))