#!/usr/bin/env python
# coding: utf-8
# # Decision Trees ::
#
# **The classes are implemented using only numpy, and visualizations are shown using matplotlib. The code for the decision-tree classes is in the trees.py file**
#
# Table of Contents
#
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
# ## Import classifier and regressor from given file ***trees.py***
# In[2]:
from trees import ClassificationTree, RegressionTree
# ## For the dataset and splitting, we need sklearn (optional, if you have your own data)
# In[4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
# # Classification Tree
# ## Iris Dataset
# Loading and splitting for training and testing
# In[5]:
# Load the iris data and hold out 30% of the samples for testing.
data = datasets.load_iris()
X, y = data.data, data.target
feature_names = data.feature_names  # optional, used for labeling plots
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# ### Fit the classifier once per verbosity mode:
#   0 = silent, 1 = progress bar, 2 = tree info, 3 = branches only
for level in range(4):
    clf = ClassificationTree()
    clf.fit(Xt, yt, verbose=level, feature_names=feature_names)

# verbose=4 draws the tree while it is being built, which needs an
# interactive backend — hence the notebook magic first.
get_ipython().run_line_magic('matplotlib', 'notebook')
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=4, feature_names=feature_names, randomBranch=True)
plt.close(clf.fig)
# ### Plot the fitted tree (distinct branch colors, unscaled layout).
plt.figure(figsize=(8, 6))
clf.plotTree(show=True, DiffBranchColor=True, scale=False)

# Same tree again, with uniform branch colors.
plt.figure(figsize=(8, 6))
clf.plotTree(show=True)

# ### Accuracy on the training and held-out splits.
ytp, ysp = clf.predict(Xt), clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))
# ## Iris data with a depth-limited (smaller) tree
clf = ClassificationTree(max_depth=2)
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)

plt.figure(figsize=(5, 5))
clf.plotTree(show=True, DiffBranchColor=True)

ytp, ysp = clf.predict(Xt), clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))
# ## Breast Cancer dataset
data = datasets.load_breast_cancer()
X, y = data.data, data.target
feature_names = data.feature_names  # optional, used for labeling plots
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# ### Fit while displaying the tree as it grows (verbose=4).
# randomBranch=False explores the True branch first, then the False one.
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=4, feature_names=feature_names, randomBranch=False)

# randomBranch=True selects the True or False branch to follow at random.
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=4, feature_names=feature_names, randomBranch=True)
plt.close(clf.fig)

# ### Resulting tree
plt.figure(figsize=(6, 6))
clf.plotTree(show=True, DiffBranchColor=True, scale=False)
# ### Fit showing only the progress bar (verbose=1), then plot.
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)
plt.figure(figsize=(6, 6))
clf.plotTree(show=True)

# ### Plot the decision tree once more.
plt.figure(figsize=(6, 6))
clf.plotTree(show=True)

# ### Predict and compute accuracy on both splits.
ytp, ysp = clf.predict(Xt), clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))
# **It's overfitting; try smaller trees by decreasing the max_depth of the classifier**
# # Regression Tree
# ## Boston house-price data
# NOTE: sklearn.datasets.load_boston was deprecated in scikit-learn 1.0
# and removed in 1.2, so calling it crashes on modern installs.  Fall back
# to the California housing data (same Bunch interface: .data, .target,
# .feature_names) so this regression demo still runs everywhere.
try:
    data = datasets.load_boston()
except (AttributeError, ImportError):
    # fetch_california_housing downloads the data on first use.
    data = datasets.fetch_california_housing()
X = data.data
y = data.target
feature_names = data.feature_names  # optional, used for labeling plots
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# Fit a regression tree on the training split (progress bar only).
rgr = RegressionTree()
rgr.fit(Xt, yt, verbose=1, feature_names=feature_names)

# Switch back to the inline backend for static figures.
get_ipython().run_line_magic('matplotlib', 'inline')
plt.style.use('default')

plt.figure(figsize=(10, 10))
rgr.plotTree(show=True, scale=False, DiffBranchColor=True)
# A larger, scaled rendering without titles or direction markers.
plt.figure(figsize=(15, 15))
rgr.plotTree(show=True, scale=True, showtitle=False, showDirection=False, DiffBranchColor=True)

# Refit with a depth limit, watching the tree build interactively.
get_ipython().run_line_magic('matplotlib', 'notebook')
rgr = RegressionTree(max_depth=4)
rgr.fit(Xt, yt, verbose=4, feature_names=feature_names)

plt.figure(figsize=(10, 6))
rgr.plotTree(show=True, scale=True, showtitle=False, showDirection=False, DiffBranchColor=True)

# Mean squared error on the training and held-out splits.
ytp, ysp = rgr.predict(Xt), rgr.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt)**2))
print('Testing MSE: ', np.mean((ysp - ys)**2))
# ## Boston data with a smaller (depth-3) regression tree
rgr = RegressionTree(max_depth=3)
rgr.fit(Xt, yt, verbose=1, feature_names=feature_names)

plt.figure(figsize=(8, 6))
rgr.plotTree(show=True, scale=True, showtitle=True, showDirection=False, DiffBranchColor=True)

ytp, ysp = rgr.predict(Xt), rgr.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt)**2))
print('Testing MSE: ', np.mean((ysp - ys)**2))