#!/usr/bin/env python
# coding: utf-8

# # Decision Trees
#
# **The classes are implemented using only numpy, and the visualizations are
# shown using matplotlib. The code for the decision-tree classes is given in
# the trees.py file.**

Table of Contents

#
# Demo script (exported from a Jupyter notebook): fits ClassificationTree and
# RegressionTree models from trees.py on sklearn's bundled datasets and plots
# the resulting trees with matplotlib.

# In[1]:
import numpy as np
import matplotlib.pyplot as plt

# ## Import classifier and regressor from the given file ***trees.py***
# In[2]:
from trees import ClassificationTree, RegressionTree

# ## For datasets and splitting we need sklearn (optional, if you have your own data)
# In[4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# IPython line magics (e.g. %matplotlib) exist only under IPython/Jupyter.
# Guard them so this file also runs as a plain Python script without NameError.
try:
    _ipy = get_ipython()  # noqa: F821 -- injected by IPython at runtime
except NameError:
    _ipy = None

# # Classification Tree
# ## Iris dataset: loading and splitting into training and testing sets
# In[5]:
data = datasets.load_iris()
X = data.data
y = data.target
feature_names = data.feature_names  # optional
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)

# ### Fitting a model (displaying the tree building) with different modes
# #### verbose=0 (silent mode)
# In[6]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=0, feature_names=feature_names)

# #### verbose=1 (progress bar)
# In[7]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)

# #### verbose=2 (printing tree info)
# In[8]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=2, feature_names=feature_names)

# #### verbose=3 (printing branches only)
# In[9]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=3, feature_names=feature_names)

# #### verbose=4 (plotting the tree while building)
# In[10]:
if _ipy:
    _ipy.run_line_magic('matplotlib', 'notebook')

# In[16]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=4, feature_names=feature_names, randomBranch=True)
plt.close(clf.fig)

# ### Plotting the resulting tree
# In[17]:
plt.figure(figsize=(8, 6))
clf.plotTree(show=True, DiffBranchColor=True, scale=False)

# ### Plotting the tree with same-color branches
# In[18]:
plt.figure(figsize=(8, 6))
clf.plotTree(show=True)

# ### Predicting and computing accuracy
# In[20]:
ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))

# ## Iris data with a smaller tree
# In[25]:
clf = ClassificationTree(max_depth=2)
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)
plt.figure(figsize=(5, 5))
clf.plotTree(show=True, DiffBranchColor=True)
ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))

# ## Breast Cancer data
# In[26]:
data = datasets.load_breast_cancer()
X = data.data
y = data.target
feature_names = data.feature_names  # optional
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)

# ### Fitting a model, displaying the details of the tree in process (verbose=4)
# While building the tree: to first take the True branch and then the False
# branch, set randomBranch=False.
# In[28]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=4, feature_names=feature_names, randomBranch=False)

# To randomly select the True or the False branch, set randomBranch=True.
# In[29]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=4, feature_names=feature_names, randomBranch=True)
plt.close(clf.fig)

# ### Resulting tree
# In[31]:
plt.figure(figsize=(6, 6))
clf.plotTree(show=True, DiffBranchColor=True, scale=False)

# ### Fitting a model, displaying the progress only (verbose=1)
# In[32]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)
plt.figure(figsize=(6, 6))
clf.plotTree(show=True)

# ### Plotting the decision tree
# In[33]:
plt.figure(figsize=(6, 6))
clf.plotTree(show=True)

# ### Predicting and computing accuracy
# (the original header said "MSE", but this section computes accuracy)
# In[34]:
ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))
# It is overfitting; try smaller trees by decreasing max_depth of the classifier.

# # Regression Tree
# ## House-price / regression dataset
# In[35]:
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# fall back to the bundled diabetes regression dataset when it is unavailable,
# so the demo still runs on modern scikit-learn.
try:
    data = datasets.load_boston()
except (AttributeError, ImportError):
    data = datasets.load_diabetes()
X = data.data
y = data.target
feature_names = data.feature_names  # optional
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)

# In[37]:
rgr = RegressionTree()
rgr.fit(Xt, yt, verbose=1, feature_names=feature_names)

# In[42]:
if _ipy:
    _ipy.run_line_magic('matplotlib', 'inline')
plt.style.use('default')

# In[44]:
plt.figure(figsize=(10, 10))
rgr.plotTree(show=True, scale=False, DiffBranchColor=True)

# In[19]:
plt.figure(figsize=(15, 15))
rgr.plotTree(show=True, scale=True, showtitle=False, showDirection=False,
             DiffBranchColor=True)

# In[45]:
if _ipy:
    _ipy.run_line_magic('matplotlib', 'notebook')
rgr = RegressionTree(max_depth=4)
rgr.fit(Xt, yt, verbose=4, feature_names=feature_names)

# In[47]:
plt.figure(figsize=(10, 6))
rgr.plotTree(show=True, scale=True, showtitle=False, showDirection=False,
             DiffBranchColor=True)

# In[48]:
ytp = rgr.predict(Xt)
ysp = rgr.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt) ** 2))
print('Testing MSE: ', np.mean((ysp - ys) ** 2))

# ## Regression data with a smaller tree
# In[49]:
rgr = RegressionTree(max_depth=3)
rgr.fit(Xt, yt, verbose=1, feature_names=feature_names)

# In[52]:
plt.figure(figsize=(8, 6))
rgr.plotTree(show=True, scale=True, showtitle=True, showDirection=False,
             DiffBranchColor=True)
ytp = rgr.predict(Xt)
ysp = rgr.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt) ** 2))
print('Testing MSE: ', np.mean((ysp - ys) ** 2))

# In[ ]: