#!/usr/bin/env python
# coding: utf-8
# # Decision Trees ::
#
# **Class is implemented only using numpy and visualization are shown using matplotlib. Code for class of decision tree is given in trees.py file**
#
# Table of Contents
#
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
# ## Import classifier and regressor from given file ***trees.py***
# In[2]:
from trees import ClassificationTree, RegressionTree
# ## For dataset loading and splitting, we need sklearn (Optional, if you have your own data)
# In[4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
# # Classification Tree
# ## Iris Dataset
# Loading and splitting for training and testing
# In[5]:
# Load the iris dataset and split it into train/test sets.
data = datasets.load_iris()
X = data.data
y = data.target
feature_names = data.feature_names  # Optional: only used for nicer node labels in the tree plots
# random_state pins the split so the accuracies printed below are reproducible
# from run to run (previously the split — and hence the results — changed every run).
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=42)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# ### Fitting a model (displaying the tree building)
# In[6]:
# Fit a full-depth classification tree on the iris training split,
# printing the per-node construction details while building (verbose=2).
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=2, feature_names=feature_names)

# ### Plotting the resulting tree
# In[7]:
plt.figure(figsize=(15, 8))
clf.plotTree(show=True)

# ### Plotting the tree with differently coloured branches
# In[8]:
plt.figure(figsize=(15, 8))
clf.plotTree(show=True, DiffBranchColor=True)

# ### Predicting and computing accuracy
# In[9]:
pred_train = clf.predict(Xt)
pred_test = clf.predict(Xs)
print('Training Accuracy: ', np.mean(pred_train == yt))
print('Testing Accuracy: ', np.mean(pred_test == ys))
# ## Iris data with smaller tree
# In[10]:
# A depth-limited tree (max_depth=2): less expressive, but far easier to read.
clf = ClassificationTree(max_depth=2)
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)

plt.figure(figsize=(7, 6))  # a compact figure is enough for a depth-2 tree
clf.plotTree(show=True)

ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy: ', np.mean(ytp == yt))
print('Testing Accuracy: ', np.mean(ysp == ys))
# ## Breast Cancer data
# In[11]:
# Load the breast-cancer dataset and split it into train/test sets.
data = datasets.load_breast_cancer()
X = data.data
y = data.target
feature_names = data.feature_names  # Optional: only used for nicer node labels in the tree plots
# random_state pins the split so the accuracies printed below are reproducible
# from run to run (previously the split — and hence the results — changed every run).
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=42)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# ### Fitting model with displaying the details of tree in process (verbose=2)
# In[12]:
# Fit once with full per-node details printed during construction (verbose=2).
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=2, feature_names=feature_names)

# ### Fitting model with displaying the progress only (verbose=1)
# In[13]:
clf = ClassificationTree()
clf.fit(Xt, yt, verbose=1, feature_names=feature_names)

# ### Plotting the decision tree (uncomment to draw — the full tree is large)
# plt.figure(figsize=(15,8))
# clf.plotTree(show=True)

# ### Predicting and computing accuracy
# In[15]:
pred_train = clf.predict(Xt)
pred_test = clf.predict(Xs)
print('Training Accuracy: ', np.mean(pred_train == yt))
print('Testing Accuracy: ', np.mean(pred_test == ys))
# **It's overfitting, try with smaller trees by decreasing the max_depth of the classifier**
# # Regression Tree
# ## Boston House price
# In[16]:
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2
# (the dataset has documented ethical problems). On older versions we keep the
# original behaviour; on modern versions we fall back to the California housing
# dataset, scikit-learn's recommended regression replacement.
try:
    data = datasets.load_boston()
except AttributeError:
    print('load_boston is unavailable in this scikit-learn version; '
          'using fetch_california_housing instead.')
    data = datasets.fetch_california_housing()
X = data.data
y = data.target
feature_names = data.feature_names  # Optional: only used for nicer node labels in the tree plots
# random_state pins the split so the MSE values printed below are reproducible.
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=42)
print(X.shape, y.shape, Xt.shape, yt.shape, Xs.shape, ys.shape)
# In[17]:
# Fit a full-depth regression tree on the training split, showing progress.
reg = RegressionTree()
reg.fit(Xt, yt, verbose=1, feature_names=feature_names)

# In[19]:
plt.figure(figsize=(15, 15))
reg.plotTree(show=True, scale=True, showtitle=False,
             showDirection=False, DiffBranchColor=True)

# In[20]:
# Mean-squared error on the train and test splits.
ytp = reg.predict(Xt)
ysp = reg.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt) ** 2))
print('Testing MSE: ', np.mean((ysp - ys) ** 2))
# ## Boston Data with smaller tree
# In[21]:
# A shallow regression tree (max_depth=3) trades accuracy for readability.
rgr = RegressionTree(max_depth=3)
rgr.fit(Xt, yt, verbose=1, feature_names=feature_names)

# In[22]:
plt.figure(figsize=(15, 8))
rgr.plotTree(show=True, scale=True, showtitle=True,
             showDirection=False, DiffBranchColor=True)

# Mean-squared error on the train and test splits.
ytp = rgr.predict(Xt)
ysp = rgr.predict(Xs)
print('Training MSE: ', np.mean((ytp - yt) ** 2))
print('Testing MSE: ', np.mean((ysp - ys) ** 2))