# Fit a depth-2 decision tree on three columns of the training CSV, then show
# the fitted tree both as a matplotlib figure and as indented text.
import matplotlib.pyplot as plt
import pandas as pd
# plot_tree and export_text are both new in scikit-learn 0.21.
from sklearn.tree import DecisionTreeClassifier, export_text, plot_tree

df = pd.read_csv('http://bit.ly/kaggletrain')

# Encode Sex numerically so the tree can split on it (male -> 0, female -> 1).
# Series.map turns any value missing from the dict into NaN.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

features = ['Pclass', 'Fare', 'Sex']
X = df[features]
y = df['Survived']

# Human-readable labels handed to plot_tree as class_names.
# NOTE(review): assumes Survived is coded 0 = deceased, 1 = survived -- confirm.
classes = ['Deceased', 'Survived']

# max_depth=2 keeps the tree small enough to read; random_state=0 fixes the
# estimator's seed so repeated runs use the same randomness.
dt = DecisionTreeClassifier(max_depth=2, random_state=0)
dt.fit(X, y)

# Graphical view of the fitted tree.
plt.figure(figsize=(8, 6))
plot_tree(dt, feature_names=features, class_names=classes, filled=True)

# Text view of the same tree; show_weights=True adds the per-class weights
# at each leaf.
print(export_text(dt, feature_names=features, show_weights=True))

# Render the figure when run as a plain script. Kept last so the text output
# above is printed before any blocking window opens.
plt.show()
|--- Sex <= 0.50
|   |--- Fare <= 26.27
|   |   |--- weights: [361.00, 54.00] class: 0
|   |--- Fare >  26.27
|   |   |--- weights: [107.00, 55.00] class: 0
|--- Sex >  0.50
|   |--- Pclass <= 2.50
|   |   |--- weights: [9.00, 161.00] class: 1
|   |--- Pclass >  2.50
|   |   |--- weights: [72.00, 72.00] class: 0
© 2020 Data School. All rights reserved.