"""Fit decision-tree classifiers on ``diabetes.csv`` and render them with Graphviz.

The script downloads the dataset if it is not already present, splits it
80/20 into training and validation sets, fits one tree with default
settings and one with explicit size limits, prints the accuracy of each on
both splits, and builds a Graphviz rendering of each fitted tree.

Prerequisites (the notebook version of this file ran these as shell escapes
before importing ``graphviz``):

    apt-get install graphviz
    pip install graphviz
"""

from pathlib import Path
import urllib.request

import graphviz
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz

DATA_URL = (
    "https://raw.githubusercontent.com/susanli2016/"
    "Machine-Learning-with-Python/master/diabetes.csv"
)
DATA_PATH = Path("diabetes.csv")


def _report_accuracy(model, X_train, y_train, X_val, y_val):
    """Print the accuracy of *model* on both splits.

    Returns:
        A ``(training_prediction, validation_prediction)`` tuple holding the
        labels predicted for ``X_train`` and ``X_val`` respectively.
    """
    validation_prediction = model.predict(X_val)
    training_prediction = model.predict(X_train)
    print('Exactitud training data: ',
          accuracy_score(y_true=y_train, y_pred=training_prediction))
    print('Exactitud validation data: ',
          accuracy_score(y_true=y_val, y_pred=validation_prediction))
    return training_prediction, validation_prediction


def _export_dot(model, feature_names):
    """Return the Graphviz DOT source describing the fitted *model*."""
    return export_graphviz(
        model,
        out_file=None,  # return the DOT text instead of writing a file
        feature_names=feature_names,
        class_names=True,
        filled=True,
        rounded=True,
        special_characters=True,
    )


# --- Load the data ---------------------------------------------------------

# Download only when the file is missing so re-runs work offline and keep
# reading the same local copy.
if not DATA_PATH.exists():
    urllib.request.urlretrieve(DATA_URL, str(DATA_PATH))

dataset = pd.read_csv(DATA_PATH)

# Bare expressions are kept for notebook use, where a cell's last expression
# is displayed; they have no visible effect when run as a plain script.
dataset.head()
dataset.shape

# Every column except "Outcome" is a feature; "Outcome" is the target.
features = dataset.drop(["Outcome"], axis=1)
X = np.array(features)
y = np.array(dataset["Outcome"])

X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=0, test_size=0.20
)

# A plain list is used because some scikit-learn releases reject a pandas
# Index for ``feature_names``.
feature_names = list(features.columns)

# --- Tree 1: default hyper-parameters (no depth or leaf-size limits) -------

# NOTE(review): no random_state is passed to the classifier, so the fitted
# tree may differ between runs when several splits are equally good.
tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)
tree.tree_.max_depth

training_prediction, validation_prediction = _report_accuracy(
    tree, X_train, y_train, X_val, y_val
)

dot_data = _export_dot(tree, feature_names)
graph = graphviz.Source(dot_data)
graph

# --- Tree 2: explicit limits on depth and node sizes ----------------------

tree = DecisionTreeClassifier(
    min_samples_leaf=10, max_depth=8, min_samples_split=50
)
tree.fit(X_train, y_train)

training_prediction, validation_prediction = _report_accuracy(
    tree, X_train, y_train, X_val, y_val
)

dot_data = _export_dot(tree, feature_names)
graph = graphviz.Source(dot_data)
graph