# Load the raw file as strings first: missing values are written as "?",
# which cannot be parsed as numbers directly.
data = np.loadtxt("data/breast-cancer-wisconsin.data", delimiter=',', dtype=str)
data[data == "?"] = "NaN"
# `np.float` was only an alias of the builtin `float` and was removed in
# NumPy 1.24; the builtin yields the same float64 array on every version.
data = data.astype(float)

# The first column is skipped (presumably a sample id -- confirm against the
# dataset description); the last column holds the class label.
X = data[:, 1:-1]
y = data[:, -1]
# Re-encode the raw label values as consecutive integers 0..k-1 so they can
# be used directly as indices (e.g. into a colour table).
_, y = np.unique(y, return_inverse=True)
# Replace the missing entries (parsed as NaN above) with 0.
X[np.isnan(X)] = 0

# Keep three features for the 3D plots: columns 1, 2 and 5 of X.
# NOTE(review): the plot below labels the third feature "Epithelial size",
# but in the public description of this dataset X[:, 5] appears to be
# "bare nuclei" (epithelial cell size would be X[:, 4]) -- confirm which
# column is intended.
X_ = np.hstack([X[:, 1:3], X[:, 5:6]])
# On the original run: X_.shape == (699, 3)
from sklearn.linear_model import LogisticRegression

# `sklearn.cross_validation` was removed in scikit-learn 0.20; the function
# lives in `sklearn.model_selection` since 0.18. Fall back to the old module
# only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# L2-regularised logistic regression; C is the inverse regularisation
# strength, so C=0.1 regularises more strongly than the default C=1.0.
clf = LogisticRegression(C=0.1)

# Random train/test split. No random_state is set, so the split -- and
# therefore the score below -- varies from run to run.
X_train, X_test, y_train, y_test = train_test_split(X_, y)
clf.fit(X_train, y_train)
# Estimator repr echoed by the original session:
#   LogisticRegression(C=0.1, class_weight=None, dual=False,
#                      fit_intercept=True, intercept_scaling=1, penalty='l2',
#                      random_state=None, tol=0.0001)

# Mean accuracy on the held-out split.
clf.score(X_test, y_test)
# On the original run: 0.97142857142857142
# Importing Axes3D registers the "3d" projection on older Matplotlib
# versions; newer versions register it automatically.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
# `Axes3D(fig)` no longer attaches the axes to the figure on current
# Matplotlib (the plot comes out empty); `add_subplot` works on old and new
# versions alike.
ax = fig.add_subplot(111, projection="3d")
# `y` is already integer-encoded (0..k-1) by the earlier np.unique call, so
# it can index the colour table directly; re-encoding it here was a no-op.
# NOTE(review): this plot uses the first three columns of the full matrix
# `X`, not the selected features `X_` used for the classifier -- confirm
# that this is intended.
ax.scatter3D(X[:, 0], X[:, 1], X[:, 2], color=np.array(["red", "green"])[y])
plt.show()
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from matplotlib.colors import colorConverter


def cc(arg):
    """Return colour *arg* as an RGBA tuple with alpha fixed at 0.6."""
    return colorConverter.to_rgba(arg, alpha=0.6)


# Decision boundary of the fitted model: w . x + b = 0.
w = clf.coef_.ravel()
# `intercept_` is an array (length 1 for a binary problem). Take the scalar
# so the vertices below are plain floats: mixing a length-1 array with
# scalars inside np.array([...]) builds a ragged sequence, which recent
# NumPy versions reject.
b = clf.intercept_[0]

# The plane crosses axis i at x_i = -b / w[i]. Placing those intercepts on
# the diagonal gives the three triangle vertices, one per row:
#   (-b/w0, 0, 0), (0, -b/w1, 0), (0, 0, -b/w2)
axis_intercepts = -b / w
verts = [np.diag(axis_intercepts)]

fig = plt.figure()
# `Axes3D(fig)` no longer attaches the axes to the figure on current
# Matplotlib; `add_subplot` works on old and new versions alike.
ax = fig.add_subplot(111, projection="3d")
ax.add_collection3d(Poly3DCollection(verts, facecolors=[cc("blue")]))
# `y` is integer-encoded, so it indexes the colour table directly.
ax.scatter3D(X_[:, 0], X_[:, 1], X_[:, 2], color=np.array(["red", "green"])[y])
ax.set_xlabel("uniformity size")
ax.set_ylabel("uniformity shape")
# NOTE(review): confirm this label matches the third column selected into
# X_ (taken from X[:, 5]).
ax.set_zlabel("Epithelial size")
plt.savefig("presentation/logreg-pics/wisconsin_surface.pdf")
plt.show()