You can press Shift + Enter to run one cell; you can also click Run in the top menu.
To run all the cells, click Kernel and then Restart and run all in the top menu.
# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# These are IPython magics, not plain Python: they only work inside an IPython/Jupyter session.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded with %aimport) before running code.
%autoreload 2
# NOTE(review): reloading the extension right after configuring it looks redundant — confirm it is needed.
%reload_ext autoreload
# Draw matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# Notebook-wide plotting defaults: 8x8 inch figures, and images drawn in
# grayscale without interpolation between pixels.
plt.rcParams.update({
    'figure.figsize': (8, 8),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})
import numpy as np
from skimage.feature import hog
# Ignore warnings in notebook
# (this silences every warning, including deprecation warnings raised by the libraries used below)
import warnings
warnings.filterwarnings('ignore')
# Create a temporary python PATH to the module that we are using for the analysis
# NOTE(review): machine-specific absolute path; it must be adjusted on any other computer.
import sys
sys.path.insert(0, "/Users/Espenel/Desktop/Mini-Grant-Image-analysis/2018/Chloe/ChromosomeDetectionChloe/utils")
# The wildcard import is expected to supply `load_data`, `visualization` and
# `build_classifier`, which the cells below use without importing them anywhere else in view.
from chromosome_dsb import *
# Folders holding the positive and the negative training images.
# NOTE(review): machine-specific absolute paths; they must be adjusted on any other computer.
path = "/Users/Espenel/Desktop/Mini-Grant-Image-analysis/2018/Chloe/training_set_mip/positive/"
path_n = "/Users/Espenel/Desktop/Mini-Grant-Image-analysis/2018/Chloe/training_set_mip/negative/"
# Load both folders. Judging from how the results are used below, X1/X2 are the
# positive/negative images, titles_pos/titles_neg their plot titles, and X the
# combined set — TODO confirm against load_data.dataset.
X, X1, X2, titles_pos, titles_neg = load_data.dataset(path, path_n)
# Show each class as a mosaic together with its titles.
visualization.plot_mosaic(X1, titles_pos)
visualization.plot_mosaic(X2, titles_neg)
# Compute the HOG (histogram of oriented gradients) descriptor of the first image
# and also request the rendered HOG image (visualize=True) so it can be displayed.
# `multichannel` is deliberately not passed: it was deprecated in scikit-image 0.19
# in favour of `channel_axis`, and recent releases reject the keyword. Leaving it
# out keeps a 2-D image treated as single-channel on both old and new versions.
fd, hog_image = hog(X[0], orientations=8, pixels_per_cell=(6, 6), block_norm='L1',
                    cells_per_block=(3, 3), visualize=True)
# Side by side: HOG rendering on the left, original image on the right.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(hog_image)
ax[1].imshow(X[0])
<matplotlib.image.AxesImage at 0x11d27b320>
# Label vector: 1.0 for every positive example, 0.0 for every negative one,
# with the positives first and the negatives after them.
y_pos, y_neg = np.ones(len(X1)), np.zeros(len(X2))
Y = np.concatenate([y_pos, y_neg])
# Show the full image set, using the labels as titles.
visualization.plot_mosaic(X, Y)
from sklearn.preprocessing import StandardScaler
# Convert the images to HOG features and split them into train and test sets
# (as the helper's name suggests). `dat` is reused later for the learning curve —
# presumably the HOG features of the whole dataset; TODO confirm.
X_train, X_test, y_train, y_test, dat = build_classifier.hog_convert_split(X, Y)
# Fit the scaler on the training split only, so it never sees the test data.
scaler = StandardScaler()
scaler.fit(X_train)
StandardScaler(copy=True, with_mean=True, with_std=True)
# Standardize both splits with the statistics learned from the training split.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
Some help on how to choose your estimator: https://scikit-learn.org/stable/tutorial/machine_learning_map/
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Linear-kernel support vector classifier; probability=True additionally enables
# probability estimates (predict_proba) on the fitted model.
clf = SVC(kernel='linear', probability=True)
# Train on the standardized training split.
clf.fit(X_train, y_train)
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto_deprecated', kernel='linear', max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Predict the labels of the held-out test split.
pred = clf.predict(X_test)
# Confusion matrix of true vs. predicted labels (scikit-learn puts the true
# classes on the rows and the predicted classes on the columns).
print(confusion_matrix(y_test,pred))
[[23 0] [ 1 27]]
# Per-class precision, recall, F1-score and support on the test split.
print(classification_report(y_test,pred))
precision recall f1-score support 0.0 0.96 1.00 0.98 23 1.0 1.00 0.96 0.98 28 micro avg 0.98 0.98 0.98 51 macro avg 0.98 0.98 0.98 51 weighted avg 0.98 0.98 0.98 51
Precision tells me, out of all the chromosomes the classifier found, how many were true chromosomes. Recall tells me, out of all the chromosomes I was supposed to find, how many I actually found.
The learning curve will allow us to find out whether our learning algorithm suffers from high bias (underfitting) or high variance (overfitting).
# Plot the learning curve of the classifier from `dat` and the full label vector `Y`.
# NOTE(review): `dat` is returned by hog_convert_split before the StandardScaler is
# created, so it is presumably unscaled unless plot_learning_curve scales internally — confirm.
visualization.plot_learning_curve(dat, Y, clf)
# Output folder for the trained classifier and the fitted scaler.
# NOTE(review): this rebinds `path`, which earlier held the positive training-set
# folder; it is also a machine-specific absolute path.
path = "/Users/Espenel/Desktop/Mini-Grant-Image-analysis/2018/Chloe/clf_scaler/"
# Save both objects: the classifier was trained on standardized features, so the
# same scaler has to be applied to any new data before prediction.
load_data.save_file(path, "clf", clf)
load_data.save_file(path, "scaler", scaler)