from __future__ import print_function
# %matplotlib inline  -- IPython line magic (notebook-only); commented out so the file parses as plain Python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context("talk")  # seaborn "talk" context: larger fonts/lines for presentation-style figures
# Load the pre-split train/test/competition arrays from the compressed archive.
with np.load("data_files.npz") as data:
    X_train, Y_train = data['X_train'], data['Y_train']
    X_test, Y_test = data['X_test'], data['Y_test']
    X_comp = data['X_comp']
# The context manager closed the archive; drop the stale handle name as well.
del data
# Work in floating point this time
# Work in floating point from here on.  `.astype(np.float64)` is the
# explicit, idiomatic cast — np.float64(arr) relies on scalar-type call
# semantics to produce the same float64 ndarray copy.
X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
X_comp = X_comp.astype(np.float64)
# X_train.shape  (notebook echo — no effect in a script)
# (15680, 3333)
plt.plot(X_train[0])  # eyeball the first raw time-domain trace
# [<matplotlib.lines.Line2D at 0x7fb2786ad0d0>]  (notebook output echo)
def convert_to_spectra(X):
    """Convert each time-domain row of X into a one-sided amplitude spectrum.

    Parameters
    ----------
    X : 2-D array-like, shape (n_samples, n_points)
        One time-domain signal per row.

    Returns
    -------
    np.ndarray, shape (n_samples, ceil(n_points/2) - 1)
        Folded (one-sided) FFT amplitudes, DC component dropped, scaled by
        2/n so amplitudes are comparable across signal lengths.
    """
    out = []
    for row in X:
        # Compute frequency spectrum
        xfft = np.fft.fft(row)
        n = len(xfft)
        # Fold negative frequencies and drop DC component.
        # BUGFIX: np.ceil returns a float, and float slice indices raise
        # TypeError on modern NumPy — the index must be an int.
        half_n = int(np.ceil(n / 2.0))
        xfft = (2.0 / n) * xfft[1:half_n]
        out.append(np.abs(xfft))
    return np.array(out)
# Transform every split into folded FFT amplitude spectra.
X_train_spectra, X_test_spectra, X_comp_spectra = (
    convert_to_spectra(split) for split in (X_train, X_test, X_comp)
)
# X_train_spectra.shape  (notebook echo — no effect in a script)
# (15680, 1666)
# Plot the folded amplitude spectrum of the first training example.
plt.plot(X_train_spectra[0])
plt.xlabel("Frequency")
plt.ylabel("Amplitude")
# <matplotlib.text.Text at 0x7fb27873f050>  (notebook output echo)
def moving_average(X, n=3):
    """Smooth each row of X with a length-``n`` moving average.

    Uses the cumulative-sum trick: differencing the running total at a
    lag of ``n`` gives every window sum in one vectorized pass.  Each
    output row is ``n - 1`` entries shorter than its input row.
    """
    def _smooth_row(row):
        # Running total; subtracting the total shifted by n leaves window sums.
        totals = np.cumsum(row)
        totals[n:] = totals[n:] - totals[:-n]
        return totals[n - 1:] / n

    return np.array([_smooth_row(row) for row in X])
# Keep the raw spectra around for the before/after comparison plot below,
# then smooth every split with a 5-point moving average.
# NOTE: statement order matters — the unaveraged copy must be captured
# before X_train_spectra is rebound to the smoothed version.
X_train_spectra_no_average = X_train_spectra
X_train_spectra = moving_average(X_train_spectra, n=5)
X_test_spectra = moving_average(X_test_spectra, n=5)
X_comp_spectra = moving_average(X_comp_spectra, n=5)
# Before/after view of the 5-point smoothing on the first training example.
plt.subplot(2, 1, 1)
plt.plot(X_train_spectra_no_average[0])
plt.ylabel("Unaveraged Amplitude")
plt.subplot(2, 1, 2)
plt.plot(X_train_spectra[0])
plt.ylabel("Averaged Amplitude")
plt.xlabel("Frequency")
# <matplotlib.text.Text at 0x7fb278492d10>  (notebook output echo)
# Sanity-check the dynamic range of each spectra set before quantizing.
for spectra in (X_train_spectra, X_test_spectra, X_comp_spectra):
    print(spectra.min(), spectra.max())
# Output: 0.0 5079.24144214 0.0 4924.13100536 0.0 5131.84586544
# Quantize the spectra to 16-bit integers for the CSV export below; the
# max amplitudes printed above (~5100) fit easily in int16's +/-32767
# range.  `.astype` is the explicit, idiomatic cast and, like
# np.int16(arr), truncates the fractional part toward zero.
X_train_spectra = X_train_spectra.astype(np.int16)
X_test_spectra = X_test_spectra.astype(np.int16)
X_comp_spectra = X_comp_spectra.astype(np.int16)
# Prepend the labels so each row is [label, features...]; presumably this
# was destined for an external (Google) service upload — see the
# deliberately commented-out CSV export kept below for reference.
for_google = np.c_[Y_train, X_train_spectra]
# np.savetxt("X_train_spectra_ave_goog.csv", for_google, delimiter=",", fmt='%i')
# Report the shape of every array headed into modeling.
for arr in (X_train_spectra, Y_train, X_test_spectra, Y_test, X_comp_spectra):
    print(arr.shape)
# Output: (15680, 1662) (15680,) (6720, 1662) (6720,) (9600, 1662)
from sklearn.ensemble import RandomForestClassifier
# Random forest on the smoothed, quantized spectra.
# BUGFIX: min_samples_split=1 is rejected by scikit-learn >= 0.18 (an
# internal node needs at least 2 samples to split).  2 is the library
# default and behaviorally equivalent: a 1-sample node can never produce
# two non-empty children anyway.
model = RandomForestClassifier(n_estimators=100, verbose=True,
                               max_depth=None, min_samples_split=2,
                               random_state=0)
model.fit(X_train_spectra, Y_train)
# [Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 1.5min [Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.5min finished
# RandomForestClassifier(bootstrap=True, compute_importances=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, min_density=None, min_samples_leaf=1, min_samples_split=1, n_estimators=100, n_jobs=1, oob_score=False, random_state=0, verbose=True)
# Mean accuracy on the held-out test split.
my_score = model.score(X_test_spectra,Y_test)
print(my_score)
# Output: 0.991666666667
# [Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 0.4s [Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.4s finished
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
Y_pred = model.predict(X_test_spectra)  # hard class predictions for the metrics below
# [Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 0.5s [Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.5s finished
# accuracy_score(Y_test, Y_pred)  (notebook echo — result discarded in a script; wrap in print() to see it)
# 0.9916666666666667
# for_google.shape  (notebook echo — no effect in a script)
# (15680, 1663)
# Per-class precision/recall/F1 on the test split.
print(classification_report(Y_test, Y_pred))
# Output: precision recall f1-score support 0 0.99 0.99 0.99 3381 1 0.99 0.99 0.99 3339 avg / total 0.99 0.99 0.99 6720
# confusion_matrix(Y_test, Y_pred, labels=[0, 1])  (notebook echo — result discarded in a script; wrap in print() to see it)
# array([[3358, 23], [ 33, 3306]])
# Which frequency bins the forest found most informative.
plt.plot(model.feature_importances_)
plt.ylabel("Relative Feature Importance")
plt.xlabel("Frequency")
# <matplotlib.text.Text at 0x7fb27468dcd0>  (notebook output echo)