# Spectral feature extraction for the signals stored in "data_files.npz".
#
# The helpers below turn time-domain rows into smoothed amplitude spectra.
# The script section (under the ``__main__`` guard) loads the data, applies
# the helpers, plots a sample and casts the features to int16.
from __future__ import print_function

import numpy as np


def convert_to_spectra(X):
    """Return the amplitude spectrum of every row of ``X``.

    Each row is transformed with ``np.fft.fft``.  Bin 0 (the DC component)
    is dropped and only bins ``1 .. ceil(n / 2) - 1`` are kept, so for an
    even-length row the bin at index ``n / 2`` is excluded as well.  The
    kept bins are scaled by ``2 / n`` (folding the negative frequencies
    onto the positive ones) and their magnitudes are returned.

    Args:
        X: Iterable of 1-D signals, e.g. a 2-D array with one signal per
            row.  The per-row results are stacked with ``np.array``, so
            the rows are expected to share one length.

    Returns:
        ``np.ndarray`` holding one row of magnitudes per input signal.
    """
    out = []
    for row in X:
        # Compute frequency spectrum
        xfft = np.fft.fft(row)
        n = len(xfft)

        # Integer form of ceil(n / 2): a float (as returned by np.ceil)
        # is not accepted as a slice bound by current NumPy.
        half_n = (n + 1) // 2

        # Fold negative frequencies and drop DC component
        out.append(np.abs((2.0 / n) * xfft[1:half_n]))
    return np.array(out)


def moving_average(X, n=3):
    """Smooth every row of ``X`` with a length-``n`` moving average.

    Only complete windows are averaged, so a row of length ``m`` yields
    ``m - n + 1`` values (none when ``n > m``).  The input rows are not
    modified: ``np.cumsum`` returns a new array for each row.

    Args:
        X: Iterable of 1-D numeric rows.
        n: Window length; must be at least 1.

    Returns:
        ``np.ndarray`` with one averaged row per input row.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be >= 1, got %r" % (n,))

    ret = []
    for row in X:
        sums = np.cumsum(row)
        # sums[i] - sums[i - n] is the sum of the n samples ending at i.
        sums[n:] = sums[n:] - sums[:-n]
        # The first n - 1 entries are partial sums of incomplete windows.
        ret.append(sums[n - 1:] / n)
    return np.array(ret)


if __name__ == "__main__":
    # The plotting stack is imported here rather than at the top of the
    # file so the two helpers above can be imported with NumPy alone.
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Equivalent of the notebook magic ``%matplotlib inline``, which is not
    # valid Python syntax.  Outside IPython ``get_ipython`` does not exist
    # and the magic is skipped.
    try:
        get_ipython().run_line_magic("matplotlib", "inline")
    except NameError:
        pass

    sns.set_context("talk")

    with np.load("data_files.npz") as data:
        X_train = data['X_train']
        Y_train = data['Y_train']
        X_test = data['X_test']
        Y_test = data['Y_test']
        X_comp = data['X_comp']
    del data

    X_train = X_train.astype(np.float64)
    X_test = X_test.astype(np.float64)
    X_comp = X_comp.astype(np.float64)

    # Was a bare notebook display expression; printed so that the value is
    # also visible when the file runs as a script.
    print(X_train.shape)

    plt.plot(X_train[0])

    X_train_spectra = convert_to_spectra(X_train)
    X_test_spectra = convert_to_spectra(X_test)
    X_comp_spectra = convert_to_spectra(X_comp)

    print(X_train_spectra.shape)

    plt.plot(X_train_spectra[0])
    plt.xlabel("Frequency")
    plt.ylabel("Amplitude")

    # Keep the unsmoothed training spectra for the comparison plot below.
    X_train_spectra_no_average = X_train_spectra
    X_train_spectra = moving_average(X_train_spectra, n=5)
    X_test_spectra = moving_average(X_test_spectra, n=5)
    X_comp_spectra = moving_average(X_comp_spectra, n=5)

    plt.subplot(2, 1, 1)
    plt.plot(X_train_spectra_no_average[0])
    plt.ylabel("Unaveraged Amplitude")
    plt.subplot(2, 1, 2)
    plt.plot(X_train_spectra[0])
    plt.ylabel("Averaged Amplitude")
    plt.xlabel("Frequency")

    # NOTE(review): presumably printed to confirm that the values fit the
    # int16 range before the cast below -- confirm.
    print(X_train_spectra.min(), X_train_spectra.max())
    print(X_test_spectra.min(), X_test_spectra.max())
    print(X_comp_spectra.min(), X_comp_spectra.max())

    X_train_spectra = X_train_spectra.astype(np.int16)
    X_test_spectra = X_test_spectra.astype(np.int16)
    X_comp_spectra = X_comp_spectra.astype(np.int16)

    # Labels in the first column(s), features after them.
    for_google = np.c_[Y_train, X_train_spectra]
    # np.savetxt("X_train_spectra_ave_goog.csv", for_google, delimiter=",", fmt='%i')

    print(X_train_spectra.shape)
    print(Y_train.shape)
# Train a random forest on the averaged spectra and report how well it
# classifies the held-out test set.  The feature/label arrays, ``for_google``
# and ``plt`` are created by the preprocessing code earlier in this file.
if __name__ == "__main__":
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import (
        accuracy_score,
        classification_report,
        confusion_matrix,
    )

    print(X_test_spectra.shape)
    print(Y_test.shape)
    print(X_comp_spectra.shape)

    # min_samples_split must be an integer >= 2 in current scikit-learn;
    # 2 (the library default) is the smallest node that can be split, so it
    # grows the same trees that the former value of 1 did on old releases.
    model = RandomForestClassifier(
        n_estimators=100,
        verbose=True,
        max_depth=None,
        min_samples_split=2,
        random_state=0,
    )
    model.fit(X_train_spectra, Y_train)

    my_score = model.score(X_test_spectra, Y_test)
    print(my_score)

    Y_pred = model.predict(X_test_spectra)

    # The next two values and the confusion matrix were bare notebook
    # display expressions; they are printed so that they are also visible
    # when the file runs as a script.
    print(accuracy_score(Y_test, Y_pred))
    print(for_google.shape)
    print(classification_report(Y_test, Y_pred))
    print(confusion_matrix(Y_test, Y_pred, labels=[0, 1]))

    # One importance value per input feature, in feature order.
    plt.plot(model.feature_importances_)
    plt.ylabel("Relative Feature Importance")
    plt.xlabel("Frequency")