%matplotlib inline import numpy as np import matplotlib.pyplot as plt from scipy import stats # use seaborn plotting defaults # If this causes an error, you can comment it out. import seaborn as sns sns.set() from astroML.datasets import fetch_rrlyrae_combined from sklearn.cross_validation import train_test_split X, y = fetch_rrlyrae_combined() N_plot = 5000 plt.scatter(X[-N_plot:, 0], X[-N_plot:, 1], c=y[-N_plot:], edgecolors='none', cmap='RdBu') plt.xlabel('u-g color') plt.ylabel('g-r color') plt.xlim(0.7, 1.4) plt.ylim(-0.2, 0.4); from sklearn.svm import SVC SVC? from sklearn.cross_validation import cross_val_score cross_val_score? from sklearn.cross_validation import train_test_split Xsubset, _, ysubset, _ = train_test_split(X, y, train_size=0.1) Xsubset.shape, ysubset.shape model = SVC(kernel='linear', class_weight='auto') cross_val_score(model, Xsubset, ysubset, scoring='f1') C_values = 10 ** np.linspace(-2, 8, 10) f1_scores = [] for C in C_values: model = SVC(kernel='linear', class_weight='auto', C=C) cv = cross_val_score(model, Xsubset, ysubset, scoring='f1') f1_scores.append(cv.mean()) plt.semilogx(C_values, f1_scores, '-k') from sklearn.ensemble import RandomForestRegressor RandomForestRegressor? from astroML.datasets import fetch_sdss_specgals data = fetch_sdss_specgals() # put magnitudes in a matrix feature_names = ['modelMag_%s' % f for f in 'ugriz'] X = np.vstack([data[f] for f in feature_names]).T y = data['z'] # Plot some magnitudes for the first two thousand points i, j = 0, 1 N = 2000 plt.scatter(X[:N, i], X[:N, j], c=y[:N], edgecolor='none', cmap='cubehelix') plt.xlabel(feature_names[i]) plt.ylabel(feature_names[j]) plt.colorbar(label='redshift');