#!/usr/bin/env python
# coding: utf-8

# In[31]:

import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[13]:

from keras.models import Sequential
from keras.layers import Dense


# In[60]:

# Alternative: the same dataset also ships with Keras.
# from keras.datasets import boston_housing
# (train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


# In[2]:

# Note: load_boston was removed from scikit-learn in version 1.2.
from sklearn.datasets import load_boston
boston = load_boston()


# In[3]:

boston.data.shape


# In[14]:

# Hold out the last 100 samples as the test set
(train_data, train_targets), (test_data, test_targets) = \
    (boston.data[:406], boston.target[:406]), (boston.data[406:], boston.target[406:])


# In[15]:

train_data.shape


# In[16]:

test_data.shape


# In[17]:

train_targets


# In[18]:

# Standardize each feature using training-set statistics
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std


# In[19]:

# Apply the same training-set statistics to the test set
test_data -= mean
test_data /= std


# In[20]:

def build_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=train_data.shape[1]))
    model.add(Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model


# In[21]:

k = 4
num_val_samples = len(train_data) // k
all_scores = []
for i in range(k):
    print('processing fold #', i)
    # prepare the validation data: data from partition #k
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    # prepare the training data: data from all other partitions
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    # build the Keras model (already compiled)
    model = build_model()
    # train the model (in silent mode, verbose=0);
    # `epochs` replaces the deprecated `nb_epoch` argument
    model.fit(partial_train_data, partial_train_targets,
              epochs=100, batch_size=1, verbose=0)
    # evaluate the model on the validation data
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)


# In[22]:

all_scores


# In[23]:

np.mean(all_scores)


# In[24]:

mean


# In[25]:

# Retrain on all training data, then score on the held-out test set
model.fit(train_data, train_targets, epochs=300, batch_size=1, verbose=0)
test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)


# In[46]:

test_mae_score


# In[47]:

test_pred = model.predict(test_data)


# In[48]:

test_pred.flatten()


# In[49]:

test_targets


# In[50]:

print(boston.DESCR)


# In[93]:

plt.scatter(test_targets, test_pred.flatten())
plt.show()
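# As a cross-check on the hand-rolled partitioning above, the cell below is a
# minimal sketch (not in the original notebook) of the same 4-fold loop using
# scikit-learn's KFold splitter. It assumes the standardized train_data /
# train_targets and the build_model() helper defined above; kfold_scores and
# fold are illustrative names.

# In[ ]:

from sklearn.model_selection import KFold

kfold_scores = []
for fold, (train_idx, val_idx) in enumerate(KFold(n_splits=4).split(train_data)):
    print('processing fold #', fold)
    m = build_model()
    m.fit(train_data[train_idx], train_targets[train_idx],
          epochs=100, batch_size=1, verbose=0)
    # evaluate() returns [loss (MSE), MAE] given metrics=['mae']
    _, fold_mae = m.evaluate(train_data[val_idx], train_targets[val_idx], verbose=0)
    kfold_scores.append(fold_mae)
np.mean(kfold_scores)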
# ## About the Boston Housing dataset
# I found an example that analyzes the Boston house-price data with
# scikit-learn, so below I follow it and compare the linear-regression results
# with the neural-network results above.
#
# - http://bigdata-madesimple.com/how-to-run-linear-regression-in-python-scikit-learn/

# In[32]:

import pandas as pd
import seaborn


# In[68]:

boston = load_boston()
bos = pd.DataFrame(boston.data)


# In[69]:

bos.head()


# In[70]:

bos.columns = boston.feature_names
bos.head()


# In[71]:

boston.target[:5]


# In[72]:

bos['PRICE'] = boston.target


# In[73]:

bos.head()


# In[74]:

from sklearn.linear_model import LinearRegression

X = bos.drop('PRICE', axis=1)


# In[75]:

lm = LinearRegression()


# In[76]:

lm.fit(X, bos.PRICE)


# In[77]:

print('Estimated intercept coefficient:', lm.intercept_)


# In[78]:

print('Number of coefficients:', len(lm.coef_))


# In[79]:

# list() is needed around zip() under Python 3
pd.DataFrame(list(zip(X.columns, lm.coef_)),
             columns=['features', 'estimatedCoefficients'])


# In[97]:

seaborn.pairplot(bos)
plt.show()


# In[80]:

plt.scatter(bos.RM, bos.PRICE)
plt.xlabel('Average number of rooms per dwelling (RM)')
plt.ylabel('Housing Price')
plt.title('Relationship between RM and Price')
plt.show()


# In[81]:

lm.predict(X)[0:5]


# In[82]:

plt.scatter(bos.PRICE, lm.predict(X))
plt.xlabel(r"Prices: $Y_i$")
plt.ylabel(r"Predicted prices: $\hat{Y}_i$")
plt.title(r"Prices vs Predicted Prices: $Y_i$ vs $\hat{Y}_i$")
plt.show()


# In[85]:

# Mean squared error of the linear model over the full dataset
mseFull = np.mean((bos.PRICE - lm.predict(X)) ** 2)
print(mseFull)


# In[ ]:
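# Note: mseFull above is an in-sample error on the full dataset the linear
# model was fit to, so it is not directly comparable to the neural network's
# held-out test score. The cell below is a minimal sketch (not in the original
# notebook) that fits a second LinearRegression on the same standardized
# train/test split used for Keras; lm_split and the derived names are
# illustrative.

# In[ ]:

# Illustrative like-for-like comparison on the held-out test split
lm_split = LinearRegression()
lm_split.fit(train_data, train_targets)   # same standardized training split as Keras
lm_test_pred = lm_split.predict(test_data)
lm_test_mse = np.mean((test_targets - lm_test_pred) ** 2)
lm_test_mae = np.mean(np.abs(test_targets - lm_test_pred))
print('Linear model test MSE / MAE:', lm_test_mse, lm_test_mae)
print('Neural net   test MSE / MAE:', test_mse_score, test_mae_score)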