"""Train a Deep Belief Network on the Kaggle digit data and write a submission.

Workflow:
  1. Read ``train.csv`` and split it into pixel features and the ``label`` column.
  2. Hold out 33% of the rows, fit a DBN on the rest and print a
     classification report for the held-out part.
  3. Refit a DBN on all of ``train.csv``, predict ``test.csv`` and write the
     predictions to ``sub.csv`` with columns ``ImageId`` and ``Label``.
"""

# Load the required packages.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the old module
    from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
# NOTE(review): nolearn.dbn is only available in older nolearn releases --
# confirm the pinned nolearn version before running.
from nolearn.dbn import DBN
import numpy as np
import pandas as pd

# Read the training file.
data = pd.read_csv('train.csv')

# Boolean mask selecting every column except the target.
select_x = data.columns != 'label'

# Separate X and y; divide by 255.0 so the features are floats
# (presumably 8-bit pixel intensities scaled to [0, 1] -- verify against data).
train = data.loc[:, select_x] / 255.0
label = data.label.values

# Split into training and validation parts. Plain NumPy arrays are passed so
# the hold-out fit receives the same input type as the final fit below.
(trainX, testX, trainY, testY) = train_test_split(
    train.values, label, test_size=0.33)

# Configure and fit the network on the training part.
dbn = DBN(
    [trainX.shape[1], 300, 10],
    learn_rates=0.3,
    learn_rate_decays=0.9,
    epochs=10,
    verbose=1)
dbn.fit(trainX, trainY)

# Compare predictions against the validation part.
preds = dbn.predict(testX)
print(classification_report(testY, preds))

# Read the Kaggle competition test set and scale it like the training data.
test = pd.read_csv('test.csv')
test = test / 255.0
test = test.values
train = train.values

# Retrain on the complete training data.
dbn = DBN(
    [train.shape[1], 300, 10],
    learn_rates=0.3,
    learn_rate_decays=0.9,
    epochs=20,
    verbose=0)
dbn.fit(train, label)

# Write the predictions to a CSV file for submission; ImageId is 1-based.
preds = dbn.predict(test)
Label = pd.Series(preds)
ImageId = pd.Series(range(len(Label))) + 1
sub = pd.concat([ImageId, Label], axis=1)
sub.columns = ['ImageId', 'Label']
sub.to_csv('sub.csv', index=False)