import numpy as np
import pandas as pd
from sklearn import preprocessing

CSV_PATH = "/Users/alex/states/data.csv"

items = pd.read_csv(CSV_PATH)

# Feature column identifiers 0..6. Materialised as a list so that it can be
# concatenated with ["State"] below on Python 3 as well (where range() is lazy).
features = list(range(0, 6 + 1))


def log_ftr(ftr):
    """Return the signed log-magnitude ``sign(ftr) * log(|ftr|)`` element-wise.

    ``ftr`` must expose ``.abs()`` (e.g. a pandas Series/DataFrame).

    Note that an input of exactly 0 produces NaN (``-inf * 0``), and inputs
    with magnitude below 1 come out with the opposite sign because their log
    is negative.
    """
    return np.log(ftr.abs()) * np.sign(ftr)


# We scale/center all the data together here. Technically this is data peeking
# (leakage): the scaler should be fit on the training split only and then
# applied to the validation and test splits.
#
# NOTE(review): read_csv() yields string column labels, so the original
# ``items[features]`` with integer keys can only have been resolved by position
# (legacy pandas fallback). ``.iloc`` makes that explicit -- confirm that the
# seven features really are the first seven columns of the CSV.
X = preprocessing.scale(log_ftr(items.iloc[:, features]))

TEST_FRAC = 0.15
VALID_FRAC = 0.15

# Re-assemble a frame with the label first, followed by the scaled features.
# hstack of string labels with floats produces an object array.
items_good = pd.DataFrame(
    np.hstack([items["State"].values[:, np.newaxis], X]),
    columns=["State"] + features,
)

# Encode each state as its rank in the sorted list of distinct state values.
make_int = sorted(items_good["State"].unique()).index
items_good["State"] = items_good["State"].apply(make_int)

# Fixed seed so the train/valid/test split is reproducible.
np.random.seed(seed=0)
# .loc replaces the removed .ix; the index holds the default integer labels,
# so this is the same label-based row shuffle.
items_good_suffle = items_good.loc[np.random.permutation(items_good.index)]

test_set_size = int(TEST_FRAC * len(items_good))
valid_set_size = int(VALID_FRAC * len(items_good))

# Split order after shuffling: test rows, then validation rows, then training
# rows. These paths must match the ones hard-coded in the YAML below.
items_good_suffle.iloc[0:test_set_size].to_csv(
    "/Users/alex/states/test.csv", index=False)
items_good_suffle.iloc[test_set_size:test_set_size + valid_set_size].to_csv(
    "/Users/alex/states/valid.csv", index=False)
items_good_suffle.iloc[test_set_size + valid_set_size:].to_csv(
    "/Users/alex/states/train.csv", index=False)

# pylearn2 experiment description: a one-hidden-layer MLP (50 sigmoid units,
# 50-way softmax over 7 inputs) trained with BGD and monitored on all three
# splits. The text is kept byte-for-byte identical to the original literal;
# it is only broken into adjacent string fragments for readability.
train_yaml = (
    " !obj:pylearn2.train.Train {"
    " dataset: &train !obj:pylearn2.datasets.csv_dataset.CSVDataset {"
    " path: '/Users/alex/states/train.csv',"
    " one_hot: 1 },"
    " model: !obj:pylearn2.models.mlp.MLP {"
    " layers: ["
    " !obj:pylearn2.models.mlp.Sigmoid {"
    " layer_name: 'h0', dim: 50, sparse_init: 7, },"
    " !obj:pylearn2.models.mlp.Softmax {"
    " layer_name: 'y', n_classes: 50, irange: 0. \n"
    "} ],"
    " nvis: 7, },"
    " algorithm: !obj:pylearn2.training_algorithms.bgd.BGD {"
    " batch_size: 10000, line_search_mode: 'exhaustive', conjugate: 1,"
    " updates_per_batch: 10,"
    " monitoring_dataset: {"
    " 'train' : *train,"
    " 'valid' : !obj:pylearn2.datasets.csv_dataset.CSVDataset {"
    " path: '/Users/alex/states/valid.csv', one_hot: 1 },"
    " 'test' : !obj:pylearn2.datasets.csv_dataset.CSVDataset {"
    " path: '/Users/alex/states/test.csv', one_hot: 1 } },"
    " termination_criterion: !obj:pylearn2.termination_criteria.And {"
    " criteria: ["
    " !obj:pylearn2.termination_criteria.MonitorBased {"
    ' channel_name: "valid_y_misclass" },'
    " !obj:pylearn2.termination_criteria.EpochCounter {"
    " max_epochs: 10000 } ] } },"
    " extensions: ["
    " !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {"
    " channel_name: 'valid_y_misclass',"
    ' save_path: "mlp_best.pkl" },'
    " ] } "
)

from pylearn2.config import yaml_parse

# Build the Train object from the YAML description and run it to completion.
train = yaml_parse.load(train_yaml)
train.main_loop()

#TODO: Remove this
import sys
sys.path.append("/Users/alex/git/pylearn2/pylearn2/scripts/")

# Right now I am using this fork of pylearn2 to support inline rendering:
# https://github.com/cancan101/pylearn2/compare/ipython_embed_script

# Equivalent of the notebook magic ``%matplotlib inline``. The magic syntax is
# not valid Python, so call it through the IPython API and skip it when the
# script is run outside IPython (get_ipython is then undefined).
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Must come after the sys.path tweak above: plot_monitor lives in the pylearn2
# scripts directory, not in an importable package.
import plot_monitor

plot_monitor.run(
    model_paths=["mlp_best.pkl"],
    options_out=None,
    show_codes=["test_y_misclass", "valid_y_misclass", "train_y_misclass"],
)