# IRuby notebook script with two independent parts:
#   1. draw a seaborn scatter plot of random points inline;
#   2. train an RBF-approximation + linear SVC pipeline on the pendigits
#      dataset with Rumale, report the 5-fold cross-validation accuracy and
#      the accuracy on the separate pendigits.t test file.

require 'matplotlib/iruby'
require 'rumale'

Matplotlib::IRuby.activate
Matplotlib.interactive(false) # Interactive plotting OFF, necessary for inline plotting in IRuby

# --- Part 1: scatter plot -------------------------------------------------

point_count = 500
xs = Array.new(point_count) { 100 * rand }
ys = Array.new(point_count) { 100 * rand }
areas = Array.new(point_count) { 800 * rand }
nil # keeps the notebook cell from echoing the last array

# scatter plot by seaborn
sns = PyCall.import_module('seaborn')
ax = sns.scatterplot(x: xs, y: ys, size: areas, hue: areas)
ax.set_xlabel('x')
ax.set_ylabel('y')
nil # keeps the notebook cell from echoing the Python return value

# --- Part 2: pendigits classification -------------------------------------

dataset_base_url = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass'
train_file = 'pendigits'
test_file = 'pendigits.t'

[train_file, test_file].each do |file_name|
  # The loader below always reads exactly this file name, so a second
  # download would only leave a stray copy next to it.
  next if File.exist?(file_name)

  # Argument-list form of system: no shell is involved, and a falsy result
  # (non-zero exit or wget missing) is turned into an error instead of
  # surfacing later as a confusing "file not found" from the loader.
  downloaded = system('wget', "#{dataset_base_url}/#{file_name}")
  raise "failed to download #{file_name}" unless downloaded
end

samples, labels = Rumale::Dataset.load_libsvm_file(train_file)

model = Rumale::Pipeline::Pipeline.new(
  steps: {
    rbf: Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800),
    svc: Rumale::LinearModel::SVC.new(reg_param: 0.0001, max_iter: 1000)
  }
)

kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true)
cv = Rumale::ModelSelection::CrossValidation.new(estimator: model, splitter: kf)
report = cv.perform(samples, labels)

# One test score per fold; average them.
fold_scores = report[:test_score]
mean_accuracy = fold_scores.sum / fold_scores.size
puts format("5-CV mean accuracy: %.1f %%", mean_accuracy * 100.0)

# Fit on the complete training set before the held-out evaluation so the
# reported score does not depend on whatever state cross-validation left
# the estimator in.
model.fit(samples, labels)

test_samples, test_labels = Rumale::Dataset.load_libsvm_file(test_file)
accuracy = model.score(test_samples, test_labels)
puts format("Accuracy: %.1f%%", 100.0 * accuracy)