# frozen_string_literal: true

# Logistic regression walkthrough using daru and statsample-glm.
#
# Part 1 downloads a CSV of admissions data, fits a logistic model for
# :admit, and computes predicted admission probabilities per rank with
# GRE and GPA held at their means.
# Part 2 fits a logistic model on synthetic body-weight / survival data
# and plots the predicted survival probability.

require 'daru'
require 'statsample-glm'
require 'open-uri'
require 'distribution'

# Inverse-logit (logistic) function: maps a linear predictor to a
# probability in (0, 1).
#
# @param linear_predictor [Numeric] intercept plus weighted sum of predictors
# @return [Float] 1 / (1 + e^-linear_predictor)
def logistic_probability(linear_predictor)
  1.0 / (1.0 + Math.exp(-linear_predictor))
end

# ---------------------------------------------------------------------------
# Part 1: admissions data
# ---------------------------------------------------------------------------

data_url = 'http://www.ats.ucla.edu/stat/data/binary.csv'
csv_path = 'binary.csv'

# URI.open is used explicitly: opening URLs through Kernel#open was removed in
# Ruby 3.0. The block form closes the remote handle once the copy is written.
URI.open(data_url) { |remote| File.write(csv_path, remote.read) }

df = Daru::DataFrame.from_csv(csv_path)

# Symbolize the header names the CSV actually has, in the order it has them.
# Assigning a hard-coded name list positionally would silently mislabel
# columns whenever the file's column order differs from that list.
# NOTE(review): assumes the CSV header contains admit, gre, gpa and rank.
df.vectors = Daru::Index.new(df.vectors.to_a.map(&:to_sym))
df # bare expression: only displayed where the environment echoes results (e.g. a notebook)

glm = Statsample::GLM.compute df, :admit, :logistic, constant: 1

c = glm.coefficients :hash

# Calling `#exp` on Daru::Vector exponentiates each element of the Vector.
Daru::Vector.new(c).exp

# One row per distinct rank, with GRE and GPA fixed at their sample means.
ranks = df[:rank].factors
new_data = Daru::DataFrame.new({
  gre: [df[:gre].mean] * ranks.size,
  gpa: [df[:gpa].mean] * ranks.size,
  rank: ranks
})

# Predicted probability of admission for each row of new_data.
new_data[:rankp] = new_data.collect(:row) do |x|
  logistic_probability(
    c[:constant] + x[:gre] * c[:gre] + x[:gpa] * c[:gpa] + x[:rank] * c[:rank]
  )
end

new_data.sort!([:rank])

# ---------------------------------------------------------------------------
# Part 2: synthetic body weight vs. survival
# ---------------------------------------------------------------------------

# Create a normally distributed Vector with mean 30 and standard deviation 2
rng = Distribution::Normal.rng(30, 2)
body_weight = Daru::Vector.new(Array.new(20) { rng.call }.sort)

# Populate chances of survival, assume that people with less body weight on average
# are less likely to survive.
survive = Daru::Vector.new [0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1]

df = Daru::DataFrame.new({
  body_weight: body_weight,
  survive: survive
})

glm = Statsample::GLM.compute df, :survive, :logistic, constant: 1
coeffs = glm.coefficients :hash

# Predicted survival probability for every observed body weight.
df[:survive_pred] = df[:body_weight].map do |weight|
  logistic_probability(coeffs[:constant] + weight * coeffs[:body_weight])
end
df # bare expression: only displayed where the environment echoes results (e.g. a notebook)

# Both the scatter and the line diagram are given the same x/y vectors.
df.plot type: [:scatter, :line], x: [:body_weight] * 2, y: [:survive_pred] * 2 do |plot, _diagram|
  plot.x_label "Body Weight"
  plot.y_label "Probability of Survival"
end