using SwiftObjectStores
using ColoringNames
using Distributions
using MLDataPattern
using Iterators
using MLLabelUtils
using StaticArrays
using Juno
using StatsBase
using Colors
using DataFrames
using Query
using Plots
# Select the GR backend for all subsequent Plots calls.
gr()
WARNING: using MLDataPattern.obsview in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.BatchView in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.RandomObs in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.batchsize in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.undersample in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.oversample in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.randobs in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.ObsView in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.RandomBatches in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.eachbatch in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.DataSubset in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.BufferGetObs in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.kfolds in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.splitobs in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.getobs! in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.batchview in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.shuffleobs in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.leaveout in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.datasubset in module Main conflicts with an existing identifier. WARNING: using MLDataPattern.eachobs in module Main conflicts with an existing identifier.
Plots.GRBackend()
# Observation-dimension markers (first / last) as a pair.
# NOTE(review): `MLDataUtils` is referenced here but is not among the visible
# `using` lines — confirm it is loaded before this cell runs.
const od =(MLDataUtils.ObsDim.First(), MLDataUtils.ObsDim.Last())
# Handle used by `get_file` below to reach the object store.
const serv=SwiftService()
# Development split: fetch "monroe/dev.csv" from "color" and parse it as a
# tab-delimited table.
const valid_raw = get_file(fh->readdlm(fh,'\t'), serv, "color", "monroe/dev.csv")
# `prepare_data` yields three values, bound here in order; judging by the names
# they are HSV values, padded terms and the term encoding — TODO confirm.
const valid_hsv, valid_terms_padded, encoding = prepare_data(valid_raw; do_demacate=false)
# First column of the raw table (used below as the per-row color name).
const valid_text = valid_raw[:, 1]
# Training split, loaded the same way from "monroe/train.csv".
const train_raw = get_file(fh->readdlm(fh,'\t'), serv, "color", "monroe/train.csv")
# The encoding obtained from the dev split is passed in, and whatever
# `prepare_data` returns is bound to `encoding` again.
# NOTE(review): `encoding` is declared `const` twice in this cell; rebinding a
# constant is at best a warning — confirm this is intended.
const train_hsv, train_terms_padded, encoding = prepare_data(train_raw, encoding; do_demacate=false)
const train_text = train_raw[:, 1]
INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/MLLabelUtils.ji for module MLLabelUtils. INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/MLDataPattern.ji for module MLDataPattern. WARNING: Method definition getobs(Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:1 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:1. WARNING: Method definition nobs(Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:41 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:30. WARNING: Method definition #nobs(Array{Any, 1}, StatsBase.#nobs, Any) in module MLDataPattern overwritten in module MLDataUtils. WARNING: Method definition getobs(Any, Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:67 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:37. WARNING: Method definition getobs(Base.SubArray) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:92 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:385. WARNING: Method definition nobs(Tuple) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:173 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:445. WARNING: Method definition nobs(Tuple, Tuple) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:183 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:455. 
WARNING: Method definition getobs(Tuple, Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:198 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:462. WARNING: Method definition getobs(Tuple, Any, Tuple) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:208 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:472. INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/SortingAlgorithms.ji for module SortingAlgorithms. INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/DataFrames.ji for module DataFrames. WARNING: Method definition describe(AbstractArray) in module StatsBase at /home/ubuntu/.julia/v0.5/StatsBase/src/scalarstats.jl:573 overwritten in module DataFrames at /home/ubuntu/.julia/v0.5/DataFrames/src/abstractdataframe/abstractdataframe.jl:407. WARNING: Method definition describe(AbstractArray) in module StatsBase at /home/ubuntu/.julia/v0.5/StatsBase/src/scalarstats.jl:573 overwritten in module DataFrames at /home/ubuntu/.julia/v0.5/DataFrames/src/abstractdataframe/abstractdataframe.jl:407. WARNING: Method definition require(Symbol) in module Base at loading.jl:345 overwritten in module Query at /home/ubuntu/.julia/v0.5/Requires/src/require.jl:12.
1523108-element Array{Any,1}: "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" "acid green" ⋮ "yuck" "yuck" "yuck" "yuck" "yuck" "yuck" "yuck" "yuck" "yuck" "yuck" "yuck" "yuck"
"""
    pairwise_stats(fun, names, hsvs)

Group the rows of `hsvs` by the labels in `names` and, for each label, apply the
two-argument statistic `fun` to three pairs of columns.

# Arguments
- `fun`: called as `fun(x, y)` on two column vectors; its result is stored in a
  `Float64` column.
- `names`: one label per row of `hsvs`.
- `hsvs`: matrix indexed as `hsvs[rows, column]`; columns 1, 2 and 3 are used
  (presumably hue, saturation and value — confirm against the caller).

# Returns
A `DataFrame` with one row per distinct label and columns `name`, `n_samples`,
`hs` (columns 1 and 2), `hv` (columns 1 and 3) and `vs` (columns 3 and 2).
"""
function pairwise_stats(fun, names, hsvs)
    table = DataFrame(
        name=String[],
        n_samples=Int[],
        hs=Float64[],
        hv=Float64[],
        vs=Float64[],
    )
    @progress for (label, rows) in labelmap(names)
        group = @view hsvs[rows, :]
        # Each call materialises a fresh copy of the requested column.
        channel(k) = group[:, k]
        stat_hs = fun(channel(1), channel(2))
        stat_hv = fun(channel(1), channel(3))
        stat_vs = fun(channel(3), channel(2))
        push!(table, Any[label, length(rows), stat_hs, stat_hv, stat_vs])
    end
    return table
end
# Per-color-name Spearman rank correlation between pairs of columns of
# `train_hsv`, one table row per distinct name in `train_text`.
spearman = pairwise_stats(corspearman, train_text, train_hsv)
# Summary statistics for every column of that table.
describe(spearman)
name Summary Stats: Length: 829 Type: String Number Unique: 829 n_samples Summary Stats: Mean: 1837.283474 Minimum: 70.000000 1st Quartile: 109.000000 Median: 214.000000 3rd Quartile: 627.000000 Maximum: 152953.000000 Length: 829 Type: Int64 hs Summary Stats: Mean: 0.022951 Minimum: -0.429653 1st Quartile: -0.086345 Median: 0.019934 3rd Quartile: 0.121130 Maximum: 0.577041 Length: 829 Type: Float64 hv Summary Stats: Mean: 0.000171 Minimum: -0.577392 1st Quartile: -0.101860 Median: 0.003642 3rd Quartile: 0.103550 Maximum: 0.497044 Length: 829 Type: Float64 vs Summary Stats: Mean: -0.048629 Minimum: -0.497048 1st Quartile: -0.132881 Median: -0.050129 3rd Quartile: 0.031004 Maximum: 0.455614 Length: 829 Type: Float64
# Convert the training HSV data with `hsv2colorant` and flatten the result
# into a vector.
train_cols = vec(hsv2colorant(train_hsv))
"""
    pairwise_fields(T)

Return every unordered pair of field names of type `T` as a
`Vector{Tuple{Symbol,Symbol}}`.

Each pair `(a, b)` has `a` declared before `b` in `T`; pairs are ordered by the
position of `a`, then by the position of `b`. A type with fewer than two fields
gives an empty vector.
"""
function pairwise_fields(T)
    fields = fieldnames(T)
    n = length(fields)
    combos = Tuple{Symbol,Symbol}[]
    # Visit only positions with `second > first`, so each pair appears once.
    for first in 1:n, second in (first + 1):n
        push!(combos, (fields[first], fields[second]))
    end
    return combos
end
pairwise_fields (generic function with 1 method)
"""
    pairwise_stats(fun, names, data::Vector{T})

Group the elements of `data` by the labels in `names` and, for each label, apply
the two-argument statistic `fun` to every pair of fields of `T`.

# Arguments
- `fun`: called as `fun(x, y)` on the per-group values of two fields; `abs` is
  applied to its result.
- `names`: one label per element of `data`.
- `data`: vector whose element type `T` supplies the field names, as listed by
  `pairwise_fields(T)`.

# Returns
A `DataFrame` with one row per distinct label. Columns are `name`, `n_samples`,
then one column per field pair named by concatenating the two field names, then
the same pairs again prefixed with `abs_`, holding the absolute value.
"""
function pairwise_stats{T}(fun, names, data::Vector{T})
    field_pairs = pairwise_fields(T)
    signed_cols = [Symbol(a, b) for (a, b) in field_pairs]
    abs_cols = [Symbol(:abs_, a, b) for (a, b) in field_pairs]
    col_types = [String; Int; fill(Float64, 2*length(field_pairs))]
    col_names = [:name; :n_samples; signed_cols; abs_cols]
    table = DataFrame(col_types, col_names, 0)

    for (label, idxs) in labelmap(names)
        group = @view data[idxs]
        record = Dict{Symbol, Any}(:name=>label, :n_samples=>length(idxs))
        for (a, b) in field_pairs
            # `Scalar` wraps the field name so it is broadcast as one value.
            first_vals = getfield.(group, Scalar(a))
            second_vals = getfield.(group, Scalar(b))
            stat = fun(first_vals, second_vals)
            record[Symbol(a, b)] = stat
            record[Symbol(:abs_, a, b)] = abs(stat)
        end
        push!(table, record)
    end
    return table
end
WARNING: Method definition pairwise_stats(Any, Any, Array{#T<:Any, 1}) in module Main at In[233]:2 overwritten at In[235]:2.
pairwise_stats (generic function with 2 methods)
"""
    Q1(v)

Return the 0.25 quantile (first quartile) of `v`.
"""
function Q1(v)
    return quantile(v, 0.25)
end

"""
    Q3(v)

Return the 0.75 quantile (third quartile) of `v`.
"""
function Q3(v)
    return quantile(v, 0.75)
end
WARNING: Method definition Q1(Any) in module Main at In[262]:3 overwritten at In[263]:2.
Q3 (generic function with 1 method)
"""
    summarise(funs, names, colors)

Summarise, per color space, the within-name Spearman correlations between the
channels of that space.

`colors` is converted to each color space in turn, `pairwise_stats` is run with
`corspearman` grouped by `names`, and every function in `funs` is applied to each
of the last three columns of the resulting table. For a three-field color type
handled by the per-field `pairwise_stats` method, those are the absolute
correlations of field pairs (1,2), (1,3) and (2,3), stored as `f12`, `f13` and
`f23`.

# Arguments
- `funs`: iterable of functions, each reducing a table column to a number.
- `names`: one label per element of `colors`.
- `colors`: colors convertible to every space listed in the body.

# Returns
A `DataFrame` with one row per (color space, function) combination. `name_stat`
is `Symbol(space, :_, fun)`, e.g. `:RGB_Q3`.
"""
function summarise(funs, names, colors)
    dt = DataFrame(name_stat=Symbol[], f12=Float64[], f13=Float64[], f23=Float64[])
    # Each space is listed exactly once: a repeated entry would redo the
    # conversion and correlation pass and emit a duplicate row.
    spaces = [RGB, HSV, HSI, HSL, xyY, XYZ, Lab, Luv, LCHab, LCHuv,
              DIN99, DIN99d, DIN99o, LMS, YIQ, YCbCr]
    for space in spaces
        space_name = space.name.name
        colors_space = convert(Vector{space}, colors)
        stats = pairwise_stats(corspearman, names, colors_space)
        for fun in funs
            row = Any[Symbol(space_name, :_, fun)]
            # Walk the last three columns of `stats` from left to right.
            for col_ii in [2,1,0]
                column = stats[:, end-col_ii]
                push!(row, fun(column))
            end
            push!(dt, row)
        end
    end
    return dt
end
# Upper quartile (Q3), across color names, of the last three columns of each
# color space's pairwise Spearman table for the training colors.
summarise([Q3], train_text, train_cols)
WARNING: Method definition summarise(Any, Any, Any) in module Main at In[280]:2 overwritten at In[285]:2.
 | name_stat | f12 | f13 | f23 |
---|---|---|---|---|
1 | RGB_Q3 | 0.6030468173161735 | 0.4471926980645492 | 0.5655902318754856 |
2 | HSV_Q3 | 0.18613341836244784 | 0.18665944073636134 | 0.1627735443944819 |
3 | HSI_Q3 | 0.2446086788211584 | 0.23911783105162776 | 0.6301694579237653 |
4 | HSL_Q3 | 0.16551731146099502 | 0.2147074863173264 | 0.3112894699855042 |
5 | xyY_Q3 | 0.7230260376798017 | 0.5024120339688362 | 0.41654070660522274 |
6 | XYZ_Q3 | 0.9726011334766465 | 0.8166502753828205 | 0.7843590386564617 |
7 | xyY_Q3 | 0.7230260376798017 | 0.5024120339688362 | 0.41654070660522274 |
8 | Lab_Q3 | 0.5730180530926171 | 0.4597498889534288 | 0.6390289755602638 |
9 | Luv_Q3 | 0.5597935438719273 | 0.611177475128859 | 0.4379400128184171 |
10 | LCHab_Q3 | 0.5258344651603079 | 0.411030235162374 | 0.3687582570400751 |
11 | LCHuv_Q3 | 0.6123511957186991 | 0.4071940339464407 | 0.34158057511350925 |
12 | DIN99_Q3 | 0.5449474335188621 | 0.493092269212655 | 0.5235018403496664 |
13 | DIN99d_Q3 | 0.5442307692307692 | 0.4426073009880284 | 0.4802693716414654 |
14 | DIN99o_Q3 | 0.5608196488954583 | 0.40822381791085216 | 0.5211370440261909 |
15 | LMS_Q3 | 0.9680367104231742 | 0.7457534246575342 | 0.7789878283151825 |
16 | YIQ_Q3 | 0.4088309593708092 | 0.49752505716774 | 0.4064337083345767 |
17 | YCbCr_Q3 | 0.4004605183428265 | 0.43929499072356215 | 0.3377021554829844 |