In [196]:
using SwiftObjectStores
using ColoringNames
using Distributions
using MLDataPattern
using Iterators
using MLLabelUtils
using StaticArrays
using Juno
using StatsBase
using Colors
using DataFrames
using Query
using Plots
gr()
WARNING: using MLDataPattern.obsview in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.BatchView in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.RandomObs in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.batchsize in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.undersample in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.oversample in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.randobs in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.ObsView in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.RandomBatches in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.eachbatch in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.DataSubset in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.BufferGetObs in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.kfolds in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.splitobs in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.getobs! in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.batchview in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.shuffleobs in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.leaveout in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.datasubset in module Main conflicts with an existing identifier.
WARNING: using MLDataPattern.eachobs in module Main conflicts with an existing identifier.
Out[196]:
Plots.GRBackend()
In [1]:
const od =(MLDataUtils.ObsDim.First(), MLDataUtils.ObsDim.Last())
const serv=SwiftService()

const valid_raw = get_file(fh->readdlm(fh,'\t'), serv, "color", "monroe/dev.csv")
const valid_hsv, valid_terms_padded, encoding = prepare_data(valid_raw; do_demacate=false)
const valid_text = valid_raw[:, 1]

const train_raw = get_file(fh->readdlm(fh,'\t'), serv, "color", "monroe/train.csv")
const train_hsv, train_terms_padded, encoding = prepare_data(train_raw, encoding; do_demacate=false)
const train_text = train_raw[:, 1]
INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/MLLabelUtils.ji for module MLLabelUtils.
INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/MLDataPattern.ji for module MLDataPattern.
WARNING: Method definition getobs(Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:1 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:1.
WARNING: Method definition nobs(Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:41 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:30.
WARNING: Method definition #nobs(Array{Any, 1}, StatsBase.#nobs, Any) in module MLDataPattern overwritten in module MLDataUtils.
WARNING: Method definition getobs(Any, Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:67 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:37.
WARNING: Method definition getobs(Base.SubArray) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:92 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:385.
WARNING: Method definition nobs(Tuple) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:173 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:445.
WARNING: Method definition nobs(Tuple, Tuple) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:183 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:455.
WARNING: Method definition getobs(Tuple, Any) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:198 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:462.
WARNING: Method definition getobs(Tuple, Any, Tuple) in module MLDataPattern at /home/ubuntu/.julia/v0.5/MLDataPattern/src/container.jl:208 overwritten in module MLDataUtils at /home/ubuntu/.julia/v0.5/MLDataUtils/src/accesspattern/datasubset.jl:472.
INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/SortingAlgorithms.ji for module SortingAlgorithms.
INFO: Recompiling stale cache file /home/ubuntu/.julia/lib/v0.5/DataFrames.ji for module DataFrames.
WARNING: Method definition describe(AbstractArray) in module StatsBase at /home/ubuntu/.julia/v0.5/StatsBase/src/scalarstats.jl:573 overwritten in module DataFrames at /home/ubuntu/.julia/v0.5/DataFrames/src/abstractdataframe/abstractdataframe.jl:407.
WARNING: Method definition describe(AbstractArray) in module StatsBase at /home/ubuntu/.julia/v0.5/StatsBase/src/scalarstats.jl:573 overwritten in module DataFrames at /home/ubuntu/.julia/v0.5/DataFrames/src/abstractdataframe/abstractdataframe.jl:407.
WARNING: Method definition require(Symbol) in module Base at loading.jl:345 overwritten in module Query at /home/ubuntu/.julia/v0.5/Requires/src/require.jl:12.
Out[1]:
1523108-element Array{Any,1}:
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 "acid green"
 ⋮           
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
 "yuck"      
In [2]:
function pairwise_stats(fun, names, hsvs)
    dt = DataFrame(name=String[], n_samples=Int[], hs=Float64[], hv=Float64[], vs=Float64[])
    @progress for (name, inds) in labelmap(names)
        eg_hsvs = @view hsvs[inds, :]

        hs = fun(eg_hsvs[:,1], eg_hsvs[:,2])
        hv = fun(eg_hsvs[:,1], eg_hsvs[:,3])
        vs = fun(eg_hsvs[:,3], eg_hsvs[:,2])

        push!(dt, [name, length(inds), hs, hv, vs])
    end
    dt
end
spearman = pairwise_stats(corspearman, train_text, train_hsv)



describe(spearman)
name
Summary Stats:
Length:         829
Type:           String
Number Unique:  829

n_samples
Summary Stats:
Mean:           1837.283474
Minimum:        70.000000
1st Quartile:   109.000000
Median:         214.000000
3rd Quartile:   627.000000
Maximum:        152953.000000
Length:         829
Type:           Int64

hs
Summary Stats:
Mean:           0.022951
Minimum:        -0.429653
1st Quartile:   -0.086345
Median:         0.019934
3rd Quartile:   0.121130
Maximum:        0.577041
Length:         829
Type:           Float64

hv
Summary Stats:
Mean:           0.000171
Minimum:        -0.577392
1st Quartile:   -0.101860
Median:         0.003642
3rd Quartile:   0.103550
Maximum:        0.497044
Length:         829
Type:           Float64

vs
Summary Stats:
Mean:           -0.048629
Minimum:        -0.497048
1st Quartile:   -0.132881
Median:         -0.050129
3rd Quartile:   0.031004
Maximum:        0.455614
Length:         829
Type:           Float64

In [ ]:
train_cols = hsv2colorant(train_hsv)|>vec
In [20]:
function pairwise_fields(T)
    ns = fieldnames(T)
    pairwise_ns = Tuple{Symbol,Symbol}[]
    for (ii, n1) in enumerate(ns)
        for (jj, n2) in enumerate(ns)
            jj<=ii && continue
            push!(pairwise_ns, (n1,n2))
        end
    end
    pairwise_ns
end
Out[20]:
pairwise_fields (generic function with 1 method)
In [235]:
function pairwise_stats{T}(fun, names, data::Vector{T})
    pairwise_ns = pairwise_fields(T)
    dt = DataFrame([String;Int;fill(Float64, 2*length(pairwise_ns))], 
                    [:name;  :n_samples; 
                    [Symbol(n1,n2) for (n1,n2) in pairwise_ns];
                    [Symbol(:abs_,n1,n2) for (n1,n2) in pairwise_ns]],
                    0)
    
    for (name, inds) in labelmap(names)
        sub = @view data[inds]
        row=Dict{Symbol, Any}(:name=>name, :n_samples=>length(inds))
        for (n1,n2) in pairwise_ns
            v1 = getfield.(sub, Scalar(n1))
            v2 = getfield.(sub, Scalar(n2))
            val = fun(v1, v2)
            row[Symbol(n1,n2)] = val
            row[Symbol(:abs_, n1,n2)] = abs(val)
        end
        push!(dt, row)
    end
    dt
end
WARNING: Method definition pairwise_stats(Any, Any, Array{#T<:Any, 1}) in module Main at In[233]:2 overwritten at In[235]:2.
Out[235]:
pairwise_stats (generic function with 2 methods)
In [263]:
Q1(v)=quantile(v,0.25)
Q3(v)=quantile(v,0.75)
WARNING: Method definition Q1(Any) in module Main at In[262]:3 overwritten at In[263]:2.
Out[263]:
Q3 (generic function with 1 method)
In [285]:
function summarise(funs, names, colors)
    dt = DataFrame(name_stat=Symbol[], f12=Float64[], f13=Float64[], f23=Float64[])
    for space in [RGB, HSV, HSI, HSL, xyY, XYZ, xyY, Lab, Luv, LCHab, LCHuv, DIN99, DIN99d, DIN99o, LMS, YIQ, YCbCr]
        space_name = space.name.name
        
        colors_space = convert(Vector{space}, colors)
        stats = pairwise_stats(corspearman, names, colors_space)
        for fun in funs
            row = Any[Symbol(space_name, :_, fun)]
            for col_ii in [2,1,0]
                column = stats[:, end-col_ii]
                push!(row, fun(column))
            end
            push!(dt, row)
        end
    end
    dt
end

summarise([Q3], train_text, train_cols)
WARNING: Method definition summarise(Any, Any, Any) in module Main at In[280]:2 overwritten at In[285]:2.
Out[285]:
name_statf12f13f23
1RGB_Q30.60304681731617350.44719269806454920.5655902318754856
2HSV_Q30.186133418362447840.186659440736361340.1627735443944819
3HSI_Q30.24460867882115840.239117831051627760.6301694579237653
4HSL_Q30.165517311460995020.21470748631732640.3112894699855042
5xyY_Q30.72302603767980170.50241203396883620.41654070660522274
6XYZ_Q30.97260113347664650.81665027538282050.7843590386564617
7xyY_Q30.72302603767980170.50241203396883620.41654070660522274
8Lab_Q30.57301805309261710.45974988895342880.6390289755602638
9Luv_Q30.55979354387192730.6111774751288590.4379400128184171
10LCHab_Q30.52583446516030790.4110302351623740.3687582570400751
11LCHuv_Q30.61235119571869910.40719403394644070.34158057511350925
12DIN99_Q30.54494743351886210.4930922692126550.5235018403496664
13DIN99d_Q30.54423076923076920.44260730098802840.4802693716414654
14DIN99o_Q30.56081964889545830.408223817910852160.5211370440261909
15LMS_Q30.96803671042317420.74575342465753420.7789878283151825
16YIQ_Q30.40883095937080920.497525057167740.4064337083345767
17YCbCr_Q30.40046051834282650.439294990723562150.3377021554829844
In [ ]: