using CMPlot
using PlotlyJS
using Random #for shuffle()
#To load some public test data:
using RDatasets
# Iris dataset; Anderson, Edgar (1935), Fisher, R. A. (1936);
# http://vincentarelbundock.github.io/Rdatasets/doc/datasets/iris.html
iris = dataset("datasets", "iris")
# Individual wages, US, 1976 to 1982; Cornwell, C. and P. Rupert (1988);
# http://vincentarelbundock.github.io/Rdatasets/doc/Ecdat/Wages.html
train = dataset("Ecdat", "Wages")
# Bin the years of full-time work experience into three categories.
# The last bin includes Exp == 30, hence the ">=30" label.
train[!, :ExpYears] = [x < 10 ? "<10" : x < 30 ? "10-29" : ">=30" for x in train.Exp]
# Bin the years of education into three categories.
train[!, :EduYears] = [x < 9 ? "<9" : x < 14 ? "9-13" : ">13" for x in train.Ed]
# Shuffle the rows. Presumably this avoids a row-order bias in the subset of points
# drawn when `pointsmaxdisplayed` is used -- TODO confirm against CMPlot.
train = train[shuffle(1:size(train, 1)), :]
train[1:3, :] # preview the first three rows
#Alternatively, load your own data, e.g. via CSV:
#=
using CSV
using Statistics
train = DataFrame(CSV.File("train.csv", delim=',', decimal='.')) #dataset on loans
#Some data cleaning:
train.Married=coalesce.(train.Married,"No") #replace missing values in Married with "No"
train.Gender=coalesce.(train.Gender,"Male") #replace missing values in Gender with "Male"
train.LoanAmount=coalesce.(train.LoanAmount,floor(Int,mean(skipmissing(train.LoanAmount)))) #replace missing values in LoanAmount with their mean
train.LoanAmount[1]
=#
# Simplest usage: call cmplot with only an X column and splat whatever it
# returns directly into plot().
let tracesandlayout = cmplot(iris; xcol=:Species)
    plot(tracesandlayout...)
end
using PlotlyJS
# Plain PlotlyJS box and violin plots of the log wage, grouped by gender and by
# marital status, displayed side by side.
layout = Layout(
    title="BoxPlot", xaxis_title="Wage (log)", yaxis_title="Married+Gender",
    xaxis_showgrid=true, yaxis_showgrid=true,
    margin_t=20,
    legend_y=0.9, legend_x=0.8,
)
boxbygender = box(
    y=train.Sex, x=train.LWage, name="Gender", legendgroup="gender",
    orientation="h", marker_color="green",
)
boxbymarried = box(
    y=train.Married, x=train.LWage, name="Married", legendgroup="married",
    orientation="h", marker_color="blue",
)
violbygender = violin(
    y=train.Sex, x=train.LWage, name="Gender", legendgroup="gender",
    box_visible=true, orientation="h", marker_color="green",
)
violbymarried = violin(
    y=train.Married, x=train.LWage, name="Married", legendgroup="married",
    box_visible=true, orientation="h", marker_color="blue",
)
p1 = PlotlyJS.plot([boxbygender, boxbymarried], layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_plus_gender-loanamount_boxplot.pdf"))
# The plot holds on to the Layout object it was given, so retitling `layout` in
# place would also retitle the box plot p1. Work on an independent copy instead.
layout = deepcopy(layout)
layout["title"] = "ViolinPlot"
layout["yaxis_title"] = " "
p2 = PlotlyJS.plot([violbygender, violbymarried], layout) # optionally: ,style=mystyle
#savefig(p2::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_plus_gender-loanamount_violinplot.pdf"))
[p1 p2] # box plot and violin plot next to each other
# "BeanPlot": traces are built separately for the Sex and the Married columns
# (identical settings except for colorshift) and then joined in a single plot.
bt1, layout = cmplot(train;
    xcol=:Sex, ycol=:LWage,
    orientation="h", inf="none", ycolorgroups=false, side="both",
    colorshift=2, colorrange=4,
    pointsopacity=1, showpoints=true, showboxplot=false, pointshapes=["line-ns"],
    markoutliers=false, pointsmaxdisplayed=200,
)
bt2, layout = cmplot(train;
    xcol=:Married, ycol=:LWage,
    orientation="h", inf="none", ycolorgroups=false, side="both",
    colorshift=0, colorrange=4,
    pointsopacity=1, showpoints=true, showboxplot=false, pointshapes=["line-ns"],
    markoutliers=false, pointsmaxdisplayed=200,
)
# Adjust the layout returned by the second cmplot call.
for (key, value) in (
    "title" => "BeanPlot",
    "yaxis_title" => "Married + Gender",
    "xaxis_title" => "Wage (log)",
    "margin_t" => 30,
    "showlegend" => false,
)
    layout[key] = value
end
p1 = PlotlyJS.plot(union(bt1, bt2), layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_plus_gender-loanamount_rdiplot.pdf"))
# "PiratePlot": union of two different X columns (Sex and Married), joining
# pirate-plot-like traces into a single plot.
pt1, layout = cmplot(train;
    xcol=:Sex, ycol=:LWage,
    orientation="h", inf="hdi", ycolorgroups=false, side="both",
    colorshift=2, colorrange=4,
    pointsopacity=0.3, showpoints=true, showboxplot=false, pointshapes=["circle"],
    markoutliers=false, pointsmaxdisplayed=200,
)
pt2, layout = cmplot(train;
    xcol=:Married, ycol=:LWage,
    orientation="h", inf="hdi", ycolorgroups=false, side="both",
    colorshift=0, colorrange=4,
    pointsopacity=0.3, showpoints=true, showboxplot=false, pointshapes=["circle"],
    markoutliers=false, pointsmaxdisplayed=200,
)
# Adjust the layout returned by the second cmplot call.
for (key, value) in (
    "title" => "PiratePlot",
    "yaxis_title" => " ",
    "xaxis_title" => "Wage (log)",
    "margin_t" => 30,
    "legend" => attr(orientation="h"),
)
    layout[key] = value
end
p2 = PlotlyJS.plot(union(pt1, pt2), layout)
#savefig(p2::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_plus_gender-loanamount_pirateplot.pdf"))
[p1 p2] # show p1 and p2 next to each other
# Union of two separate X columns (Gender + Married), not superimposed.
traces1, layout = cmplot(train;
    xcol=:Sex, ycol=:LWage,
    xsuperimposed=false, orientation="h",
    colorshift=2, colorrange=4, ycolorgroups=false,
    side="pos", inf="hdi", conf_level=0.95, altsidesflip=false,
    pointsoverdens=true, showpoints=true,
    pointshapes=["triangle-down", "triangle-up"],
    pointsopacity=0.2, pointsdistance=1, pointsmaxdisplayed=400,
)
traces2, layout = cmplot(train;
    xcol=:Married, ycol=:LWage,
    xsuperimposed=false, orientation="h",
    colorshift=0, colorrange=4, ycolorgroups=false,
    side="pos", inf="hdi", conf_level=0.95, altsidesflip=false,
    pointsoverdens=true, showpoints=true,
    pointshapes=["triangle-right", "triangle-left"],
    pointsopacity=0.3, pointsdistance=1, pointsmaxdisplayed=400,
    title="CloudyMountainPlot",
)
# Adjust the layout returned by the second cmplot call.
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "xaxis_title" => "Wage (log)",
    "yaxis_title" => "Married and Gender",
    "margin_l" => 60,
)
    layout[key] = value
end
#layout["yaxis_range"]=[-0.1,3.51]
p1 = plot(union(traces1, traces2), layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_plus_gender-loanamount_overimposed_rdiplot.pdf"))
# Superimposed RDI plots for the union of two separate X columns (Gender + Married).
traces1, layout = cmplot(train;
    xcol=:Sex, ycol=:LWage, xlabel="M/F",
    xsuperimposed=true, orientation="h",
    colorshift=2, colorrange=4, ycolorgroups=false,
    side="alt", inf="hdi", conf_level=0.95, altsidesflip=false,
    pointsoverdens=true, showpoints=true,
    pointshapes=["triangle-down", "triangle-up"],
    pointsdistance=0.6, pointsmaxdisplayed=400,
)
traces2, layout = cmplot(train;
    xcol=:Married, ycol=:LWage, xlabel="married?",
    xsuperimposed=true, orientation="h",
    colorshift=0, colorrange=4, ycolorgroups=false,
    side="alt", inf="hdi", conf_level=0.95, altsidesflip=false,
    pointsoverdens=true, showpoints=true,
    pointshapes=["triangle-right", "triangle-left"],
    pointsdistance=0.6, pointsmaxdisplayed=400,
)
# Adjust the layout returned by the second cmplot call.
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "yaxis_title" => "Married and Gender",
    "xaxis_title" => "Wage (log)",
    "title" => "Married + Gender ~ Wage",
    "margin_l" => 60,
)
    layout[key] = value
end
#layout["yaxis_range"]=[-0.51,1.51]
p1 = plot(union(traces1, traces2), layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_plus_gender-loanamount_overimposed_rdiplot.pdf"))
# Iris dataset: two Y columns plotted side by side for each species.
traces, layout = cmplot(iris;
    xcol=:Species,
    ycol=[:SepalLength, :PetalLength],
    colorrange=3,
    pointshapes=["star-triangle-up", "star-diamond", "star-square"],
)
p1 = plot(traces, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"species-sepallength_petallength_rdiplot.pdf"))
# Iris dataset: three Y columns plotted side by side for each species.
traces, layout = cmplot(iris;
    xcol=:Species,
    ycol=[:SepalLength, :PetalLength, :SepalWidth],
    pointshapes=["star-triangle-up", "star-diamond", "star-square"],
)
p1 = plot(traces, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"species-sepallength_petallength_sepalwidth_rdiplot.pdf"))
# Intersection of two different X columns (Married and Sex).
traces, layout = cmplot(train;
    xcol=[:Married, :Sex],
    ycol=:LWage,
    ycolorgroups=false,
    side="pos",
    pointshapes=["star-diamond"],
    pointsmaxdisplayed=500,
)
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "title" => "Married & Gender ~ Wage",
    "yaxis_title" => "Married & Gender",
    "xaxis_title" => "Wage (log)",
)
    layout[key] = value
end
p1 = plot(traces, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_gender-loanamount_rdiplot.pdf"))
# Intersection of three different X columns (Sex, Married and SMSA).
traces, layout = cmplot(train;
    xcol=[:Sex, :Married, :SMSA],
    ycol=:LWage,
    ycolorgroups=false,
    side="both",
    pointsmaxdisplayed=300,
)
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "margin_l" => 180,
    "margin_r" => 0,
    "title" => "Gender & Married & LivesInCity ~ Wage",
    "yaxis_title" => "Gender & Married & LivesInCity",
    "xaxis_title" => "Wage (log)",
)
    layout[key] = value
end
p1 = plot(traces, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"married_gender_education-applicantincome_rdiplot.pdf"))
# Superimposed plots for a single X column (South).
traces1, layout = cmplot(train;
    xcol=:South, ycol=:LWage,
    xsuperimposed=true, pointsoverdens=true,
    ycolorgroups=false, altsidesflip=false,
    colorshift=2, colorrange=4,
    pointshapes=["star", "pentagon"],
    pointsmaxdisplayed=400,
)
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "margin_b" => 50,
    "title" => "South ~ Wage",
    "xaxis_title" => "Wage (log)",
    "yaxis_title" => "Resides in the south?",
    "yaxis_range" => [-0.51, 0.51],
)
    layout[key] = value
end
p1 = plot(traces1, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"gender-loanamount_rdiplot.pdf"))
# Superimposed plot for the combination of two X variables (Sex and BlueCol).
traces, layout = cmplot(train;
    xcol=[:Sex, :BlueCol],
    xsuperimposed=true,
    ycol=:LWage,
    ycolorgroups=false,
    pointsoverdens=true,
    markoutliers=false,
    pointshapes=["hexagon"],
    pointsmaxdisplayed=500,
)
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "title" => "Gender & BlueCollar ~ Wage",
    "xaxis_title" => "Wage (log)",
    "yaxis_title" => "Gender & BlueCollar",
)
    layout[key] = value
end
p1 = plot(traces, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"propertyarea_gender-loanamount_rdiplot.pdf"))
# Superimposed plot for the combination of two X variables, one of which
# (EduYears) has three bins.
traces, layout = cmplot(train;
    xcol=[:EduYears, :Union],
    xsuperimposed=true,
    ycol=:LWage,
    ycolorgroups=false,
    altsidesflip=true,
    pointsoverdens=true,
    markoutliers=false,
    pointshapes=["hexagon"],
    pointsmaxdisplayed=100,
)
for (key, value) in (
    "legend_tracegroupgap" => 0,
    "title" => "EducationYears & UnionContract ~ Wage",
    "xaxis_title" => "Wage (log)",
    "yaxis_title" => "EducationYears & UnionContract",
)
    layout[key] = value
end
p1 = plot(traces, layout)
#savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"propertyarea_gender-loanamount_rdiplot.pdf"))
#To save the plot as high quality file:
#=
using ORCA
savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"output_filename.svg"))
savefig(p1::Union{Plot,PlotlyJS.SyncPlot}, joinpath(homedir(),"output_filename.pdf"))
=#
#NOTE: if saving the plot does not work, try starting the orca server manually from a terminal,
# e.g. "conda run orca serve -p 7982"
# Serialise the most recent plot to JSON and embed it in a standalone HTML page.
# The page loads plotly.js from the CDN and draws the figure into `divPlotly`.
# NOTE(review): the "plotly-latest" CDN alias may no longer track current plotly.js
# releases -- consider pinning an explicit version; confirm before relying on it.
jsonplot1 = json(p1)
template = """
<html>
<head>
<script src='https://cdn.plot.ly/plotly-latest.min.js'></script>
</head>
<body>
<div id='divPlotly'></div>
<script>
var plotly_data = $(jsonplot1)
Plotly.react('divPlotly', plotly_data.data, plotly_data.layout);
</script>
</body>
</html>"""
# File name used by the (commented-out) snippet that writes the page to disk.
outputfilename = "plot_name.html"
#=
open(joinpath(homedir(),outputfilename), "w") do f
write(f, template)
end
=#