using DataFrames
using RCall
@rimport base as R
R.Sys_setenv(LANG = "en") # Without running this first, the session sometimes hangs.
# String macro: sym"text" expands to the call Symbol("text"), which makes it easy to
# write symbols that are not valid Julia identifiers, such as sym"conf.int".
macro sym_str(name)
    # Same AST as :(Symbol($(esc(name)))): the argument is escaped into caller scope,
    # while `Symbol` itself is resolved hygienically in the defining module.
    return Expr(:call, :Symbol, esc(name))
end
# Check that the sym"..." string macro yields the expected Symbol.
@show sym"conf.int";
sym"conf.int" = Symbol("conf.int")
# Read or update the R option `rcalljl_options` (the `width`/`height` settings that
# appear in this notebook's output).
#
# - With keyword arguments: R's `options(rcalljl_options = ...)` is called with a Dict of
#   the given settings. R returns the previous value of the option, which is converted
#   to a Julia object with `rcopy`.
# - Without keyword arguments: the option is only queried via
#   `options("rcalljl_options")`, and the current value is returned in the same shape.
#
# The query-only branch matters: passing an empty Dict to the setter form would
# overwrite the stored settings with an empty list, silently discarding whatever a
# preceding `RCall.ijulia_setdevice` call had just configured.
function rcalljl_options(; kwargs...)
    if isempty(kwargs)
        return rcopy(RCall.rcall_p(:options, "rcalljl_options"))
    end
    return rcopy(RCall.rcall_p(:options, rcalljl_options=Dict(kwargs)))
end
# Set the R plot size through `rcalljl_options`.
#
# When the active IJulia MIME type (`RCall.ijulia_mime`) is `image/svg+xml`, `w` and `h`
# are divided by 100 before being stored; for any other MIME type they are passed
# through unchanged. NOTE(review): presumably the SVG device takes sizes in inches while
# the other devices take pixels - confirm against the RCall device setup.
#
# Returns whatever `rcalljl_options` returns.
function rplotsize(w, h)
    svg_active = RCall.ijulia_mime == MIME("image/svg+xml")
    return svg_active ? rcalljl_options(; width=w / 100, height=h / 100) :
           rcalljl_options(; width=w, height=h)
end
# Select PNG as the IJulia output format for R plots.
#
# Any keyword arguments are forwarded to `RCall.ijulia_setdevice`. Returns a tuple of
# the active MIME type (`RCall.ijulia_mime`) and the result of `rcalljl_options()`.
function rplotpng(; kwargs...)
    png_mime = MIME"image/png"()
    RCall.ijulia_setdevice(png_mime; kwargs...)
    active_mime = RCall.ijulia_mime
    return (active_mime, rcalljl_options())
end
# Select SVG as the IJulia output format for R plots.
#
# Any keyword arguments are forwarded to `RCall.ijulia_setdevice`. Returns a tuple of
# the active MIME type (`RCall.ijulia_mime`) and the result of `rcalljl_options()`.
function rplotsvg(; kwargs...)
    svg_mime = MIME"image/svg+xml"()
    RCall.ijulia_setdevice(svg_mime; kwargs...)
    active_mime = RCall.ijulia_mime
    return (active_mime, rcalljl_options())
end
rplotsvg (generic function with 1 method)
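# Original RCall.ijulia_displayfile method for SVG, kept for reference
# (it is replaced by the override defined after this comment block):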
# function RCall.ijulia_displayfile(m::MIME"image/svg+xml", f)
# # R svg images use named defs, which cause problem when used inline, see
# # https://github.com/jupyter/notebook/issues/333
# # we get around this by renaming the elements.
# open(f) do f
# r = RCall.randstring()
# d = read(f, String)
# d = replace(d, "id=\"glyph" => "id=\"glyph"*r)
# d = replace(d, "href=\"#glyph" => "href=\"#glyph"*r)
# display(m,d)
# end
# end
using Base64
# Override of RCall's display hook for SVG plot files.
#
# Reads the file `f`, base64-encodes its contents, and displays it as an HTML <img>
# element whose `src` is a data URI carrying the MIME type `m`, rather than displaying
# the SVG markup itself.
function RCall.ijulia_displayfile(m::MIME"image/svg+xml", f)
    encoded = base64encode(read(f, String))
    tag = """<img src="data:$(m);base64,$(encoded)" />"""
    return display(MIME("text/html"), tag)
end
# Load the R package ggplot2 through RCall so its functions can be called from Julia,
# then switch R plot output to SVG.
@rlibrary ggplot2
rplotsvg()
(MIME type image/svg+xml, OrderedCollections.OrderedDict{Symbol, Any}(:rcalljl_options => OrderedCollections.OrderedDict{Symbol, Any}(:height => 5, :width => 6)))
# Plot cos(t) in red and sin(t) in blue for 1000 points t in [-10, 10].
rplotsize(640, 400)
t = range(-10, 10, length=1000)
data = DataFrame(t = t, x = cos.(t), y = sin.(t))
ggplot(data=data, aes(x=:t, y=:x)) +
geom_line(color="red") +
geom_line(aes(y=:y), color="blue")
RObject{VecSxp}
# Test of changing the displayed plot size
@show rplotsize(400, 250)
sleep(0.1)
P = ggplot(data=data, aes(x=:t, y=:x)) +
geom_line(color="red") +
geom_line(aes(y=:y), color="blue")
display(P)
rplotsize(400, 250) = OrderedCollections.OrderedDict{Symbol, Any}(:rcalljl_options => OrderedCollections.OrderedDict{Symbol, Any}(:height => 4.0, :width => 6.4))
RObject{VecSxp}
@rlibrary stats # Makes R's fisher.test and similar functions available under names like fisher_test
A = [
10 10
7 27
]
f = fisher_test(A) # The p-value is below 5%, yet the 95% confidence interval contains 1
RObject{VecSxp} Fisher's Exact Test for Count Data data: structure(c(10L, 7L, 10L, 27L), .Dim = c(2L, 2L)) p-value = 0.03516 alternative hypothesis: true odds ratio is not equal to 1 95 percent confidence interval: 0.9836015 15.3827687 sample estimates: odds ratio 3.751532
f = rcopy(f) # Convert the RObject f into a Julia object
OrderedCollections.OrderedDict{Symbol, Any} with 7 entries: :p_value => 0.0351564 :conf_int => [0.983602, 15.3828] :estimate => 3.75153 :null_value => 1.0 :alternative => "two.sided" :method => "Fisher's Exact Test for Count Data" :data_name => "structure(c(10L, 7L, 10L, 27L), .Dim = c(2L, 2L))"
# Show the p-value and confidence interval from the converted fisher_test result.
@show f[:p_value]
@show f[:conf_int];
f[:p_value] = 0.03515636840648692 f[:conf_int] = [0.9836015045314647, 15.382768670334402]
@rlibrary exact2x2
rcall(:library, :exact2x2) # Without this, calling fisher_exact fails with a complaint that exact2x2 cannot be found
┌ Warning: RCall.jl: Loading required package: exactci │ Loading required package: ssanv └ @ RCall C:\Users\genkuroki\.julia\packages\RCall\eRsxl\src\io.jl:160
RObject{StrSxp} [1] "exact2x2" "exactci" "ssanv" "stats" "graphics" "grDevices" [7] "utils" "datasets" "methods" "base"
g = fisher_exact(A) # The p-value is below 5%, so the 95% confidence interval excludes 1 (the expected, consistent result)
RObject{VecSxp} Two-sided Fisher's Exact Test (usual method using minimum likelihood) data: structure(c(10L, 7L, 10L, 27L), .Dim = c(2L, 2L)) p-value = 0.03516 alternative hypothesis: true odds ratio is not equal to 1 95 percent confidence interval: 1.0691 13.4926 sample estimates: odds ratio 3.751532
g = rcopy(g) # Convert the RObject g into a Julia object
OrderedCollections.OrderedDict{Symbol, Any} with 7 entries: :p_value => 0.0351564 :conf_int => [1.0691, 13.4926] :estimate => 3.75153 :null_value => 1.0 :alternative => "two.sided" :method => "Two-sided Fisher's Exact Test (usual method using minimum li… :data_name => "structure(c(10L, 7L, 10L, 27L), .Dim = c(2L, 2L))"
# Show the p-value and confidence interval from the converted fisher_exact result.
@show g[:p_value]
@show g[:conf_int];
g[:p_value] = 0.03515636840648692 g[:conf_int] = [1.0691, 13.4926]
# Achieve the same thing as above in a different way:
# send A to R, run fisher.exact in R code, and bring the result h back to Julia.
rcall(:library, :exact2x2)
@rput A
R"""h = fisher.exact(A)"""
@rget h
OrderedCollections.OrderedDict{Symbol, Any} with 7 entries: :p_value => 0.0351564 :conf_int => [1.0691, 13.4926] :estimate => 3.75153 :null_value => 1.0 :alternative => "two.sided" :method => "Two-sided Fisher's Exact Test (usual method using minimum li… :data_name => "A"
# Show the p-value and confidence interval from the result fetched with @rget.
@show h[:p_value]
@show h[:conf_int];
h[:p_value] = 0.03515636840648692 h[:conf_int] = [1.0691, 13.4926]
# Bind R's mtcars data set to a variable in the R session, copy it into Julia,
# and preview the first five rows.
R"mtcars <- mtcars"
@rget mtcars
first(mtcars, 5)
5 rows × 11 columns (omitted printing of 2 columns)
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | |
---|---|---|---|---|---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | |
1 | 21.0 | 6.0 | 160.0 | 110.0 | 3.9 | 2.62 | 16.46 | 0.0 | 1.0 |
2 | 21.0 | 6.0 | 160.0 | 110.0 | 3.9 | 2.875 | 17.02 | 0.0 | 1.0 |
3 | 22.8 | 4.0 | 108.0 | 93.0 | 3.85 | 2.32 | 18.61 | 1.0 | 1.0 |
4 | 21.4 | 6.0 | 258.0 | 110.0 | 3.08 | 3.215 | 19.44 | 1.0 | 0.0 |
5 | 18.7 | 8.0 | 360.0 | 175.0 | 3.15 | 3.44 | 17.02 | 0.0 | 0.0 |
# In R: load ggplot2 and ggplot2movies and keep a random sample of 1000 rows of
# `movies`. Then copy the sampled data into Julia and preview the first five rows.
R"""
library(ggplot2)
library(ggplot2movies)
movies <- movies
# Select 1000 of the movies
movies <- movies[sample(nrow(movies), 1000), ]
"""
@rget movies
first(movies, 5)
5 rows × 24 columns (omitted printing of 16 columns)
title | year | length | budget | rating | votes | r1 | r2 | |
---|---|---|---|---|---|---|---|---|
String | Int64 | Int64 | Int64? | Float64 | Int64 | Float64 | Float64 | |
1 | Mudhoney | 1965 | 92 | 60000 | 5.6 | 140 | 4.5 | 4.5 |
2 | Squeeze a Flower | 1970 | 102 | missing | 3.4 | 6 | 34.5 | 14.5 |
3 | Hussard sur le toit, Le | 1995 | 118 | missing | 6.9 | 1383 | 4.5 | 4.5 |
4 | Mummy's Ghost, The | 1944 | 61 | missing | 5.0 | 192 | 4.5 | 4.5 |
5 | Policewomen | 1974 | 90 | missing | 5.2 | 30 | 0.0 | 0.0 |
# Switch R plot output to SVG for the plots that follow.
rplotsvg()
(MIME type image/svg+xml, OrderedCollections.OrderedDict{Symbol, Any}(:rcalljl_options => OrderedCollections.OrderedDict{Symbol, Any}(:height => 5, :width => 6)))
# Histogram of movie ratings using ggplot2's default binning.
rplotsize(640, 400)
p = ggplot(data=movies, aes(x=:rating)) # R's `rating` is written as `:rating` in Julia
p + geom_histogram(color="black", fill="pink") # default bins
RObject{VecSxp}
┌ Warning: RCall.jl: `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. └ @ RCall C:\Users\genkuroki\.julia\packages\RCall\eRsxl\src\io.jl:160
# Same histogram with a bin width of 1.
p + geom_histogram(binwidth=1, color="black", fill="pink") # big bins
RObject{VecSxp}
# Same histogram with a bin width of 0.1.
p + geom_histogram(binwidth=0.1, color="black", fill="pink") # small bins
RObject{VecSxp}
# Density plot of the ratings instead of a histogram.
p + geom_density(fill="pink")
RObject{VecSxp}
# Boxplot of the ratings, flipped to horizontal with coord_flip()
m = ggplot(data=movies, aes(y=:rating, x=R.factor(0))) # R's `rating` and `factor` are written as `:rating` and `R.factor` in Julia
m + geom_boxplot(fill="pink") + coord_flip()
RObject{VecSxp}
# Bar chart of mtcars by cyl.
c = ggplot(mtcars, R"aes(cyl)") # R's `aes(cyl)` is passed as the R string macro R"aes(cyl)"
c + geom_bar(fill="lightgreen")
RObject{VecSxp}
# Scatter plot of mpg against wt for mtcars.
rplotsize(400, 400)
p = ggplot(mtcars, R"aes(wt, mpg)")
p + geom_point(size=4, color="lightgreen")
RObject{VecSxp}
# Same scatter plot, with point colour mapped to factor(cyl) and point size to qsec.
rplotsize(480, 400)
p + geom_point(R"aes(colour=factor(cyl), size = qsec)")
RObject{VecSxp}
# hwy against factor(cyl), coloured by factor(cyl).
# NOTE(review): `:mpg` is passed as a symbol, so it is presumably resolved on the R side
# to ggplot2's `mpg` data set - confirm.
rplotsize(480, 400)
p = ggplot(:mpg, R"aes(factor(cyl), hwy, colour=factor(cyl))")
p + geom_point(size=4) # Overlaid dots
RObject{VecSxp}
# Same points with position="jitter".
p + geom_point(size=4, position="jitter") # Jittered dots
RObject{VecSxp}
# Same points drawn with alpha=.2.
p + geom_point(size=4, alpha=.2) # Transparent dots
RObject{VecSxp}
# Violin plot of hwy for each factor(cyl), filled by factor(cyl).
rplotsize(670, 600)
p = ggplot(:mpg, R"aes(x=factor(cyl), y=hwy, fill=factor(cyl))")
p + geom_violin(scale = "width")
RObject{VecSxp}
# Same violin plot with jittered points drawn on top.
p + geom_violin(scale = "width") + geom_point(size=2, position="jitter")
RObject{VecSxp}
# Mosaic plot of Sex against Survived from the Titanic data, drawn in R.
rplotsize(600, 600)
R"""mosaicplot(~ Sex + Survived, data = Titanic, color = TRUE)"""
RObject{NilSxp} NULL
# Mosaic plot of the whole Titanic table, drawn in R.
rplotsize(700, 700)
R"""mosaicplot(Titanic, color = TRUE)"""
RObject{NilSxp} NULL
# In R: load coefplot, fit a linear model of mpg on wt, disp, hp and factor(cyl)
# using mtcars, and print the model summary.
R"""
# Coefficient plots
library(coefplot) # Install this package
model1 <- lm(mpg ~ wt + disp + hp + factor(cyl), data=mtcars)
summary(model1)
"""
RObject{VecSxp} Call: lm(formula = mpg ~ wt + disp + hp + factor(cyl), data = mtcars) Residuals: Min 1Q Median 3Q Max -4.2740 -1.0349 -0.3831 0.9810 5.4192 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 36.002405 2.130726 16.897 1.54e-15 *** wt -3.428626 1.055455 -3.248 0.00319 ** disp 0.004199 0.012917 0.325 0.74774 hp -0.023517 0.012216 -1.925 0.06523 . factor(cyl)6 -3.466011 1.462979 -2.369 0.02554 * factor(cyl)8 -3.753227 2.813996 -1.334 0.19385 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 2.482 on 26 degrees of freedom Multiple R-squared: 0.8578, Adjusted R-squared: 0.8305 F-statistic: 31.37 on 5 and 26 DF, p-value: 3.18e-10
# Coefficient plot of model1 with the title removed and the black-and-white theme.
rplotsize(600, 600)
R"""coefplot(model1) + labs(title=NULL) + theme_bw()"""
RObject{VecSxp}