using DataFrames, Random

# --- Constructing and indexing a DataFrame --------------------------------

# A small table built from column keyword arguments; column C is random.
df = DataFrame(A = 1:4, B = ["M", "F", "F", "M"], C = randn(4))

# A 2x2 matrix literal mixing integers and strings.
mat = [1 "M"; 2 "M"]

# Three spellings of "column A", compared in one chained `==`.
# NOTE(review): single-index column access `df[:A]` appears to be deprecated or
# removed in newer DataFrames.jl releases — confirm against the pinned version.
df[:A] == df[:, :A] == df.A

# Rebind `df` to a table with one column per kind of data, then summarise it.
df = DataFrame(
    Int = [1:1:10;],
    Float = [1.0:1.0:10.0;],
    Categorical = CategoricalArray(["A","B","C","A","B","C","A","B","C","D"]),
    String = ["A","B","C","A","B","C","A","B","C","D"],
)
describe(df)

# --- Summarising the iris dataset -----------------------------------------

using RDatasets
iris = RDatasets.dataset("datasets", "iris")

# Request specific statistics plus a custom one named :sum computed with `sum`.
describe(iris, :std, :q25, :q75, :sum => sum)

# Select the columns whose names match the regex, then take the first row.
iris[:, r"Sepal"] |> first

# --- Joins -----------------------------------------------------------------

# Two tables that share the key column :ID but cover different ID values.
A = DataFrame(
    ID = [1,2,3,4,5,8,9,10],
    Japanese = [10, missing, 30, 40, 50, 80, 90, 100],
    Math = [30, 40, 20, 50, 10, 60, 90, 60],
)
B = DataFrame(
    ID = [7,6,5,4,3,2,1],
    Books = [20, 1, 10, 5, 7, 3, missing],
    Parents = CategoricalArray(["大卒", "大卒", "高卒", missing, "大卒", "高卒", "高卒"]),
)

# Inner join on :ID.
join(A, B, on = :ID, kind = :inner)

# C carries the same key values as B under a different column name (:Dokusho),
# so the key pair is given as `left => right`.
C = DataFrame(Dokusho = [7,6,5,4,3,2,1], Books = [20, 1, 10, 5, 7, 3, missing])
join(A, C, on = :ID => :Dokusho)

# Join using A's rows as the base (left join).
join(A, B, on = :ID, kind = :left)

# Join using B's rows as the base (right join), sorted in place.
join(A, B, on = :ID, kind = :right) |> sort!

# Outer join, sorted in place.
join(A, B, on = :ID, kind = :outer) |> sort!
# Semi join and anti join of A against B on :ID.
join(A, B, on = :ID, kind = :semi)
join(A, B, on = :ID, kind = :anti)

# --- Reshaping between wide and long form ----------------------------------

D = join(A, B, on = :ID, kind = :outer)

# Stack the two score columns into a (:Subject, :Score) pair of columns.
# `stack` is qualified with the module name, presumably to avoid a clash with
# another exported `stack` — TODO confirm.
longD = DataFrames.stack(D, [:Japanese, :Math], variable_name = :Subject, value_name = :Score);
first(longD, 5)

using Gadfly
# One histogram of Score per Subject, laid out as a subplot grid.
plot(longD, xgroup = "Subject", x = "Score", Geom.subplot_grid(Geom.histogram));

# `melt` is given the identifier columns (here :ID, :Books, :Parents), whereas
# `stack` above is given the measure columns.
melt(D, [:ID, :Books, :Parents], variable_name = :Subject, value_name = :Score) |> x -> first(x, 5)

# Back from long to wide form: one column per value of :Subject.
unstack(longD, :Subject, :Score)

# --- The same kind of grouped analysis written in R via RCall ---------------

using RCall
# The R program needs its own line breaks; `iris` here is resolved inside the
# R session, not the Julia variable of the same name.
R"""
library(tidyverse)
iris %>%
    filter(Sepal.Width >= 3.2 & Sepal.Width <= 3.8) %>%
    group_by(Species) %>%
    nest %>%
    mutate(ols = map(data, ~lm(data = .x, formula = Sepal.Width ~ Sepal.Length)),
           plt = map(ols, ~plot(.x))) %>%
    pull(ols)
"""

# --- DataFramesMeta @linq pipelines ----------------------------------------
# Each pipeline is broken after a trailing `|>` so the whole chain is still a
# single expression handed to the macro.

using DataFramesMeta, Statistics, StatsBase

@linq iris |>
    with(iris[:SepalLength .> 6.0, :]) |>
    first(5)

# Filter rows, z-score SepalLength within each species, then summarise.
# NOTE(review): the summary column is named `var` but is computed with `std` —
# confirm which statistic was intended.
@linq iris |>
    where(5.0 .≥ :SepalWidth .≥ 3.0) |>
    groupby(:Species) |>
    transform(StdSepalLn = zscore(:SepalLength)) |>
    by(:Species, mean = mean(:StdSepalLn), var = std(:StdSepalLn))

OrderedIris = @linq iris |>
    where(7.0 .≥ :SepalLength) |>
    groupby(:Species) |>
    orderby(:SepalLength)
map(df -> first(df, 3), OrderedIris)

# Per-group summaries.
@linq iris |>
    groupby(:Species) |>
    based_on(MEAN = mean(:SepalLength), VAR = var(:SepalLength))

# Mixes a per-group scalar (MEAN) with a per-row vector (DOUBLE).
@linq iris |>
    groupby(:Species) |>
    based_on(MEAN = mean(:SepalLength), DOUBLE = 2*:SepalLength) |>
    first(5)

GroupedIris = @linq iris |> groupby(:Species)
@linq GroupedIris[1] |> first(3)

# --- Per-group regression and plots ----------------------------------------

using GLM
# Fit SepalLength ~ SepalWidth on each group and keep the coefficients.
res = map(x -> coef(lm(@formula(SepalLength ~ SepalWidth), x)), GroupedIris)

# One scatter plot per group; the mapped result is converted to a DataFrame and
# its :x1 column is indexed below to retrieve the plots.
plt = @linq map(df -> plot(df, x="SepalLength", y="SepalWidth", Geom.point), GroupedIris) |> DataFrame
gridstack([plt[1,:x1] plt[2,:x1] plt[3,:x1]])