# Walkthrough script: a sequence of top-level expressions exercising summary
# statistics, probability distributions, plotting, data frames, GLMs,
# optimization and clustering. Most statements are bare expressions whose
# values are not stored, so the script is presumably meant to be evaluated
# interactively, one expression at a time -- confirm.
# NOTE(review): `srand`, the `{...}` literal, `NA` and string column indexing
# suggest this targets an early (pre-1.0) Julia and the package APIs of that
# era -- confirm the toolchain before running.

# --- Summary statistics of a random sample ---------------------------------
using Stats
# Seed the random number generator before drawing.
srand(1)
x = rand(100)
min(x)
median(x)
max(x)
quantile(x, [0.0, 0.5, 1.0])
describe(x)

# --- Probability distributions ----------------------------------------------
using Distributions
# `x` is rebound here and several more times below; later statements always
# see the most recent binding.
x = rand(Gamma(1, 2), 100)
d = Normal(0, 1)
# Density, CDF and quantile of `d` evaluated at single points.
pdf(d, 0.0)
cdf(d, 0.0)
quantile(d, 0.1)
# Random draws: directly from a distribution, and through `sampler(...)`.
rand(d)
rand(Categorical([0.1, 0.9]))
rand(sampler(Categorical([0.5, 0.5])))
Categorical([0.5, 0.5])
sampler(Categorical([0.5, 0.5]))
quantile(d, [0.25, 0.75])
# Average negative log-likelihood over 100_000 draws from `d`; presumably
# shown as a Monte Carlo counterpart to `entropy(d)` on the next line.
-loglikelihood(d, rand(d, 100_000)) / 100_000
entropy(d)
mean(d)
skewness(d)
kurtosis(d)
var(d)
modes(d)
# Draw 1_000 values from `d`, fit a Normal to them, then show the
# distribution's (mean, std) next to the sample's (mean, std).
x = rand(d, 1_000)
fit_mle(Normal, x)
(mean(d), std(d)), (mean(x), std(x))
methods(mean)
# Bernoulli(0.9) draws passed to `posterior` together with a Beta(3, 3).
x = rand(Bernoulli(0.9), 10_000)
posterior(Beta(3, 3), Bernoulli, x)

# --- Density estimate and plot ----------------------------------------------
using Gadfly
x = rand(Gamma(3, 3), 100_000)
k = kde(x)
names(Distributions.UnivariateKDE)
set_default_plot_size(25cm, 15cm)
# Line plot of the `x` and `density` fields of the estimate `k`.
plot(x = k.x, y = k.density, Guide.XLabel("x"), Guide.YLabel("Estimated Density"), Geom.line)

# --- Missing values and data arrays -----------------------------------------
using DataFrames
NA + 1
x = DataArray([1, 2, 3])
{1, 2, NA}
x[1] = NA
# Mean with the NA still present, then again over only the elements for
# which `isna` is false.
mean(x)
x[!isna(x)]
mean(x[!isna(x)])
y = PooledDataArray([1, 1, 2, 3])
levels(y)

# --- Building and querying a DataFrame --------------------------------------
df = DataFrame(A = float(1:10), B = rand(10))
head(df)
tail(df)
# Add a string column "C" built from "G1"/"G2" with `inner = [5]`, then pass
# it to `pool!` (presumably converting it to a pooled column -- confirm).
df["C"] = repeat(["G1", "G2"], inner = [5])
pool!(df, ["C"])
df["C"]
levels(df["C"])
# Stand-alone `repeat` example on a 2x2 matrix with both `inner` and `outer`.
repeat([1 2; 3 4], inner = [2, 1], outer = [1, 2],)
z = DataArray([1 + 2im])
z[1] = NA
# A DataFrame whose column `A` is built from a vector of DataFrames.
DataFrame(A = [DataFrame(B = 1:2), DataFrame(C = 3:4)])
df[1:10, :]
# Apply a function to the groups of `df` defined by column "C".
by(df, "C", df -> mean(df["B"]))
# Three spellings that all use the condition C .== "G1": `select` with a
# quoted expression, indexing with a quoted expression, and a direct
# element-wise comparison on the column.
select(:(C .== "G1"), df)
df[:(C .== "G1"), :]
df["C"] .== "G1"
with(df, :(A + B))

# --- Example dataset, model matrix, CSV round trip --------------------------
using RDatasets
iris = data("datasets", "iris")
head(iris)
plot(iris, x = "Petal.Length", y = "Petal.Width", color = "Species", Geom.point)
ModelMatrix(ModelFrame(:(A ~ B), df))
# Side effect: writes "df.csv" relative to the current working directory,
# then reads the same file back into `df2`.
writetable("df.csv", df)
df
df2 = readtable("df.csv")

# --- Joins ------------------------------------------------------------------
# Two small tables sharing column "Z"; joined on "Z" once without `kind` and
# once for each of the four `kind` values.
A = DataFrame(X = 1:3, Z = ["A", "B", "C"])
B = DataFrame(Y = 4:6, Z = ["A", "B", "B"])
join(A, B, on = "Z")
join(A, B, on = "Z", kind = :inner)
join(A, B, on = "Z", kind = :left)
join(A, B, on = "Z", kind = :right)
join(A, B, on = "Z", kind = :outer)

# --- Grouped operations on iris ---------------------------------------------
# The third argument is given in three forms: a named function, an anonymous
# function, and a quoted expression that refers to `_DF`.
by(iris, "Species", nrow)
by(iris, "Species", df -> mean(df["Petal.Length"]))
by(iris, "Species", :(N = size(_DF, 1)))

# --- Generalized linear models ----------------------------------------------
using GLM
glm(:(B ~ A), df, Binomial())
glm(:(A ~ B), df, Poisson())

# --- Optimization -----------------------------------------------------------
using Optim
# Objective over a 2-element vector. Both terms are even powers, so the value
# is non-negative and is zero exactly at x = [10.73, 1134.29].
f(x::Vector) = (10.73 - x[1])^2 + (1134.29 - x[2])^4
f([0.0, 0.0])
optimize(f, [0.0, 0.0])
optimize(f, [0.0, 0.0], method = :l_bfgs)
x = rand(Normal(11, 3), 1_000)
# Build and return a closure over the data `x`. The closure takes a parameter
# vector and returns the negated log-likelihood of `x` under
# Normal(params[1], 3); only params[1] is read, the second argument of Normal
# is fixed at 3.
function makenll(x)
    nll(params::Vector) = -loglikelihood(Normal(params[1], 3), x)
end
nll = makenll(x)
nll([0.0])
nll([10.0])
optimize(nll, [0.0])
# Sample mean of `x`, presumably shown for comparison with the optimizer's
# result above.
mean(x)

# --- Clustering -------------------------------------------------------------
# RDatasets and iris are loaded a second time here, repeating the earlier
# load.
using RDatasets
iris = data("datasets", "iris")
using Clustering
# Columns 2 through 5 of iris, converted with `matrix` and transposed with
# `'`, passed to `kmeans` with 3 (presumably the number of clusters).
kmeans(matrix(iris[:, 2:5])', 3)
# Per-"Species" means of columns 2, 3, 4 and 5, returned as a one-row
# DataFrame per group with columns A, B, C and D.
by(iris, "Species", df -> DataFrame(A = mean(df[2]), B = mean(df[3]), C = mean(df[4]), D = mean(df[5])))