using Stats

# Seed the global random number generator so the draws below are reproducible.
srand(1)

# Draw 100 values from rand's default distribution (uniform on [0, 1)).
x = rand(100)

# Smallest, middle and largest value of the sample.
# NOTE(review): single-argument min/max on an array is the legacy reduction
# spelling; later Julia releases spell these minimum(x)/maximum(x) — confirm
# the target Julia version before changing anything here.
min(x)

median(x)

max(x)

# Quantiles at probabilities 0, 0.5 and 1; presumably these match the
# min/median/max values shown above.
quantile(x, [0.0, 0.5, 1.0])

# Summary statistics for x.
# NOTE(review): describe presumably comes from the Stats package — TODO confirm.
describe(x)

using Distributions

# Draw 100 samples from a Gamma distribution constructed with arguments (1, 2).
x = rand(Gamma(1, 2), 100)

# Normal distribution constructed with arguments (0, 1); every call on `d`
# below refers to this object.
d = Normal(0, 1)

# Density and cumulative probability of d evaluated at 0.0.
pdf(d, 0.0)

cdf(d, 0.0)

# Quantile of d at probability 0.1.
quantile(d, 0.1)

# A single random draw from d.
rand(d)

# A single draw from a two-category distribution with probabilities 0.1 and 0.9.
rand(Categorical([0.1, 0.9]))

# Same kind of draw, but routed through the object returned by sampler(...).
# NOTE(review): presumably a pre-processed sampler intended for repeated
# draws — confirm against the Distributions documentation.
rand(sampler(Categorical([0.5, 0.5])))

# Display the distribution object and its sampler object for comparison.
Categorical([0.5, 0.5])

sampler(Categorical([0.5, 0.5]))

# Vector form of quantile: evaluates d's quantile at 0.25 and 0.75.
quantile(d, [0.25, 0.75])

# Average negative log-likelihood of 100,000 fresh draws from d. This is a
# Monte Carlo estimate that can be compared with entropy(d) on the next line.
-loglikelihood(d, rand(d, 100_000)) / 100_000

entropy(d)

# Summary quantities computed from the distribution object itself
# (no sampling involved in these calls).
mean(d)

skewness(d)

kurtosis(d)

var(d)

modes(d)

# 1,000 draws from d; used below to fit a distribution back to the sample.
x = rand(d, 1_000)

# Maximum-likelihood fit of a Normal distribution to the sample.
fit_mle(Normal, x)

# Mean and standard deviation of the distribution, next to those of the sample.
(mean(d), std(d)), (mean(x), std(x))

# List every method of `mean`; it is called above on both a distribution
# object and a sample array.
methods(mean)

# 10,000 draws from a Bernoulli distribution constructed with parameter 0.9.
x = rand(Bernoulli(0.9), 10_000)

# Update a Beta(3, 3) prior with the Bernoulli observations in x.
# NOTE(review): `posterior` is a legacy conjugate-prior API — confirm it is
# available in the Distributions version this script targets.
posterior(Beta(3, 3), Bernoulli, x)

using Gadfly

# 100,000 draws from a Gamma distribution constructed with arguments (3, 3).
x = rand(Gamma(3, 3), 100_000)

# Kernel density estimate of the sample; the result's `x` and `density`
# fields are read by the plot call below.
k = kde(x)

# Inspect the KDE result type.
# NOTE(review): presumably lists its field names (legacy use of `names` on a
# type) — TODO confirm.
names(Distributions.UnivariateKDE)

# Default size for subsequent plots: 25cm by 15cm.
set_default_plot_size(25cm, 15cm)

# Line plot of the estimated density (k.density) against k.x.
plot(x = k.x, y = k.density,
     Guide.XLabel("x"), Guide.YLabel("Estimated Density"),
     Geom.line)

using DataFrames

# Arithmetic on the missing-value marker NA.
# NOTE(review): presumably the result is NA (missingness propagates) — confirm.
NA + 1

# Wrap an integer vector in a DataArray so that elements can be set to NA.
x = DataArray([1, 2, 3])

# Legacy `{...}` literal: an untyped array holding two integers and NA.
{1, 2, NA}

# Mark the first element of x as missing.
x[1] = NA

# Mean of a DataArray that now contains an NA.
# NOTE(review): presumably NA rather than a number — confirm.
mean(x)

# Keep only the elements of x that are not NA, then average those.
x[!isna(x)]

mean(x[!isna(x)])

# Pooled (categorical-style) array built from values with a repeated entry.
y = PooledDataArray([1, 1, 2, 3])

# Levels of the pooled array.
# NOTE(review): presumably the distinct values 1, 2, 3 — confirm.
levels(y)

# Ten-row DataFrame: A is 1:10 converted to floating point, B is ten
# random draws from rand.
df = DataFrame(A = float(1:10), B = rand(10))

# Peek at the beginning and the end of the table.
head(df)

tail(df)

# Add column C: each of "G1" and "G2" repeated 5 times (10 values, one per row).
df["C"] = repeat(["G1", "G2"], inner = [5])

# NOTE(review): presumably converts column C in place to a pooled
# (categorical) column — confirm against the DataFrames version in use.
pool!(df, ["C"])

# Show column C and its levels after pooling.
df["C"]

levels(df["C"])

# Stand-alone `repeat` example on a 2x2 matrix: inner = [2, 1] repeats each
# element twice along the first dimension, outer = [1, 2] tiles the result
# twice along the second. The trailing comma in the call is accepted by the parser.
repeat([1 2; 3 4], inner = [2, 1], outer = [1, 2],)

# A DataArray with a complex-valued element, then that element set to NA.
z = DataArray([1 + 2im])

z[1] = NA

# A DataFrame whose single column A holds two other DataFrames as values.
DataFrame(A = [DataFrame(B = 1:2), DataFrame(C = 3:4)])

# Rows 1 through 10, all columns.
df[1:10, :]

# Split df by the values of column C and compute the mean of B in each group.
by(df, "C", df -> mean(df["B"]))

# Two spellings of a row filter using a quoted expression over column names.
# NOTE(review): legacy expression-based API; presumably both keep the rows
# where C equals "G1" — confirm.
select(:(C .== "G1"), df)

df[:(C .== "G1"), :]

# Elementwise comparison of column C against "G1" (the condition used above,
# written directly against the column).
df["C"] .== "G1"

# NOTE(review): presumably evaluates A + B with df's columns bound as
# variables — confirm against the DataFrames version in use.
with(df, :(A + B))

using RDatasets

# Load the "iris" table from the "datasets" collection.
iris = data("datasets", "iris")

head(iris)

# Scatter plot of Petal.Width against Petal.Length, coloured by Species.
plot(iris,
     x = "Petal.Length", y = "Petal.Width", color = "Species",
     Geom.point)

# Build a model frame for the formula A ~ B on df and wrap it in a ModelMatrix.
# NOTE(review): presumably yields the design matrix for that formula — confirm.
ModelMatrix(ModelFrame(:(A ~ B), df))

# Write df to "df.csv" (relative path), display df, then read the file back
# into df2 so the two can be compared.
writetable("df.csv", df)

df

df2 = readtable("df.csv")

# Two small tables sharing key column Z. Key "C" occurs only in A, and key
# "B" occurs twice in B, so the join kinds below can produce different rows.
A = DataFrame(X = 1:3, Z = ["A", "B", "C"])

B = DataFrame(Y = 4:6, Z = ["A", "B", "B"])

# Join on Z without specifying a kind.
# NOTE(review): presumably equivalent to kind = :inner — compare with the next call.
join(A, B, on = "Z")

# The same join with each kind spelled out explicitly.
join(A, B, on = "Z", kind = :inner)

join(A, B, on = "Z", kind = :left)

join(A, B, on = "Z", kind = :right)

join(A, B, on = "Z", kind = :outer)

# Apply nrow to each Species group of iris.
by(iris, "Species", nrow)

# Mean of Petal.Length within each Species group.
by(iris, "Species", df -> mean(df["Petal.Length"]))

# Expression form of `by`.
# NOTE(review): `_DF` presumably refers to the current group's sub-table, so
# N would be the group's row count — confirm against the DataFrames version in use.
by(iris, "Species", :(N = size(_DF, 1)))

using GLM

# Fit generalized linear models to df, passing a Binomial() and then a
# Poisson() distribution object as the family argument.
# NOTE(review): df's A column is float(1:10) and B is rand(10), so these look
# like API demonstrations rather than meaningful models — confirm intent.
glm(:(B ~ A), df, Binomial())

glm(:(A ~ B), df, Poisson())

using Optim

# Objective function used by the optimisation examples.
#
# Reads the first two elements of `x` and returns
# (10.73 - x[1])^2 + (1134.29 - x[2])^4, which is 0 at x = [10.73, 1134.29]
# and positive everywhere else.
#
# The argument is typed AbstractVector rather than Vector so that views,
# ranges and other vector-like inputs are accepted; every call that worked
# with a plain Vector still dispatches here.
f(x::AbstractVector) = (10.73 - x[1])^2 + (1134.29 - x[2])^4

# Value of the objective at the starting point used below.
f([0.0, 0.0])

# Minimise f starting from [0.0, 0.0] with the optimiser's default method.
optimize(f, [0.0, 0.0])

# Same problem with the method selected explicitly via the :l_bfgs symbol.
optimize(f, [0.0, 0.0], method = :l_bfgs)

# 1,000 draws from a Normal distribution constructed with arguments (11, 3).
x = rand(Normal(11, 3), 1_000)

# Build a negative log-likelihood function for the sample `x`.
#
# The returned function takes a parameter vector, reads params[1], and
# returns -loglikelihood(Normal(params[1], 3), x): the first Normal argument
# is the free parameter, the second is fixed at 3. The sample `x` is captured
# by the closure, so the returned function depends only on `params`.
function makenll(x)
    # AbstractVector rather than Vector: views and other vector-like parameter
    # containers are accepted, while scalars are still rejected by dispatch.
    function nll(params::AbstractVector)
        return -loglikelihood(Normal(params[1], 3), x)
    end
    return nll
end

# Negative log-likelihood function for the current sample x.
nll = makenll(x)

# Evaluate it at two candidate values of the free parameter.
nll([0.0])

nll([10.0])

# Minimise the negative log-likelihood starting from [0.0].
optimize(nll, [0.0])

# Sample mean for comparison: with the Normal's second parameter held fixed,
# the likelihood-maximising first parameter is the sample mean, so the
# optimiser's result should be close to this value.
mean(x)

# RDatasets and iris are loaded again here; this repeats the earlier load so
# the clustering example can be run on its own.
using RDatasets

iris = data("datasets", "iris")

using Clustering

# k-means with 3 clusters on columns 2:5 of iris, converted to a matrix and
# transposed so that each table row becomes a matrix column.
# NOTE(review): presumably kmeans treats columns as observations, and columns
# 2:5 are the four numeric measurements — confirm against this iris table's layout.
kmeans(matrix(iris[:, 2:5])', 3)

# Per-Species means of columns 2 through 5, returned as a one-row DataFrame
# per group with columns A, B, C and D.
by(iris, "Species", df -> DataFrame(A = mean(df[2]),
                                    B = mean(df[3]),
                                    C = mean(df[4]),
                                    D = mean(df[5])))