# Descriptive statistics on a random sample, using the Stats package.
using Stats
# Seed the global RNG so the random draws in this script are reproducible.
srand(1)
# 100 draws from rand; x is rebound several times later in this script.
x = rand(100)
# Minimum, median and maximum of the sample.
# NOTE(review): single-argument min/max on an array depends on the Julia
# version this script targets; newer releases spell these minimum/maximum.
min(x)
median(x)
max(x)
# The same three summaries requested as the 0.0, 0.5 and 1.0 quantiles.
quantile(x, [0.0, 0.5, 1.0])
# NOTE(review): describe presumably prints a summary of x; confirm against the
# package documentation.
describe(x)
# Working with probability distribution objects from the Distributions package.
using Distributions
# 100 draws from Gamma(1, 2); this rebinds x.
x = rand(Gamma(1, 2), 100)
# d is the Normal(0, 1) distribution object queried by the calls below.
d = Normal(0, 1)
# Density and cumulative probability at 0.0, then the 0.1 quantile.
pdf(d, 0.0)
cdf(d, 0.0)
quantile(d, 0.1)
# A single random draw from d.
rand(d)
# Draw from a two-outcome Categorical, directly and through sampler(...).
rand(Categorical([0.1, 0.9]))
rand(sampler(Categorical([0.5, 0.5])))
# Evaluate the distribution and its sampler on their own (presumably to show
# how each object is displayed at the REPL).
Categorical([0.5, 0.5])
sampler(Categorical([0.5, 0.5]))
# Quantiles at several probabilities in one call.
quantile(d, [0.25, 0.75])
# Average negative log-likelihood over 100_000 draws from d, followed by
# entropy(d) so the two values can be compared.
-loglikelihood(d, rand(d, 100_000)) / 100_000
entropy(d)
# Moments and modes of d.
mean(d)
skewness(d)
kurtosis(d)
var(d)
modes(d)
# 1_000 draws from d (defined earlier in the script); this rebinds x.
x = rand(d, 1_000)
# Fit a Normal to the sample by maximum likelihood.
fit_mle(Normal, x)
# Mean/std of the distribution next to mean/std of the sample.
(mean(d), std(d)), (mean(x), std(x))
# List the methods currently defined for mean.
methods(mean)
# 10_000 draws from Bernoulli(0.9); this rebinds x.
x = rand(Bernoulli(0.9), 10_000)
# NOTE(review): presumably the posterior under a Beta(3, 3) prior with a
# Bernoulli likelihood given x; confirm against the Distributions docs.
posterior(Beta(3, 3), Bernoulli, x)
# Kernel density estimate of a Gamma sample, plotted with Gadfly.
using Gadfly
# 100_000 draws from Gamma(3, 3); this rebinds x.
x = rand(Gamma(3, 3), 100_000)
k = kde(x)
# NOTE(review): presumably lists the fields of the type that kde returns; the
# plot below reads k.x and k.density.
names(Distributions.UnivariateKDE)
set_default_plot_size(25cm, 15cm)
# Line plot of k.density against k.x with labelled axes.
plot(x = k.x, y = k.density,
Guide.XLabel("x"), Guide.YLabel("Estimated Density"),
Geom.line)
# Missing values with DataFrames: NA, DataArray and PooledDataArray.
using DataFrames
# Arithmetic involving NA (presumably yields NA; confirm).
NA + 1
# Wrap a plain array in a DataArray; this rebinds x.
x = DataArray([1, 2, 3])
# Untyped array literal that mixes integers with NA.
{1, 2, NA}
# Overwrite the first element with NA.
x[1] = NA
# Mean over all of x, including the NA entry.
mean(x)
# Keep only the entries where isna is false, then take the mean of those.
x[!isna(x)]
mean(x[!isna(x)])
# PooledDataArray built from repeated values, and its levels.
y = PooledDataArray([1, 1, 2, 3])
levels(y)
# Build a 10-row DataFrame with columns A (1.0 through 10.0) and B (random).
df = DataFrame(A = float(1:10), B = rand(10))
head(df)
tail(df)
# Add a string column C built by repeating each of "G1" and "G2" five times.
df["C"] = repeat(["G1", "G2"], inner = [5])
# NOTE(review): pool! presumably converts column C to a pooled representation
# in place; confirm against the DataFrames docs.
pool!(df, ["C"])
df["C"]
levels(df["C"])
# repeat on a matrix with both inner and outer repetition counts.
repeat([1 2; 3 4], inner = [2, 1], outer = [1, 2],)
# A DataArray with a complex element, whose only entry is then set to NA.
z = DataArray([1 + 2im])
z[1] = NA
# A DataFrame whose column A holds other DataFrames.
DataFrame(A = [DataFrame(B = 1:2), DataFrame(C = 3:4)])
# Row-range indexing with all columns.
df[1:10, :]
# Group df by column C and take the mean of B within each group.
by(df, "C", df -> mean(df["B"]))
# Three ways of expressing the condition C .== "G1": select with a quoted
# expression, indexing with a quoted expression, and the raw comparison.
select(:(C .== "G1"), df)
df[:(C .== "G1"), :]
df["C"] .== "G1"
# Evaluate the quoted expression A + B against the columns of df.
with(df, :(A + B))
# Load the iris dataset through RDatasets and draw a scatter plot from it.
using RDatasets
iris = data("datasets", "iris")
head(iris)
# Points at (Petal.Length, Petal.Width), colored by the Species column.
plot(iris,
x = "Petal.Length", y = "Petal.Width", color = "Species",
Geom.point)
# Build a model matrix for the formula A ~ B from df (defined earlier).
ModelMatrix(ModelFrame(:(A ~ B), df))
# Write df to df.csv, evaluate df, then read the file back into df2.
writetable("df.csv", df)
df
df2 = readtable("df.csv")
# Two small tables sharing the key column Z. In B the key "B" appears twice
# and "C" is absent, so the join kinds below can differ.
A = DataFrame(X = 1:3, Z = ["A", "B", "C"])
B = DataFrame(Y = 4:6, Z = ["A", "B", "B"])
# Join on Z without an explicit kind, then with each kind in turn.
join(A, B, on = "Z")
join(A, B, on = "Z", kind = :inner)
join(A, B, on = "Z", kind = :left)
join(A, B, on = "Z", kind = :right)
join(A, B, on = "Z", kind = :outer)
# Grouped summaries of iris by Species: apply nrow to each group, ...
by(iris, "Species", nrow)
# ... the mean of Petal.Length within each group, ...
by(iris, "Species", df -> mean(df["Petal.Length"]))
# ... and a quoted expression form.
# NOTE(review): _DF presumably refers to the per-group DataFrame; confirm
# against the DataFrames docs.
by(iris, "Species", :(N = size(_DF, 1)))
# Generalized linear models fitted on df (defined earlier) with GLM.
using GLM
# B ~ A with a Binomial family, then A ~ B with a Poisson family.
glm(:(B ~ A), df, Binomial())
glm(:(A ~ B), df, Poisson())
using Optim
# Toy objective for the optimizer demos: a squared term plus a quartic term.
# Both terms are even powers, so the value is never negative and is zero
# exactly at x = [10.73, 1134.29]. Only x[1] and x[2] are read.
# The argument is typed AbstractVector rather than Vector so that ranges,
# views and other vector-like inputs dispatch here as well.
f(x::AbstractVector) = (10.73 - x[1])^2 + (1134.29 - x[2])^4
# Evaluate the objective f at the starting point [0.0, 0.0].
f([0.0, 0.0])
# Minimize f from that starting point: first with optimize's default method,
# then with the method selected by :l_bfgs.
optimize(f, [0.0, 0.0])
optimize(f, [0.0, 0.0], method = :l_bfgs)
# 1_000 draws from Normal(11, 3); this rebinds x.
x = rand(Normal(11, 3), 1_000)
# Build and return a closure over the data x. The closure takes a parameter
# vector, constructs Normal(params[1], 3) from its first entry, and returns
# the negated loglikelihood of x under that distribution.
function makenll(x)
    function nll(params::Vector)
        model = Normal(params[1], 3)
        return -loglikelihood(model, x)
    end
    return nll
end
# Bind the closure returned by makenll to the current sample x.
nll = makenll(x)
# Evaluate it at two candidate values for the first parameter.
nll([0.0])
nll([10.0])
# Minimize nll starting from [0.0], then evaluate the sample mean for comparison.
optimize(nll, [0.0])
mean(x)
# More resources:
# Clustering example on the iris dataset.
using RDatasets
iris = data("datasets", "iris")
using Clustering
# k-means with 3 clusters on columns 2:5 of iris, converted to a matrix and
# transposed before being passed to kmeans.
kmeans(matrix(iris[:, 2:5])', 3)
# Per-Species means of columns 2 through 5, returned as columns A through D.
by(iris, "Species", df -> DataFrame(A = mean(df[2]),
B = mean(df[3]),
C = mean(df[4]),
D = mean(df[5])))