## See Advanced R 17.3 by Wickham for more on the microbenchmark package.
library(microbenchmark)

## Create data sets with sizes that are powers of 2 (2^11 .. 2^15).
ns <- 2^(11:15)
x <- vector("list", length(ns))
for (ii in seq_along(x)) {
  x[[ii]] <- rnorm(ns[ii])
}

## Computes the mean of data set x[[ii]].
MeanTest <- function(ii) {
  mean(x[[ii]])
}

## Time the mean computation at each sample size.
out <- microbenchmark(
  MeanTest(1), MeanTest(2), MeanTest(3), MeanTest(4), MeanTest(5)
)
summary(out)
summary(out)$median

## Plot median execution time against sample size (mean is O(n)).
options(repr.plot.height = 4, repr.plot.width = 5)
plot(ns, summary(out)$median,
     xlab = "n (sample size)", ylab = "Median Execution Time")

## Compare mean() and sd() timing at two sample sizes.
x <- rnorm(1e6)
y <- rnorm(1e7)
out <- microbenchmark(mean(x), mean(y), sd(x), sd(y))
summary(out)

## Compare mean() and median() on the same data.
x <- rnorm(1e6)
out <- microbenchmark(mean(x), median(x))
summary(out)

## Generate data from a 2-dimensional Gaussian mixture model.
library(MixSim)
set.seed(1)
Q <- MixSim(BarOmega = 0.01, K = 2, p = 2)
A <- simdataset(n = 500, Pi = Q$Pi, Mu = Q$Mu, S = Q$S)
plot(A$X)

## Computes Euclidean distance between all pairs of observations.
D <- dist(A$X)
## Finds groups using distances (agglomerative hierarchical clustering).
out <- hclust(D, method = "complete", members = NULL)
## Find two groups, get membership, and color points by group.
group <- cutree(out, k = 2)
plot(A$X, col = group)

## How does this algorithm scale?
## Analyze just the computation of D.
## Create data sets with sizes that are powers of 2 (2^6 .. 2^10).
ns <- 2^(6:10)
X <- vector("list", length(ns))
for (ii in seq_along(X)) {
  X[[ii]] <- simdataset(n = ns[ii], Pi = Q$Pi, Mu = Q$Mu, S = Q$S)$X
}

## Computes the pairwise distance matrix for data set X[[ii]].
DistanceTest <- function(ii) {
  dist(X[[ii]])
}

## Time the distance computation at each sample size (expected O(n^2)).
out <- microbenchmark(
  DistanceTest(1), DistanceTest(2), DistanceTest(3),
  DistanceTest(4), DistanceTest(5),
  times = 20
)
summary(out)
plot(ns, summary(out)$median,
     xlab = "n (sample size)", ylab = "Median Execution Time")