
library(microbenchmark)

generate_random_variables <- function(n, p){
  set.seed(100)
  matrix(rnorm(n*p), p, n)
}

for_loop <- function(X) {
  alpha <- matrix(NA, nrow(X), nrow(X))
  for (i in seq_len(nrow(X))) {
    for (j in seq(i, nrow(X))) {
      alpha[i, j] <- alpha[j, i] <- var(X[i,], X[j,])
    }
  }
  alpha
}
for_loop(generate_random_variables(20, 3))

apply_func <- function(X) {
  ind <- expand.grid(1:nrow(X), 1:nrow(X))
  matrix(
    apply(
      ind, 
      1,
      function(k) var(X[k[1], ], X[k[2], ])
    ),
    nrow = nrow(X)
  )
}
apply_func(generate_random_variables(20, 3))

diag_method <- function(X) {
  res1 <- matrix(data = NA_real_, nrow = nrow(X), ncol = nrow(X))
  diag(res1) <- apply(X, 1, var)
  res1[lower.tri(res1)] <- 
    combn(1:nrow(X), 2) %>%
    apply(2, function(k){
      var(X[k[1], ], X[k[2], ])
    })
  res1[upper.tri(res1)] <- res1 %>% t %>% .[upper.tri(.)]
  res1
}
diag_method(generate_random_variables(20, 3))

# var(x, y) = sum_i (X_i - X_bar) (Y_i - Y_bar)
formula_calc <- function(X) {
  temp <- sweep(X, 1, rowMeans(X), `-`)
  temp %*% t(temp) / (ncol(X)-1)
}
formula_calc(generate_random_variables(20, 3))

microbenchmark(
  for_loop(generate_random_variables(20000, 50)),
  apply_func(generate_random_variables(20000, 50)),
  diag_method(generate_random_variables(20000, 50)),
  formula_calc(generate_random_variables(20000, 50)),
  var(t(generate_random_variables(20000, 50))),
  times = 20L
)
# Unit: milliseconds
#          expr       min        lq      mean    median        uq       max
#      for_loop  791.3887  814.2140  876.8024  872.5140  907.3111 1071.6495
#    apply_func 1486.9081 1542.1674 1607.6525 1592.6940 1664.5603 1796.9124
#   diag_method  802.1050  825.0755  938.7255  888.7893  973.0518 1453.5382
#  formula_calc  105.0659  110.9181  133.3554  118.8704  140.6495  249.5756
#           var  122.8382  124.8452  125.7241  125.1085  126.3662  130.1424
