Is there such “colsd” in R?

I want to provide a fourth (very similar to @Thomas) approach and some benchmarking:

library("microbenchmark")
library("matrixStats")

colSdApply <- function(x, ...)apply(X=x, MARGIN=2, FUN=sd, ...)
colSdMatrixStats <- colSds

colSdColMeans <- function(x, na.rm=TRUE) {
  if (na.rm) {
    n <- colSums(!is.na(x)) # thanks @flodel
  } else {
    n <- nrow(x)
  }
  colVar <- colMeans(x*x, na.rm=na.rm) - (colMeans(x, na.rm=na.rm))^2
  return(sqrt(colVar * n/(n-1)))
}

colSdThomas <- function(x)sqrt(rowMeans((t(x)-colMeans(x))^2)*((dim(x)[1])/(dim(x)[1]-1)))

m <- matrix(runif(1e7), nrow=1e3)

microbenchmark(colSdApply(m), colSdMatrixStats(m), colSdColMeans(m), colSdThomas(m))

# Unit: milliseconds
#                 expr      min       lq   median       uq      max neval
#        colSdApply(m) 435.7346 448.8673 456.6176 476.8373 512.9783   100
#  colSdMatrixStats(m) 344.6416 357.5439 383.8736 389.0258 465.5715   100
#     colSdColMeans(m) 124.2028 128.9016 132.9446 137.6254 172.6407   100
#       colSdThomas(m) 231.5567 240.3824 245.4072 274.6611 307.3806   100


all.equal(colSdApply(m), colSdMatrixStats(m))
# [1] TRUE
all.equal(colSdApply(m), colSdColMeans(m))
# [1] TRUE
all.equal(colSdApply(m), colSdThomas(m))
# [1] TRUE

Leave a Comment