This document serves as an overview for measuring the performance of
RcppAlgos
against other tools for generating combinations,
permutations, and partitions. This stackoverflow post: Permutations and
combinations with/without replacement and for distinct/non-distinct
items/multiset has some benchmarks. You will note that the examples
in that post were relatively small. The benchmarks below will focus on
larger examples where performance really matters and for this reason we
only consider the packages arrangements,
partitions,
and RcppAlgos.
For the benchmarks below, we used a Macbook Pro i7 16Gb
machine. We also tested on a Windows and Linux machine with similar
specs and obtained similar results.
library(RcppAlgos)
library(partitions)
library(arrangements)
library(microbenchmark)
<- capture.output(sessionInfo())
pertinent_output
options(digits = 4)
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.1.3 (2022-03-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Monterey 12.3
cat(pertinent_output[which(pertinent_output == "other attached packages:") + 1L])
#> [1] microbenchmark_1.4-7 arrangements_1.1.9 partitions_1.10-2 RcppAlgos_2.5.1
<- as.integer(RcppAlgos::stdThreadMax() / 2)
numThreads print(numThreads)
#> [1] 4
set.seed(13)
<- sort(sample(100, 30))
v1 <- 21
m <- comboGeneral(v1, m, Parallel = T)
t1 <- combinations(v1, m)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 14307150 21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v1, m),
cbArrangements = combinations(v1, m),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a
#> cbRcppAlgosSer 3.581 2.518 2.516 2.536 2.387 2.336 15 b
#> cbArrangements 3.564 2.502 2.510 2.510 2.398 2.372 15 b
<- v1[1:10]
v2 <- 20
m <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t1 <- combinations(v2, m, replace = TRUE)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 10015005 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
cbArrangements = combinations(v2, m, replace = TRUE),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a
#> cbRcppAlgosSer 2.350 2.554 2.459 2.538 2.462 2.186 15 c
#> cbArrangements 2.421 2.420 2.369 2.420 2.344 2.341 15 b
<- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
myFreqs <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
v3 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t1 <- combinations(freq = myFreqs, k = 20, x = v3)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 14594082 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 2.608 2.589 2.378 2.505 2.107 2.160 10 b
#> cbArrangements 3.776 3.931 3.634 3.829 3.220 3.397 10 c
<- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
v4 <- permuteGeneral(v4, 6, nThreads = numThreads)
t1 <- permutations(v4, 6)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 8910720 6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v4, 6),
cbArrangements = permutations(v4, 6),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a
#> cbRcppAlgosSer 2.324 2.340 2.178 2.380 2.436 1.527 15 b
#> cbArrangements 3.050 3.035 2.893 3.033 3.524 1.950 15 c
## Indexing permutation example with the partitions package
<- permuteGeneral(11, nThreads = 4)
t1 <- permutations(11)
t2 <- perms(11)
t3
dim(t1)
#> [1] 39916800 11
identical(t1, t2)
#> [1] TRUE
identical(t1, t(as.matrix(t3)))
#> [1] TRUE
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
cbRcppAlgosSer = permuteGeneral(11),
cbArrangements = permutations(11),
cbPartitions = perms(11),
times = 5, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5 a
#> cbRcppAlgosSer 3.129 3.299 2.621 3.284 1.947 2.289 5 b
#> cbArrangements 3.664 3.630 2.791 3.639 2.201 2.032 5 b
#> cbPartitions 10.040 9.932 7.974 10.901 6.497 5.797 5 c
<- v3[1:5]
v5 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t1 <- permutations(v5, 10, replace = TRUE)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 9765625 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.261 3.300 4.096 3.296 3.412 6.233 10 b
#> cbArrangements 3.185 3.155 3.067 3.121 3.109 2.655 10 b
<- sort(runif(12))
v6 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t1 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 19520760 7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.396 3.412 2.370 2.930 1.600 1.971 10 b
#> cbArrangements 3.539 3.552 2.521 3.021 2.074 1.990 10 b
<- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
t1 constraintFun = "sum", comparisonFun = "==",
limitConstraints = 140)
<- partitions(140, distinct = TRUE)
t2 <- diffparts(140)
t3
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
identical(dim(t1), dim(t2))
#> [1] TRUE
identical(dim(t1), dim(t(t3)))
#> [1] TRUE
all(rowSums(t1) == 140)
#> [1] TRUE
all(rowSums(t2) == 140)
#> [1] TRUE
all(colSums(t3) == 140)
#> [1] TRUE
dim(t1)
#> [1] 9617150 16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
cbArrangements = partitions(140, distinct = TRUE),
cbPartitions = diffparts(140),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.033 3.055 3.030 3.105 3.613 2.345 10 b
#> cbArrangements 2.843 2.875 2.939 3.184 3.860 2.202 10 b
#> cbPartitions 19.742 20.600 18.420 20.926 20.458 12.175 10 c
<- comboGeneral(160, 10,
t1 constraintFun = "sum", comparisonFun = "==",
limitConstraints = 160)
<- partitions(160, 10, distinct = TRUE)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 8942920 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(160, 10),
cbArrangements = partitions(160, 10, distinct = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.134 3.141 3.002 3.143 3.342 1.838 10 b
#> cbArrangements 3.430 3.426 3.066 3.394 3.379 1.925 10 b
<- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
t1 comparisonFun = "==", limitConstraints = 65)
<- partitions(65)
t2 <- parts(65)
t3
# Each package has different output formats... we only dimensions
# and that each result is a partition of 65
identical(dim(t1), dim(t2))
#> [1] TRUE
identical(dim(t1), dim(t(t3)))
#> [1] TRUE
all(rowSums(t1) == 65)
#> [1] TRUE
all(rowSums(t2) == 65)
#> [1] TRUE
all(colSums(t3) == 65)
#> [1] TRUE
dim(t1)
#> [1] 2012558 65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
cbArrangements = partitions(65),
cbPartitions = parts(65),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a
#> cbRcppAlgosSer 2.996 2.985 2.362 2.691 2.102 1.688 20 b
#> cbArrangements 2.901 2.844 2.297 2.608 2.042 1.831 20 b
#> cbPartitions 10.852 10.695 7.695 9.581 6.311 4.514 20 c
<- comboGeneral(100, 15, TRUE, constraintFun = "sum",
t1 comparisonFun = "==", limitConstraints = 100)
<- partitions(100, 15)
t2 identical(t1, t2)
#> [1] TRUE
dim(t1)
#> [1] 9921212 15
rm(t1, t2)
# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
<- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
t3 identical(partitions(50, 15), t3)
#> [1] TRUE
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
cbArrangements = partitions(100, 15),
cbPartitions = restrictedparts(100, 15,
include.zero = FALSE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.054 3.049 2.222 1.857 2.132 2.115 10 b
#> cbArrangements 3.035 3.047 2.191 1.853 2.088 2.096 10 b
#> cbPartitions 15.445 15.745 10.670 9.610 9.221 9.002 10 c
Currenlty, RcppAlgos
is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos
and use this as a reference for future
improvements.
<- comboGeneral(120, 10, freqs=rep(1:8, 15),
t1 constraintFun = "sum", comparisonFun = "==",
limitConstraints = 120)
dim(t1)
#> [1] 7340225 10
all(rowSums(t1) == 120)
#> [1] TRUE
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
times = 10)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 715.2 724.8 750.5 735.3 791.5 804.5 10
### In RcppAlgos 2.3.6 - 2.4.3
#> Unit: seconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1172.9 1175.1 1223.5 1193.3 1200.5 1482.0 10 10
We will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos
. The results are
similar for the rest of the cases not shown.
<- function(n, total) {
pkg_arrangements <- icombinations(n, as.integer(n / 2))
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- comboIter(n, as.integer(n / 2))
a for (i in 1:total) a@nextIter()
}
<- comboCount(18, 9)
total
total#> [1] 48620
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total),
cbArrangements = pkg_arrangements(18, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1 1.00 15 a
#> cbArrangements 26.56 25.94 24.94 25.39 25 19.61 15 b
<- function(n, total) {
pkg_arrangements <- ipermutations(n)
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- permuteIter(n)
a for (i in 1:total) a@nextIter()
}
<- permuteCount(8)
total
total#> [1] 40320
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total),
cbArrangements = pkg_arrangements(8, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a
#> cbArrangements 26.12 25.83 24.53 24.95 24.03 22.15 15 b
<- function(n, total) {
pkg_partitions <- firstpart(n)
a for (i in 1:(total - 1)) a <- nextpart(a)
}
<- function(n, total) {
pkg_arrangements <- ipartitions(n)
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- partitionsIter(0:n, repetition = TRUE)
a for (i in 1:total) a@nextIter()
}
<- partitionsCount(0:40, repetition = TRUE)
total
total#> [1] 37338
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total),
cbArrangements = pkg_arrangements(40, total),
cbPartitions = pkg_partitions(40, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a
#> cbArrangements 20.84 20.97 20.53 20.51 20.69 18.32 15 b
#> cbPartitions 38.10 38.29 37.01 37.66 37.01 30.80 15 c