Most of these examples are adapted from Advanced R by Hadley Wickham (2nd Edition), Chapter 10: Function Factories.
In the exercises for section 10.2.6, we’re asked to produce a
pick
factory that basically acts like [[
, such
that pick(1)(x)
is equivalent to x[[1]]
. We
can relatively easily create this simple factory in {factory}.
pick <- build_factory(
function(x) x[[i]],
i
)
identical(pick(1)(mtcars), mtcars[[1]])
#> [1] TRUE
identical(pick(2)(mtcars), mtcars[[2]])
#> [1] TRUE
identical(pick(3)(mtcars$disp), mtcars$disp[[3]])
#> [1] TRUE
identical(
lapply(mtcars, pick(5)),
lapply(mtcars, function(x) x[[5]])
)
#> [1] TRUE
We’re also asked to create another factory, this time for finding the ith central moment. We first create a two-argument function to calculate the central moment.
moment2 <- function(x, i) {
1/length(x) *
sum(
(x - mean(x))^i
)
}
x <- runif(100)
all.equal(moment2(x, 1), 0)
#> [1] TRUE
all.equal(moment2(x, 2), var(x) * 99/100)
#> [1] TRUE
Since this works, we can pull i
out to make our
factory.
The {scales} package contains a number of function factories. These factories are written in the traditional format, and thus produce confusing functions. Let’s see if we can make them easier to work with.
One of the workhorse functions of {scales} is
number_format
.
scales::number_format
#> function (accuracy = NULL, scale = 1, prefix = "", suffix = "",
#> big.mark = " ", decimal.mark = ".", style_positive = c("none",
#> "plus", "space"), style_negative = c("hyphen", "minus",
#> "parens"), scale_cut = NULL, trim = TRUE, ...)
#> {
#> force_all(accuracy, scale, prefix, suffix, big.mark, decimal.mark,
#> style_positive, style_negative, scale_cut, trim, ...)
#> function(x) {
#> number(x, accuracy = accuracy, scale = scale, prefix = prefix,
#> suffix = suffix, big.mark = big.mark, decimal.mark = decimal.mark,
#> style_positive = style_positive, style_negative = style_negative,
#> scale_cut = scale_cut, trim = trim, ...)
#> }
#> }
#> <bytecode: 0x55915a6cebe0>
#> <environment: namespace:scales>
This factory takes several arguments, and returns a function that is
simply a call to the number
function. Let’s see if we can
recreate this factory. I’m naming the rebuilt versions with a
format_
prefix instead of suffix, to “fix” the “unfortunate
accident of history” mentioned by Hadley Wickham while discussing these
examples.
format_number <- build_factory(
function(x, ...) {
scales::number(
x,
accuracy = accuracy, scale = scale, prefix = prefix, suffix = suffix,
big.mark = big.mark, decimal.mark = decimal.mark, trim = trim, ...
)
},
accuracy = NULL,
scale = 1,
prefix = "",
suffix = "",
big.mark = " ",
decimal.mark = ".",
trim = TRUE,
.pass_dots = TRUE
)
identical(
scales::number_format(width = 8)(1:10 * 10000),
format_number(width = 8)(1:10 * 10000)
)
#> [1] TRUE
We had to do a couple special things to get our factory to behave like the {scales} version:
...
from the factory
without officially declaring dots as an argument to the manufactured
function. We more formally include the dots.build_factory
that we want to
pass_dots
from the factory to its constructed
functions.Our factory also works to define our own version of
comma_format
.
scales::comma_format
#> function (accuracy = NULL, scale = 1, prefix = "", suffix = "",
#> big.mark = ",", decimal.mark = ".", trim = TRUE, digits,
#> ...)
#> {
#> if (!missing(digits)) {
#> lifecycle::deprecate_stop(when = "1.0.0", what = "label_comma(digits)",
#> with = "label_comma(accuracy)")
#> }
#> number_format(accuracy = accuracy, scale = scale, prefix = prefix,
#> suffix = suffix, big.mark = big.mark, decimal.mark = decimal.mark,
#> trim = trim, ...)
#> }
#> <bytecode: 0x55915b326ff0>
#> <environment: namespace:scales>
format_comma <- function(accuracy = NULL, scale = 1, prefix = "",
suffix = "", big.mark = ",", decimal.mark = ".",
trim = TRUE, digits, ...) {
if (!missing(digits)) {
warning("`digits` argument is deprecated, use `accuracy` instead.",
.call = FALSE)
}
format_number(
accuracy = accuracy, scale = scale, prefix = prefix, suffix = suffix,
big.mark = big.mark, decimal.mark = decimal.mark, trim = trim, ...
)
}
identical(
scales::comma_format(width = 8)(1:10 * 10000),
format_comma(width = 8)(1:10 * 10000)
)
#> [1] TRUE
The binwidth
argument of
ggplot2::geom_histogram
can be a function. Let’s recreate
examples of binwidth function factories.
binwidth_bins <- build_factory(
function(x) {
(max(x) - min(x)) / n
},
n
)
sd <- c(1, 5, 15)
m <- 100
df <- data.frame(
x = rnorm(3 * m, sd = sd),
sd = rep(sd, m)
)
df %>%
ggplot2::ggplot() +
ggplot2::aes(x) +
ggplot2::geom_histogram(binwidth = 2) +
ggplot2::facet_wrap(~ sd, scales = "free_x") +
ggplot2::labs(x = NULL)
df %>%
ggplot2::ggplot() +
ggplot2::aes(x) +
ggplot2::geom_histogram(binwidth = binwidth_bins(20)) +
ggplot2::facet_wrap(~ sd, scales = "free_x") +
ggplot2::labs(x = NULL)
We can also wrap functions from {grDevices} that automatically find “optimal” binwidth.
base_bins <- build_factory(
.internal_variables = list(
nclass_fun = switch(
type,
Sturges = grDevices::nclass.Sturges,
scott = grDevices::nclass.scott,
FD = grDevices::nclass.FD,
stop("Unknown type", call. = FALSE)
)
),
fun = function(x) {
(max(x) - min(x)) / nclass_fun(x)
},
type
)
df %>%
ggplot2::ggplot() +
ggplot2::aes(x) +
ggplot2::geom_histogram(binwidth = base_bins("FD")) +
ggplot2::facet_wrap(~ sd, scales = "free_x") +
ggplot2::labs(x = NULL)
Function factories can also be used to create bootstrap generators.
boot_permute <- build_factory(
.internal_variables = list(
n = nrow(df)
),
fun = function() {
col <- df[[var]]
col[sample(n, replace = TRUE)]
},
df,
var
)
boot_mtcars1 <- boot_permute(mtcars, "mpg")
head(boot_mtcars1())
#> [1] 17.3 30.4 33.9 30.4 32.4 14.7
head(boot_mtcars1())
#> [1] 10.4 30.4 21.0 19.2 16.4 16.4
This is particularly useful when the bootstrap depends on a model.
boot_model <- build_factory(
.internal_variables = list(
mod = lm(formula, data = df),
fitted_vals = unname(fitted(mod)),
resid_vals = unname(resid(mod))
),
fun = function() {
fitted_vals + sample(resid_vals)
},
df,
formula
)
boot_mtcars2 <- boot_model(mtcars, mpg ~ wt)
head(boot_mtcars2())
#> [1] 23.08247 21.00000 23.00293 20.40251 21.00343 14.88789
head(boot_mtcars2())
#> [1] 22.25512 19.83382 26.08701 16.89729 16.28914 20.09060
Function factories are also useful for maximum likelihood estimation (MLE). Here we’ll compute lambda for a Poisson distribution.
ll_poisson <- build_factory(
.internal_variables = list(
n = length(x),
sum_x = sum(x),
c_var = sum(lfactorial(x))
),
fun = function(lambda) {
log(lambda) * sum_x - n * lambda - c_var
},
x
)
# Say we have this vector of observations.
x1 <- c(41, 30, 31, 38, 29, 24, 30, 29, 31, 38)
ll1 <- ll_poisson(x1)
ll1(10)
#> [1] -183.6405
ll1(20)
#> [1] -61.14028
ll1(30)
#> [1] -30.98598
optimize(ll1, c(0, 100), maximum = TRUE)
#> $maximum
#> [1] 32.09999
#>
#> $objective
#> [1] -30.26755
We can see that this is a more efficient process than not using a function factory.
# Slightly change the dataset to prove that the factory version isn't
# pre-computed. We also need a reasonably large x2 for the efficiency to pay off
# (it starts to pay off around size = 30, but size = 100 is clearer and closer
# to a real dataset).
x2 <- sample(20:50, size = 100, replace = TRUE)
# I'm defining both the factory and the non-factory function outside of optim.
lprob_poisson <- function(lambda, x) {
n <- length(x)
(log(lambda) * sum(x)) - (n * lambda) - sum(lfactorial(x))
}
bench::mark(
with_factory = {
ll2 <- ll_poisson(x2)
optimize(
ll2,
c(0, 100),
maximum = TRUE
)
},
without_factory = {
optimize(
lprob_poisson,
c(0, 100),
x = x2,
maximum = TRUE
)
}
)
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 with_factory 25.3µs 26.8µs 35997. 72.4KB 14.4
#> 2 without_factory 59.6µs 62.1µs 15676. 54.7KB 6.13