Introduction

trending aims to provide a coherent interface to several modelling tools. Whilst it is useful in an interactive context, its main focus is to provide an intuitive interface on which other packages can be developed (e.g. trendbreaker).

Main features

*   Requires brms

Example usage

An individual model

library(outbreaks)  # for data
library(trending)   # for trend fitting
library(dplyr, warn.conflicts = FALSE)  # for data manipulation

# load data
data(covid19_england_nhscalls_2020)

# define a model
model  <- glm_nb_model(count ~ day + weekday)

# select 8 weeks of data (from a period when the prevalence was decreasing);
# the first 6 weeks are used for fitting and the remainder for prediction
last_date <- as.Date("2020-05-28")
first_date <- last_date - 8*7
# keep the selected window and sum the counts within each
# date/day/weekday combination
pathways_recent <-
  covid19_england_nhscalls_2020 %>%
  filter(date >= first_date, date <= last_date) %>%
  group_by(date, day, weekday) %>%
  summarise(count = sum(count), .groups = "drop")

# split data for fitting and prediction
# (grouping on the logical `date <= first_date + 6*7` yields two pieces:
# [[1]] holds the rows where it is FALSE, [[2]] the rows where it is TRUE)
dat <-
  pathways_recent %>%
  group_by(date <= first_date + 6*7) %>%
  group_split()

fitting_data <- dat[[2]]  # dates up to and including first_date + 6 weeks
pred_data <- select(dat[[1]], date, day, weekday)  # later dates, predictors only

fitted_model <- fit(model, fitting_data)

# default: estimate with confidence (ci) and prediction (pi) intervals
fitted_model %>% 
  predict(pred_data) %>%
  glimpse()
#> Rows: 14
#> Columns: 8
#> $ date     <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day      <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday  <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
#> $ lower_pi <dbl> 8750, 7309, 7152, 9534, 7663, 7169, 7202, 6932, 5656, 5563, 7…
#> $ upper_pi <dbl> 17091, 14588, 14101, 19191, 15076, 14380, 14191, 13642, 11453…

# without prediction intervals: only the estimate and the confidence
# interval columns are added
fitted_model %>% 
  predict(pred_data, add_pi = FALSE) %>% 
  glimpse()
#> Rows: 14
#> Columns: 6
#> $ date     <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day      <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday  <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…

# without uncertainty
# NOTE: the output still contains both confidence and prediction intervals;
# `uncertainty = FALSE` presumably stops parameter uncertainty from being
# propagated into the prediction intervals -- confirm against ?predict
fitted_model %>% 
  predict(pred_data, uncertainty = FALSE) %>% 
  glimpse()
#> Rows: 14
#> Columns: 8
#> $ date     <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day      <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday  <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
#> $ lower_pi <dbl> 9070, 7401, 7198, 9615, 7675, 7489, 7136, 6847, 5702, 5393, 7…
#> $ upper_pi <dbl> 17184, 14557, 14030, 18996, 15255, 14584, 14255, 13568, 11433…

# non-bootstrapped (parametric) prediction intervals
fitted_model %>% 
  predict(pred_data, simulate_pi = FALSE) %>% 
  glimpse()
#> Rows: 14
#> Columns: 8
#> $ date     <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day      <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday  <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
#> $ lower_pi <dbl> 8107, 6618, 6373, 8363, 6962, 6701, 6450, 6208, 5079, 4889, 6…
#> $ upper_pi <dbl> 18870, 16223, 15714, 21784, 16638, 16124, 15626, 15145, 12992…

Multiple models

When working with a list of models, trending captures both errors and warnings so the user can choose how to proceed. In this situation the fitted output will be of class trending_model_fit_list, which is a list with named lists for each model containing components result, warnings and errors (shown in the printed output below as the columns fitted_model, fitting_warnings and fitting_errors). If an error occurs, then the error object is captured and the result has a NULL value. If the model does not error, then result is the corresponding model output and the errors entry will be NULL. Similarly, the warnings component will be a vector of all warnings that occurred.

# a named list of models; the names appear as `model_name` in the results
models  <- list(
  simple = lm_model(count ~ day),
  glm_poisson = glm_model(count ~ day, family = "poisson"),
  glm_negbin = glm_nb_model(count ~ day + weekday),
  # `nonexistant` is not a column of the data, so fitting this model errors;
  # it is included to show how errors are captured
  will_error = glm_nb_model(count ~ day + nonexistant)
)

# fit every model to the same data; errors are captured rather than raised
res <- models %>%
  fit(fitting_data)

res
#> # A tibble: 4 x 5
#>   model_name  data                 fitted_model  fitting_warnings fitting_errors
#>   <chr>       <list>               <named list>  <named list>     <named list>  
#> 1 simple      <tibble[,5] [43 × 5… <trndng__ [2… <NULL>           <NULL>        
#> 2 glm_poisson <tibble[,5] [43 × 5… <trndng__ [2… <NULL>           <NULL>        
#> 3 glm_negbin  <tibble[,5] [43 × 5… <trndng__ [2… <NULL>           <NULL>        
#> 4 will_error  <tibble[,5] [43 × 5… <NULL>        <NULL>           <chr [1]>

# inspect each column of the fit results, including the captured error
res %>% glimpse()
#> Rows: 4
#> Columns: 5
#> $ model_name       <chr> "simple", "glm_poisson", "glm_negbin", "will_error"
#> $ data             <list> [<tbl_df[43 x 5]>], [<tbl_df[43 x 5]>], [<tbl_df[43 x…
#> $ fitted_model     <named list> [[69201.584, -1092.913, 19109.1089, 11650.0217, 1789…
#> $ fitting_warnings <named list> <NULL>, <NULL>, <NULL>, <NULL>
#> $ fitting_errors   <named list> <NULL>, <NULL>, <NULL>, "object 'nonexistant' …

trending_model_fit_list objects can then be used with predict(), which similarly captures warnings and errors and returns the model predictions in a column called output.

# fit all models, then predict on `pred_data`; the predictions and any
# prediction warnings/errors are appended as extra columns
res <- models %>%
  fit(fitting_data) %>% 
  predict(pred_data)

res
#> # A tibble: 4 x 8
#>   model_name  data        fitted_model fitting_warnings fitting_errors output   
#>   <chr>       <list>      <named list> <named list>     <named list>   <named l>
#> 1 simple      <tibble[,5… <trndng__ [… <NULL>           <NULL>         <df[,8] …
#> 2 glm_poisson <tibble[,5… <trndng__ [… <NULL>           <NULL>         <df[,8] …
#> 3 glm_negbin  <tibble[,5… <trndng__ [… <NULL>           <NULL>         <df[,8] …
#> 4 will_error  <tibble[,5… <NULL>       <NULL>           <chr [1]>      <NULL>   
#> # … with 2 more variables: prediction_warnings <named list>,
#> #   prediction_errors <named list>

# inspect each column, including the captured prediction warnings and errors
res %>% glimpse()
#> Rows: 4
#> Columns: 8
#> $ model_name          <chr> "simple", "glm_poisson", "glm_negbin", "will_error"
#> $ data                <list> [<tbl_df[43 x 5]>], [<tbl_df[43 x 5]>], [<tbl_df[4…
#> $ fitted_model        <named list> [[69201.584, -1092.913, 19109.1089, 11650.0217, 1…
#> $ fitting_warnings    <named list> <NULL>, <NULL>, <NULL>, <NULL>
#> $ fitting_errors      <named list> <NULL>, <NULL>, <NULL>, "object 'nonexistan…
#> $ output              <named list> [<data.frame[14 x 8]>], [<data.frame[14 x 8…
#> $ prediction_warnings <named list> <NULL>, "The response is not continuous, so…
#> $ prediction_errors   <named list> <NULL>, <NULL>, <NULL>, "no applicable meth…