Fitting epicurves

Tim Taylor

Example

To illustrate the trend fitting functionality of i2extras we will use the simulated Ebola Virus Disease (EVD) outbreak data from the outbreaks package.

Loading relevant packages and data

library(outbreaks)
library(incidence2)
library(i2extras)

raw_dat <- ebola_sim_clean$linelist

For this example we will restrict ourselves to the first 20 weeks of data:

dat <- incidence(
    raw_dat, 
    date_index = date_of_onset,
    interval = "week"
)[1:20, ]
dat
#> An incidence object: 20 x 2
#> date range: [2014-W15] to [2014-W34]
#> cases: 783
#> interval: 1 (Monday) week 
#> 
#>    date_index count
#>        <yrwk> <int>
#>  1   2014-W15     1
#>  2   2014-W16     1
#>  3   2014-W17     5
#>  4   2014-W18     4
#>  5   2014-W19    12
#>  6   2014-W20    17
#>  7   2014-W21    15
#>  8   2014-W22    19
#>  9   2014-W23    23
#> 10   2014-W24    21
#> 11   2014-W25    30
#> 12   2014-W26    22
#> 13   2014-W27    34
#> 14   2014-W28    38
#> 15   2014-W29    61
#> 16   2014-W30    59
#> 17   2014-W31    80
#> 18   2014-W32    86
#> 19   2014-W33   116
#> 20   2014-W34   139
plot(dat)

Modeling incidence

We can use fit_curve() to fit the data with either a poisson or negative binomial regression model. The output from this will be a nested object with class incidence2_fit which has methods available for both automatic plotting and the calculation of growth (decay) rates and doubling (halving) times.

out <- fit_curve(dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 1 x 8
#>   count_variable           data model  estimates   fitting_warning fitting_error
#>   <chr>          <list<tibble[> <list> <list>      <list>          <list>       
#> 1 count                [20 × 2] <glm>  <df [20 × … <NULL>          <NULL>       
#> # … with 2 more variables: prediction_warning <list>, prediction_error <list>
plot(out)

growth_rate(out)
#> # A tibble: 1 x 9
#>   count_variable model      r r_lower r_upper growth_or_decay  time time_lower
#>   <chr>          <list> <dbl>   <dbl>   <dbl> <chr>           <dbl>      <dbl>
#> 1 count          <glm>  0.184   0.168   0.200 doubling         3.77       3.46
#> # … with 1 more variable: time_upper <dbl>

To unnest the data we can use unnest() (a function that has been reexported from the tidyr package.

unnest(out, estimates)
#> # A tibble: 20 x 14
#>    count_variable         data model count date_index estimate lower_ci upper_ci
#>    <chr>          <list<tibbl> <lis> <int>     <yrwk>    <dbl>    <dbl>    <dbl>
#>  1 count              [20 × 2] <glm>     1   2014-W15     4.09     3.20     5.23
#>  2 count              [20 × 2] <glm>     1   2014-W16     4.92     3.91     6.19
#>  3 count              [20 × 2] <glm>     5   2014-W17     5.92     4.77     7.33
#>  4 count              [20 × 2] <glm>     4   2014-W18     7.11     5.82     8.68
#>  5 count              [20 × 2] <glm>    12   2014-W19     8.55     7.11    10.3 
#>  6 count              [20 × 2] <glm>    17   2014-W20    10.3      8.67    12.2 
#>  7 count              [20 × 2] <glm>    15   2014-W21    12.3     10.6     14.4 
#>  8 count              [20 × 2] <glm>    19   2014-W22    14.8     12.9     17.1 
#>  9 count              [20 × 2] <glm>    23   2014-W23    17.8     15.7     20.3 
#> 10 count              [20 × 2] <glm>    21   2014-W24    21.4     19.1     24.0 
#> 11 count              [20 × 2] <glm>    30   2014-W25    25.8     23.3     28.5 
#> 12 count              [20 × 2] <glm>    22   2014-W26    31.0     28.3     33.9 
#> 13 count              [20 × 2] <glm>    34   2014-W27    37.2     34.3     40.4 
#> 14 count              [20 × 2] <glm>    38   2014-W28    44.8     41.5     48.2 
#> 15 count              [20 × 2] <glm>    61   2014-W29    53.8     50.1     57.7 
#> 16 count              [20 × 2] <glm>    59   2014-W30    64.7     60.3     69.4 
#> 17 count              [20 × 2] <glm>    80   2014-W31    77.7     72.2     83.7 
#> 18 count              [20 × 2] <glm>    86   2014-W32    93.4     86.2    101.  
#> 19 count              [20 × 2] <glm>   116   2014-W33   112.     103.     123.  
#> 20 count              [20 × 2] <glm>   139   2014-W34   135.     122.     149.  
#> # … with 6 more variables: lower_pi <int>, upper_pi <int>,
#> #   fitting_warning <list>, fitting_error <list>, prediction_warning <list>,
#> #   prediction_error <list>

fit_curve() also works nicely with grouped incidence2 objects. In this situation, we return a nested tibble with some additional columns that indicate whether there has been a warning or error during the fitting or prediction stages.

grouped_dat <- incidence(
    raw_dat, 
    date_index = date_of_onset,
    interval = "week",
    groups = hospital
)[1:120, ]
grouped_dat
#> An incidence object: 120 x 3
#> date range: [2014-W15] to [2014-W38]
#> cases: 1621
#> interval: 1 (Monday) week 
#> 
#>    date_index hospital                                     count
#>        <yrwk> <fct>                                        <int>
#>  1   2014-W15 Military Hospital                                1
#>  2   2014-W16 Connaught Hospital                               1
#>  3   2014-W17 <NA>                                             2
#>  4   2014-W17 other                                            3
#>  5   2014-W18 <NA>                                             1
#>  6   2014-W18 Connaught Hospital                               1
#>  7   2014-W18 Princess Christian Maternity Hospital (PCMH)     1
#>  8   2014-W18 Rokupa Hospital                                  1
#>  9   2014-W19 <NA>                                             1
#> 10   2014-W19 Connaught Hospital                               3
#> # … with 110 more rows

out <- fit_curve(grouped_dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 6 x 9
#>   count_variable hospital     data model estimates fitting_warning fitting_error
#>   <chr>          <fct>    <list<t> <lis> <list>    <list>          <list>       
#> 1 count          Connaug… [22 × 2] <glm> <df [22 … <NULL>          <NULL>       
#> 2 count          Militar… [21 × 2] <glm> <df [21 … <NULL>          <NULL>       
#> 3 count          other    [20 × 2] <glm> <df [20 … <NULL>          <NULL>       
#> 4 count          Princes… [17 × 2] <glm> <df [17 … <NULL>          <NULL>       
#> 5 count          Rokupa … [18 × 2] <glm> <df [18 … <NULL>          <NULL>       
#> 6 count          <NA>     [22 × 2] <glm> <df [22 … <NULL>          <NULL>       
#> # … with 2 more variables: prediction_warning <list>, prediction_error <list>

# plot with a prediction interval but not a confidence interval
plot(out, ci = FALSE, pi=TRUE)

growth_rate(out)
#> # A tibble: 6 x 10
#>   count_variable hospital      model     r r_lower r_upper growth_or_decay  time
#>   <chr>          <fct>         <lis> <dbl>   <dbl>   <dbl> <chr>           <dbl>
#> 1 count          Connaught Ho… <glm> 0.197   0.177   0.217 doubling         3.53
#> 2 count          Military Hos… <glm> 0.173   0.147   0.200 doubling         4.00
#> 3 count          other         <glm> 0.170   0.141   0.200 doubling         4.09
#> 4 count          Princess Chr… <glm> 0.142   0.101   0.188 doubling         4.87
#> 5 count          Rokupa Hospi… <glm> 0.178   0.133   0.228 doubling         3.89
#> 6 count          <NA>          <glm> 0.184   0.164   0.205 doubling         3.77
#> # … with 2 more variables: time_lower <dbl>, time_upper <dbl>

We provide helper functions, is_ok(), is_warning() and is_error() to help filter the output as necessary.

out <- fit_curve(grouped_dat, model = "negbin", alpha = 0.05)
is_warning(out)
#> # A tibble: 5 x 7
#>   count_variable hospital                  data model estimates  fitting_warning
#>   <chr>          <fct>              <list<tibb> <lis> <list>     <list>         
#> 1 count          Connaught Hospital    [22 × 2] <neg… <df [22 ×… <chr [2]>      
#> 2 count          other                 [20 × 2] <neg… <df [20 ×… <chr [2]>      
#> 3 count          Princess Christia…    [17 × 2] <neg… <df [17 ×… <chr [2]>      
#> 4 count          Rokupa Hospital       [18 × 2] <neg… <df [18 ×… <chr [2]>      
#> 5 count          <NA>                  [22 × 2] <neg… <df [22 ×… <chr [2]>      
#> # … with 1 more variable: prediction_warning <list>
unnest(is_warning(out), fitting_warning)
#> # A tibble: 10 x 7
#>    count_variable hospital                data model  estimates fitting_warning 
#>    <chr>          <fct>            <list<tibb> <list> <list>    <chr>           
#>  1 count          Connaught Hospi…    [22 × 2] <negb… <df [22 … iteration limit…
#>  2 count          Connaught Hospi…    [22 × 2] <negb… <df [22 … iteration limit…
#>  3 count          other               [20 × 2] <negb… <df [20 … iteration limit…
#>  4 count          other               [20 × 2] <negb… <df [20 … iteration limit…
#>  5 count          Princess Christ…    [17 × 2] <negb… <df [17 … iteration limit…
#>  6 count          Princess Christ…    [17 × 2] <negb… <df [17 … iteration limit…
#>  7 count          Rokupa Hospital     [18 × 2] <negb… <df [18 … iteration limit…
#>  8 count          Rokupa Hospital     [18 × 2] <negb… <df [18 … iteration limit…
#>  9 count          <NA>                [22 × 2] <negb… <df [22 … iteration limit…
#> 10 count          <NA>                [22 × 2] <negb… <df [22 … iteration limit…
#> # … with 1 more variable: prediction_warning <list>

Rolling average

We can add a rolling average, across current and previous intervals, to an incidence2 object with the add_rolling_average() function:

ra <- add_rolling_average(grouped_dat, before = 2) # group observations with the 2 prior
ra
#> # A tibble: 6 x 3
#>   count_variable hospital                                     rolling_average  
#>   <chr>          <fct>                                        <list>           
#> 1 count          Connaught Hospital                           <tibble [22 × 3]>
#> 2 count          Military Hospital                            <tibble [21 × 3]>
#> 3 count          other                                        <tibble [20 × 3]>
#> 4 count          Princess Christian Maternity Hospital (PCMH) <tibble [17 × 3]>
#> 5 count          Rokupa Hospital                              <tibble [18 × 3]>
#> 6 count          <NA>                                         <tibble [22 × 3]>
unnest(ra, rolling_average)
#> # A tibble: 120 x 5
#>    count_variable hospital           date_index count rolling_average
#>    <chr>          <fct>                  <yrwk> <int>           <dbl>
#>  1 count          Connaught Hospital   2014-W16     1           NA   
#>  2 count          Connaught Hospital   2014-W18     1           NA   
#>  3 count          Connaught Hospital   2014-W19     3            1.67
#>  4 count          Connaught Hospital   2014-W20     2            2   
#>  5 count          Connaught Hospital   2014-W21     5            3.33
#>  6 count          Connaught Hospital   2014-W22     4            3.67
#>  7 count          Connaught Hospital   2014-W23     6            5   
#>  8 count          Connaught Hospital   2014-W24     6            5.33
#>  9 count          Connaught Hospital   2014-W25    12            8   
#> 10 count          Connaught Hospital   2014-W26     8            8.67
#> # … with 110 more rows

plot(ra, color = "white")
#> Warning: Removed 12 rows containing missing values (position_stack).

#> Warning: Removed 12 rows containing missing values (position_stack).