The goal of hydrorecipes is to supplement the recipes package with a few steps that can help deal with moderately sized water level datasets. These were developed primarily with regression deconvolution in mind and used lm or glmnet, but other model engines could also be used. The following steps are currently available:
You can install the development version of hydrorecipes from GitHub with:
# install.packages("remotes")
::install_github("jkennel/hydrorecipes") remotes
This is the method of Kennel 2020 which uses a distributed lag model and the earthtide package to generate synthetic wave groups. A ~1.5 month dataset of water and barometric pressure having a monitoring frequency of 2 minutes is presented below. The barometric response is modeled over two days using a distributed lag model with 15 regressor terms. The knots are logarithmically separated over two days to accurately capture early and late time responses which can be caused by different physical mechanisms.
library(hydrorecipes)
library(earthtide)
library(tidyr)
library(ggplot2)
data(transducer)
# convert to numeric because step_ns doesn't handle POSIXct
$datetime_num <- as.numeric(transducer$datetime)
transducer
unique(diff(transducer$datetime_num)) # times are regularly spaced
#> [1] 120
# Earth tide inputs
<- earthtide::eterna_wavegroups
wave_groups <- na.omit(wave_groups[wave_groups$time == '1 month', ])
wave_groups <- wave_groups[wave_groups$start > 0.5, ]
wave_groups <- 34.0
latitude <- -118.5
longitude
# create recipe
<- recipe(wl~baro+datetime_num, transducer) |>
rec step_distributed_lag(baro, knots = log_lags(15, 86400 * 2 / 120)) |>
step_earthtide(datetime_num,
latitude = latitude,
longitude = longitude,
astro_update = 1,
wave_groups = wave_groups) |>
step_ns(datetime_num, deg_free = 10) |>
prep()
<- rec |> bake(new_data = NULL) input
summary(fit <- lm(wl~., input))
#>
#> Call:
#> lm(formula = wl ~ ., data = input)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.483e-03 -2.351e-04 -1.957e-05 2.122e-04 1.661e-03
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 5.321e+00 2.253e-03 2361.583 < 2e-16 ***
#> distributed_lag_baro_1 -1.940e-01 1.153e-02 -16.823 < 2e-16 ***
#> distributed_lag_baro_2 2.382e-02 8.787e-03 2.710 0.00672 **
#> distributed_lag_baro_3 7.708e-03 5.264e-03 1.464 0.14308
#> distributed_lag_baro_4 1.379e-02 2.477e-03 5.566 2.63e-08 ***
#> distributed_lag_baro_5 9.756e-03 1.226e-03 7.955 1.84e-15 ***
#> distributed_lag_baro_6 7.818e-03 5.498e-04 14.218 < 2e-16 ***
#> distributed_lag_baro_7 6.230e-03 2.341e-04 26.609 < 2e-16 ***
#> distributed_lag_baro_8 3.435e-03 9.674e-05 35.510 < 2e-16 ***
#> distributed_lag_baro_9 1.633e-03 4.091e-05 39.918 < 2e-16 ***
#> distributed_lag_baro_10 1.946e-04 1.802e-05 10.800 < 2e-16 ***
#> distributed_lag_baro_11 6.958e-05 8.130e-06 8.558 < 2e-16 ***
#> distributed_lag_baro_12 -6.846e-05 4.434e-06 -15.441 < 2e-16 ***
#> distributed_lag_baro_13 -7.734e-02 1.515e-03 -51.058 < 2e-16 ***
#> distributed_lag_baro_14 2.007e-01 3.929e-03 51.080 < 2e-16 ***
#> distributed_lag_baro_15 -1.235e-01 2.415e-03 -51.131 < 2e-16 ***
#> earthtide_cos_1 -9.055e-06 3.359e-06 -2.696 0.00702 **
#> earthtide_sin_1 4.111e-06 3.390e-06 1.213 0.22524
#> earthtide_cos_2 8.808e-05 3.475e-06 25.349 < 2e-16 ***
#> earthtide_sin_2 -3.562e-05 3.502e-06 -10.173 < 2e-16 ***
#> earthtide_cos_3 2.843e-05 2.970e-06 9.573 < 2e-16 ***
#> earthtide_sin_3 -3.611e-05 2.896e-06 -12.467 < 2e-16 ***
#> earthtide_cos_4 1.940e-04 7.839e-06 24.750 < 2e-16 ***
#> earthtide_sin_4 4.292e-05 6.851e-06 6.265 3.78e-10 ***
#> earthtide_cos_5 -4.412e-06 3.250e-06 -1.357 0.17464
#> earthtide_sin_5 -1.397e-05 3.251e-06 -4.298 1.73e-05 ***
#> earthtide_cos_6 1.090e-05 3.714e-06 2.934 0.00335 **
#> earthtide_sin_6 -8.238e-06 3.728e-06 -2.210 0.02712 *
#> earthtide_cos_7 4.002e-06 2.065e-06 1.938 0.05263 .
#> earthtide_sin_7 -1.588e-05 2.067e-06 -7.684 1.59e-14 ***
#> earthtide_cos_8 5.720e-05 2.562e-06 22.331 < 2e-16 ***
#> earthtide_sin_8 -4.668e-05 2.562e-06 -18.219 < 2e-16 ***
#> earthtide_cos_9 3.009e-04 2.695e-06 111.630 < 2e-16 ***
#> earthtide_sin_9 -2.924e-04 2.683e-06 -109.003 < 2e-16 ***
#> earthtide_cos_10 -6.168e-06 3.315e-06 -1.860 0.06285 .
#> earthtide_sin_10 5.686e-05 3.306e-06 17.198 < 2e-16 ***
#> earthtide_cos_11 2.369e-04 6.471e-06 36.605 < 2e-16 ***
#> earthtide_sin_11 -5.476e-05 5.819e-06 -9.410 < 2e-16 ***
#> earthtide_cos_12 2.122e-06 2.531e-06 0.838 0.40194
#> earthtide_sin_12 -2.205e-06 2.536e-06 -0.870 0.38448
#> datetime_num_ns_01 -2.965e-02 2.892e-05 -1025.095 < 2e-16 ***
#> datetime_num_ns_02 -4.064e-02 3.792e-05 -1071.627 < 2e-16 ***
#> datetime_num_ns_03 -6.178e-02 3.313e-05 -1864.753 < 2e-16 ***
#> datetime_num_ns_04 -7.808e-02 3.445e-05 -2266.602 < 2e-16 ***
#> datetime_num_ns_05 -9.233e-02 3.316e-05 -2784.028 < 2e-16 ***
#> datetime_num_ns_06 -1.100e-01 3.553e-05 -3095.275 < 2e-16 ***
#> datetime_num_ns_07 -1.264e-01 3.354e-05 -3768.390 < 2e-16 ***
#> datetime_num_ns_08 -1.355e-01 2.239e-05 -6053.491 < 2e-16 ***
#> datetime_num_ns_09 -1.639e-01 7.195e-05 -2278.361 < 2e-16 ***
#> datetime_num_ns_10 -1.499e-01 1.563e-05 -9587.262 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.0003564 on 35231 degrees of freedom
#> (1440 observations deleted due to missingness)
#> Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
#> F-statistic: 1.067e+07 on 49 and 35231 DF, p-value: < 2.2e-16
The decomposition consists of:
<- predict_terms(fit = fit,
pred rec = rec,
data = input)
<- bind_cols(transducer[, c('datetime', 'wl')], pred)
pred $residuals <- pred$wl - pred$predicted
pred<- pivot_longer(pred, cols = !datetime)
pred_long <-c('intercept', 'ns_datetime_num', 'distributed_lag_baro',
levels 'earthtide_datetime_num', 'predicted', 'wl', 'residuals')
<- c('Intercept', 'Background trend', 'Barometric Component',
labels 'Earth Tide Component', 'Predicted', 'Water Pressure',
'Residuals (obs-mod)')
$name <- factor(pred_long$name,
pred_longlevels = levels,
labels = labels)
ggplot(pred_long, aes(x = datetime, y = value)) +
geom_line() +
scale_y_continuous(labels = scales::comma) +
scale_x_datetime(expand = c(0,0)) +
ggtitle('Water Level Decomposition Results') +
xlab("") +
facet_grid(name~., scales = 'free_y') +
theme_bw()
There are two responses for this model:
<- response(fit, rec)
resp <- resp[resp$name == 'cumulative', ]
resp_ba <- resp_ba[resp_ba$term == 'baro', ]
resp_ba ggplot(resp_ba, aes(x = x * 120 / 3600, y = value)) +
ggtitle('A: Barometric Loading Response') +
xlab('lag (hours)') +
ylab('Cumulative Response') +
scale_y_continuous(limits = c(0, 1)) +
geom_line() +
theme_bw()
<- resp[resp$name %in% c('amplitude', 'phase'), ]
resp_et ggplot(resp_et, aes(x = x, xend = x, y = 0, yend = value)) +
geom_segment() +
ggtitle('B: Earthtide Response') +
xlab('Frequency (cycles per day)') +
ylab('Phase (radians) | Amplitude (dbar)') +
facet_grid(name~., scales = 'free_y') +
theme_bw()