This short tutorial gives an example of how one can statistically assess whether a market is in an equilibrium state. The tutorial assumes some familiarity with the concepts and the functionality of the package. The basic_usage vignette can be helpful in acquiring this familiarity.
Load the required libraries.
Prepare the data. Here, we simply simulate data using a data generating process for a market in equilibrium.
nobs <- 5000
tobs <- 5
alpha_d <- -1.9
beta_d0 <- 4.9
beta_d <- c(2.1, -0.7)
eta_d <- c(3.5, 6.25)
alpha_s <- 2.8
beta_s0 <- 1.2
beta_s <- c(0.65)
eta_s <- c(1.15, 4.2)
sigma_d <- 1
sigma_s <- 1
rho_ds <- 0.5
seed <- 443
eq_data <- simulate_model_data(
  "equilibrium_model", nobs, tobs,
  alpha_d, beta_d0, beta_d, eta_d,
  alpha_s, beta_s0, beta_s, eta_s,
  NA, NA, c(NA),
  sigma_d = sigma_d, sigma_s = sigma_s, rho_ds = rho_ds,
  seed = seed
)Prepare the basic parameters for model initialization.
key_columns <- c("id", "date")
time_column <- c("date")
quantity_column <- "Q"
price_column <- "P"
demand_specification <- paste0(price_column, " + Xd1 + Xd2 + X1 + X2")
supply_specification <- "Xs1 + X1 + X2"
price_specification <- "Xp1"
verbose <- 2
use_correlated_shocks <- TRUEUsing the above parameterization, construct the model objects. Here we construct two equilibrium models and four disequilibrium models. All the models are constructed using the simulated data from a model of market in equilibrium.
eqmdl <- new(
  "equilibrium_model",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Equilibrium FIML with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#> 
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
bsmdl <- new(
  "diseq_basic",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Basic with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#> 
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
damdl <- new(
  "diseq_deterministic_adjustment",
  key_columns, time_column,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data,
  use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Deterministic Adjustment with correlated shocks' model
#> Info: Dropping 5000 rows by generating 'LAGGED_P'.
#> Info: Sample separated with 10003 rows in excess supply and 9997 in excess demand regime.Set the estimation parameters.
Estimate the models.
eqmdl_reg <- estimate(eqmdl, method = "2SLS")
eqmdl_est <- estimate(eqmdl,
  control = optimization_controls,
  method = optimization_method
)
bsmdl_est <- estimate(bsmdl,
  control = optimization_controls,
  method = optimization_method
)
damdl_est <- estimate(damdl,
  control = optimization_controls,
  method = optimization_method
)All the models provide estimates for the simulated data. Even with simulated data, it is difficult to assess which model performs better by examining only the summaries in separation or collectively.
summary(eqmdl_reg$first_stage_model)
#> 
#> Call:
#> lm(formula = first_stage_formula, data = object@model_tibble)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.85453 -0.14407  0.00004  0.14368  0.78798 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  0.761849   0.017106   44.54   <2e-16 ***
#> Xd1          0.448077   0.003055  146.68   <2e-16 ***
#> Xd2         -0.144864   0.003056  -47.41   <2e-16 ***
#> X1           0.505181   0.003026  166.92   <2e-16 ***
#> X2           0.437596   0.003072  142.44   <2e-16 ***
#> Xs1         -0.139496   0.003057  -45.63   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.215 on 19994 degrees of freedom
#> Multiple R-squared:  0.7873, Adjusted R-squared:  0.7873 
#> F-statistic: 1.48e+04 on 5 and 19994 DF,  p-value: < 2.2e-16
summary(eqmdl_reg$system_model)
#> 
#> systemfit results 
#> method: 2SLS 
#> 
#>            N    DF     SSR  detRCov   OLS-R2 McElroy-R2
#> system 40000 39989 39903.8 0.736073 0.899874   0.848746
#> 
#>            N    DF     SSR      MSE     RMSE       R2   Adj R2
#> demand 20000 19994 20008.0 1.000702 1.000351 0.899592 0.899567
#> supply 20000 19995 19895.7 0.995036 0.997515 0.900156 0.900136
#> 
#> The covariance matrix of the residuals
#>         demand   supply
#> demand 1.00070 0.509570
#> supply 0.50957 0.995036
#> 
#> The correlations of the residuals
#>          demand   supply
#> demand 1.000000 0.510661
#> supply 0.510661 1.000000
#> 
#> 
#> 2SLS estimates for 'demand' (equation 1)
#> Model Formula: Q ~ P + Xd1 + Xd2 + X1 + X2
#> <environment: 0x55baaada2ff8>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55baaada2ff8>
#> 
#>               Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept)  4.8762033  0.0827412  58.9332 < 2.22e-16 ***
#> P           -1.8442136  0.1019472 -18.0899 < 2.22e-16 ***
#> Xd1          2.0524139  0.0478747  42.8705 < 2.22e-16 ***
#> Xd2         -0.6858288  0.0205565 -33.3632 < 2.22e-16 ***
#> X1           3.5025023  0.0534337  65.5486 < 2.22e-16 ***
#> X2           6.2117062  0.0468246 132.6589 < 2.22e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.000351 on 19994 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19994 
#> SSR: 20008.043378 MSE: 1.000702 Root MSE: 1.000351 
#> Multiple R-Squared: 0.899592 Adjusted R-Squared: 0.899567 
#> 
#> 
#> 2SLS estimates for 'supply' (equation 2)
#> Model Formula: Q ~ P + Xs1 + X1 + X2
#> <environment: 0x55baaada2ff8>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55baaada2ff8>
#> 
#>              Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept) 1.3095281  0.0768882  17.0316 < 2.22e-16 ***
#> P           2.7509924  0.0300520  91.5410 < 2.22e-16 ***
#> Xs1         0.6409146  0.0148046  43.2914 < 2.22e-16 ***
#> X1          1.1808148  0.0207026  57.0370 < 2.22e-16 ***
#> X2          4.2008260  0.0194740 215.7143 < 2.22e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.997515 on 19995 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19995 
#> SSR: 19895.74173 MSE: 0.995036 Root MSE: 0.997515 
#> Multiple R-Squared: 0.900156 Adjusted R-Squared: 0.900136
bbmle::summary(eqmdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(method = "BFGS", control = list(maxit = 10000, 
#>     reltol = 1e-08), skip.hessian = FALSE, start = c(D_P = 0.248904000229706, 
#> D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, 
#> D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514, 
#> S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, 
#> S_X2 = 4.66113245104969, D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0
#> ), minuslogl = function (...) 
#> minus_log_likelihood(object, ...), gr = function (...) 
#> gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error z value     Pr(z)    
#> D_P        -1.847620   0.102162 -18.085 < 2.2e-16 ***
#> D_CONST     4.845601   0.079952  60.606 < 2.2e-16 ***
#> D_Xd1       2.057105   0.047842  42.998 < 2.2e-16 ***
#> D_Xd2      -0.676054   0.019563 -34.558 < 2.2e-16 ***
#> D_X1        3.503609   0.053540  65.439 < 2.2e-16 ***
#> D_X2        6.213180   0.046915 132.435 < 2.2e-16 ***
#> S_P         2.750472   0.030038  91.567 < 2.2e-16 ***
#> S_CONST     1.311502   0.076848  17.066 < 2.2e-16 ***
#> S_Xs1       0.640863   0.014796  43.313 < 2.2e-16 ***
#> S_X1        1.180568   0.020695  57.045 < 2.2e-16 ***
#> S_X2        4.201078   0.019466 215.811 < 2.2e-16 ***
#> D_VARIANCE  1.001167   0.024052  41.625 < 2.2e-16 ***
#> S_VARIANCE  0.994439   0.011789  84.353 < 2.2e-16 ***
#> RHO         0.510389   0.017863  28.572 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 46374.23
bbmle::summary(bsmdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706, 
#>     D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, 
#>     D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514, 
#>     S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, 
#>     S_X2 = 4.66113245104969, D_VARIANCE = 1, S_VARIANCE = 1, 
#>     RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value     Pr(z)    
#> D_P         0.049812   0.053832   0.9253 0.3548000    
#> D_CONST     4.014641   0.115685  34.7033 < 2.2e-16 ***
#> D_Xd1       1.322024   0.036993  35.7369 < 2.2e-16 ***
#> D_Xd2      -0.455326   0.019782 -23.0177 < 2.2e-16 ***
#> D_X1        2.577838   0.037536  68.6762 < 2.2e-16 ***
#> D_X2        5.390141   0.034642 155.5962 < 2.2e-16 ***
#> S_P         1.761937   0.105878  16.6412 < 2.2e-16 ***
#> S_CONST     2.836331   0.359832   7.8824 3.212e-15 ***
#> S_Xs1       1.119391   0.086554  12.9329 < 2.2e-16 ***
#> S_X1        1.575385   0.091235  17.2674 < 2.2e-16 ***
#> S_X2        4.613717   0.082291  56.0662 < 2.2e-16 ***
#> D_VARIANCE  0.822471   0.017428  47.1932 < 2.2e-16 ***
#> S_VARIANCE  1.372021   0.090718  15.1240 < 2.2e-16 ***
#> RHO         0.204999   0.056839   3.6066 0.0003102 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 51245.47
bbmle::summary(damdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706, 
#>     D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, 
#>     D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514, 
#>     S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, 
#>     S_X2 = 4.66113245104969, P_DIFF = 1, D_VARIANCE = 1, S_VARIANCE = 1, 
#>     RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value  Pr(z)    
#> D_P        -1.849202   0.102147 -18.1034 <2e-16 ***
#> D_CONST     4.860420   0.082592  58.8486 <2e-16 ***
#> D_Xd1       2.054982   0.047717  43.0664 <2e-16 ***
#> D_Xd2      -0.675406   0.019519 -34.6019 <2e-16 ***
#> D_X1        3.501842   0.053401  65.5767 <2e-16 ***
#> D_X2        6.211275   0.046794 132.7369 <2e-16 ***
#> S_P         2.757014   0.030859  89.3424 <2e-16 ***
#> S_CONST     1.284230   0.081551  15.7476 <2e-16 ***
#> S_Xs1       0.640862   0.014799  43.3037 <2e-16 ***
#> S_X1        1.181013   0.020696  57.0636 <2e-16 ***
#> S_X2        4.200912   0.019469 215.7692 <2e-16 ***
#> P_DIFF     -0.013436   0.013574  -0.9898 0.3223    
#> D_VARIANCE  0.999927   0.023931  41.7841 <2e-16 ***
#> S_VARIANCE  0.994434   0.011789  84.3560 <2e-16 ***
#> RHO         0.511222   0.017821  28.6869 <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 46373.32The deterministic adjustment model has price dynamics that are analogous to excess demand and estimates one extra parameter. The directional model estimates one parameter less as the model does not have enough equations to identify prices in both demand and supply equations. The estimated parameters are summarized as follows.
sim_coef <- c(
  alpha_d, beta_d0, beta_d, eta_d,
  alpha_s, beta_s0, beta_s, eta_s,
  NA,
  sigma_d, sigma_s,
  rho_ds
)
names(sim_coef) <- names(damdl_est@coef)
dm_inc <- eqmdl_reg$system_model$coefficients[
  grep(
    "demand",
    names(eqmdl_reg$system_model$coefficients)
  )
]
sp_inc <- eqmdl_reg$system_model$coefficients[
  grep(
    "supply",
    names(eqmdl_reg$system_model$coefficients)
  )
]
lm_coef <- c(
  dm_inc[2], dm_inc[-2], sp_inc[2], sp_inc[-2],
  NA,
  NA, NA,
  NA
)
eqmdl_coef <- append(
  eqmdl_est@coef, c(NA),
  after = which(names(eqmdl_est@coef) ==
    get_prefixed_variance_variable(eqmdl@system@demand)) - 1
)
bsmdl_coef <- append(
  bsmdl_est@coef, c(NA),
  after = which(names(bsmdl_est@coef) ==
    get_prefixed_variance_variable(bsmdl@system@demand)) - 1
)
damdl_coef <- damdl_est@coef
comp <- tibble::tibble(
  parameter = names(sim_coef),
  sim = sim_coef, lm = lm_coef, fi = eqmdl_coef,
  bm = bsmdl_coef, da = damdl_coef,
  lmerr = abs(lm_coef - sim_coef), fierr = abs(eqmdl_coef - sim_coef),
  bmerr = abs(bsmdl_coef - sim_coef), daerr = abs(damdl_coef - sim_coef)
)
comp
#> # A tibble: 15 x 10
#>    parameter   sim     lm     fi      bm      da    lmerr    fierr  bmerr
#>    <chr>     <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl>    <dbl>  <dbl>
#>  1 D_P       -1.9  -1.84  -1.85   0.0498 -1.85    5.58e-2  0.0524   1.95 
#>  2 D_CONST    4.9   4.88   4.85   4.01    4.86    2.38e-2  0.0544   0.885
#>  3 D_Xd1      2.1   2.05   2.06   1.32    2.05    4.76e-2  0.0429   0.778
#>  4 D_Xd2     -0.7  -0.686 -0.676 -0.455  -0.675   1.42e-2  0.0239   0.245
#>  5 D_X1       3.5   3.50   3.50   2.58    3.50    2.50e-3  0.00361  0.922
#>  6 D_X2       6.25  6.21   6.21   5.39    6.21    3.83e-2  0.0368   0.860
#>  7 S_P        2.8   2.75   2.75   1.76    2.76    4.90e-2  0.0495   1.04 
#>  8 S_CONST    1.2   1.31   1.31   2.84    1.28    1.10e-1  0.112    1.64 
#>  9 S_Xs1      0.65  0.641  0.641  1.12    0.641   9.09e-3  0.00914  0.469
#> 10 S_X1       1.15  1.18   1.18   1.58    1.18    3.08e-2  0.0306   0.425
#> 11 S_X2       4.2   4.20   4.20   4.61    4.20    8.26e-4  0.00108  0.414
#> 12 P_DIFF    NA    NA     NA     NA      -0.0134 NA       NA       NA    
#> 13 D_VARIAN…  1    NA      1.00   0.822   1.00   NA        0.00117  0.178
#> 14 S_VARIAN…  1    NA      0.994  1.37    0.994  NA        0.00556  0.372
#> 15 RHO        0.5  NA      0.510  0.205   0.511  NA        0.0104   0.295
#> # … with 1 more variable: daerr <dbl>Since we have used simulated data, we can calculate the average absolute error of the parameter estimation for each of the models. In practice, the population values are unknown and this calculation is impossible.
comp_means <- colMeans(comp[, grep("err", colnames(comp))], na.rm = TRUE)
comp_means
#>      lmerr      fierr      bmerr      daerr 
#> 0.03467257 0.03092702 0.74766291 0.02754986Moreover, the average absolute error cannot provide an overall estimation assessment as the market models have different parameter spaces. To assess the overall model performance one can instead use an information criterion.
model_names <- c(
  eqmdl@model_type_string,
  bsmdl@model_type_string, damdl@model_type_string
)
model_obs <- c(
  get_number_of_observations(eqmdl),
  get_number_of_observations(bsmdl),
  get_number_of_observations(damdl)
)
model_errors <- c(
  comp_means["fierr"],
  comp_means["bmerr"],
  comp_means["daerr"]
)
seltbl <- AIC(eqmdl_est, bsmdl_est, damdl_est) %>%
  tibble::add_column(Model = model_names, .before = 1) %>%
  tibble::add_column(Obs. = model_obs, `Mean Error` = model_errors) %>%
  dplyr::rename(D.F. = df) %>%
  dplyr::arrange(AIC)
seltbl
#>                              Model D.F.      AIC  Obs. Mean Error
#> eqmdl_est         Equilibrium FIML   14 46402.23 20000 0.03092702
#> damdl_est Deterministic Adjustment   15 46403.32 20000 0.02754986
#> bsmdl_est                    Basic   14 51273.47 20000 0.74766291