With the package mHMMbayes you can fit multilevel hidden Markov models. The multilevel hidden Markov model (HMM) is a generalization of the well-known hidden Markov model, tailored to accommodate (intense) longitudinal data of multiple individuals simultaneously. Using a multilevel framework, we allow for heterogeneity in the model parameters (transition probability matrix and conditional distribution), while estimating one overall HMM. The model has a great potential of application in many fields, such as the social sciences and medicine. The model can be fitted on multivariate data with a categorical distribution, and include individual level covariates (allowing for e.g., group comparisons on model parameters). Parameters are estimated using Bayesian estimation utilizing the forward-backward recursion within a hybrid Metropolis within Gibbs sampler. The package also includes various options for model visualization, a function to simulate data and a function to obtain the most likely hidden state sequence for each individual using the Viterbi algorithm.
Please do not hesitate to contact me if you have any questions regarding the package.
You can install mHMMbayes from github with:
This is a basic example which shows you how to run the model using example data included with the package, and how to simulate data. For a more elaborate introduction, see the vignette “tutorial-mhmm” accompanying the package.
library(mHMMbayes)
##### Simple 2 state model
# specifying general model properties
m <- 2
n_dep <- 4
q_emiss <- c(3, 2, 3, 2)
# specifying starting values
start_TM <- diag(.8, m)
start_TM[lower.tri(start_TM) | upper.tri(start_TM)] <- .2
start_EM <- list(matrix(c(0.05, 0.90, 0.05,
0.90, 0.05, 0.05), byrow = TRUE,
nrow = m, ncol = q_emiss[1]), # vocalizing patient
matrix(c(0.1, 0.9,
0.1, 0.9), byrow = TRUE, nrow = m,
ncol = q_emiss[2]), # looking patient
matrix(c(0.90, 0.05, 0.05,
0.05, 0.90, 0.05), byrow = TRUE,
nrow = m, ncol = q_emiss[3]), # vocalizing therapist
matrix(c(0.1, 0.9,
0.1, 0.9), byrow = TRUE, nrow = m,
ncol = q_emiss[4])) # looking therapist
# Run a model without covariates.
# Note that normally, a much higher number of iterations J would be used
set.seed(23245)
out_2st <- mHMM(s_data = nonverbal,
gen = list(m = m, n_dep = n_dep, q_emiss = q_emiss),
start_val = c(list(start_TM), start_EM),
mcmc = list(J = 11, burn_in = 5))
#> [1] 10
#> [1] "total time elapsed in minutes 0.42"
out_2st
#> Number of subjects: 10
#>
#> 11 iterations used in the MCMC algorithm with a burn in of 5
#> Average Log likelihood over all subjects: -1639.443
#> Average AIC over all subjects: 3306.885
#>
#> Number of states used: 2
#>
#> Number of dependent variables used: 4
summary(out_2st)
#> State transition probability matrix
#> (at the group level):
#>
#> To state 1 To state 2
#> From state 1 0.934 0.066
#> From state 2 0.058 0.942
#>
#>
#> Emission distribution for each of the dependent variables
#> (at the group level):
#>
#> $p_vocalizing
#> Category 1 Category 2 Category 3
#> State 1 0.031 0.943 0.025
#> State 2 0.766 0.110 0.134
#>
#> $p_looking
#> Category 1 Category 2
#> State 1 0.221 0.779
#> State 2 0.100 0.900
#>
#> $t_vocalizing
#> Category 1 Category 2 Category 3
#> State 1 0.802 0.084 0.106
#> State 2 0.049 0.924 0.031
#>
#> $t_looking
#> Category 1 Category 2
#> State 1 0.041 0.959
#> State 2 0.292 0.708
# Run a model including a covariate
# Here, the covariate (standardized CDI change) predicts the emission
# distribution for each of the 4 dependent variables:
n_subj <- 10
xx <- rep(list(matrix(1, ncol = 1, nrow = n_subj)), (n_dep + 1))
for(i in 2:(n_dep + 1)){
xx[[i]] <- cbind(xx[[i]], nonverbal_cov$std_CDI_change)
}
out_2st_c <- mHMM(s_data = nonverbal, xx = xx,
gen = list(m = m, n_dep = n_dep, q_emiss = q_emiss),
start_val = c(list(start_TM), start_EM),
mcmc = list(J = 11, burn_in = 5))
#> [1] 10
#> [1] "total time elapsed in minutes 0.42"
### Simulating data
# simulating data for 10 subjects with each 100 observations
n_t <- 100
n <- 10
m <- 3
q_emiss <- 4
gamma <- matrix(c(0.8, 0.1, 0.1,
0.2, 0.7, 0.1,
0.2, 0.2, 0.6), ncol = m, byrow = TRUE)
emiss_distr <- matrix(c(0.5, 0.5, 0.0, 0.0,
0.1, 0.1, 0.8, 0.0,
0.0, 0.0, 0.1, 0.9), nrow = m, ncol = q_emiss, byrow = TRUE)
set.seed(1253)
data1 <- sim_mHMM(n_t = n_t, n = n, m = m, q_emiss = q_emiss, gamma = gamma,
emiss_distr = emiss_distr, var_gamma = 1, var_emiss = 1)
head(data1$states)
#> subj state
#> [1,] 1 2
#> [2,] 1 2
#> [3,] 1 2
#> [4,] 1 2
#> [5,] 1 2
#> [6,] 1 2
head(data1$obs)
#> subj observation
#> [1,] 1 2
#> [2,] 1 3
#> [3,] 1 1
#> [4,] 1 2
#> [5,] 1 2
#> [6,] 1 2
# simulating subject specific transition probability matrices and emission distributions only
n_t <- 0
n <- 5
m <- 3
q_emiss <- 4
gamma <- matrix(c(0.8, 0.1, 0.1,
0.2, 0.7, 0.1,
0.2, 0.2, 0.6), ncol = m, byrow = TRUE)
emiss_distr <- matrix(c(0.5, 0.5, 0.0, 0.0,
0.1, 0.1, 0.8, 0.0,
0.0, 0.0, 0.1, 0.9), nrow = m, ncol = q_emiss, byrow = TRUE)
set.seed(549801)
data2 <- sim_mHMM(n_t = n_t, n = n, m = m, q_emiss = q_emiss, gamma = gamma,
emiss_distr = emiss_distr, var_gamma = 1, var_emiss = 1)
data2
#> $subject_gamma
#> $subject_gamma[[1]]
#> [,1] [,2] [,3]
#> [1,] 0.6302 0.2849 0.0849
#> [2,] 0.1817 0.7714 0.0469
#> [3,] 0.2164 0.1738 0.6098
#>
#> $subject_gamma[[2]]
#> [,1] [,2] [,3]
#> [1,] 0.7819 0.1235 0.0945
#> [2,] 0.0747 0.9015 0.0238
#> [3,] 0.3285 0.4705 0.2011
#>
#> $subject_gamma[[3]]
#> [,1] [,2] [,3]
#> [1,] 0.6228 0.1443 0.2329
#> [2,] 0.5242 0.3106 0.1652
#> [3,] 0.5215 0.1167 0.3618
#>
#> $subject_gamma[[4]]
#> [,1] [,2] [,3]
#> [1,] 0.5726 0.1054 0.3220
#> [2,] 0.1751 0.5438 0.2811
#> [3,] 0.2109 0.1686 0.6204
#>
#> $subject_gamma[[5]]
#> [,1] [,2] [,3]
#> [1,] 0.8227 0.1212 0.0561
#> [2,] 0.2029 0.5990 0.1982
#> [3,] 0.0902 0.3200 0.5898
#>
#>
#> $subject_emmis
#> $subject_emmis[[1]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.4916 0.5083 0.0001 0.0000
#> [2,] 0.0572 0.1810 0.7618 0.0000
#> [3,] 0.0000 0.0000 0.0629 0.9371
#>
#> $subject_emmis[[2]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.1451 0.8549 0.0000 0.0000
#> [2,] 0.0510 0.1518 0.7972 0.0000
#> [3,] 0.0000 0.0000 0.0514 0.9486
#>
#> $subject_emmis[[3]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.2158 0.7842 0.0000 0.0000
#> [2,] 0.1865 0.1831 0.6304 0.0000
#> [3,] 0.0001 0.0002 0.5793 0.4204
#>
#> $subject_emmis[[4]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.3268 0.6732 0.0000 0.0000
#> [2,] 0.0501 0.0867 0.8631 0.0000
#> [3,] 0.0000 0.0000 0.1194 0.8806
#>
#> $subject_emmis[[5]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.7268 0.2731 0.0000 0.0001
#> [2,] 0.1303 0.2549 0.6148 0.0000
#> [3,] 0.0000 0.0000 0.1085 0.8915
set.seed(10893)
data3 <- sim_mHMM(n_t = n_t, n = n, m = m, q_emiss = q_emiss, gamma = gamma,
emiss_distr = emiss_distr, var_gamma = .5, var_emiss = .5)
data3
#> $subject_gamma
#> $subject_gamma[[1]]
#> [,1] [,2] [,3]
#> [1,] 0.7958 0.0461 0.1581
#> [2,] 0.3042 0.4663 0.2295
#> [3,] 0.1396 0.6520 0.2084
#>
#> $subject_gamma[[2]]
#> [,1] [,2] [,3]
#> [1,] 0.6221 0.0834 0.2946
#> [2,] 0.1143 0.8430 0.0427
#> [3,] 0.1414 0.3805 0.4780
#>
#> $subject_gamma[[3]]
#> [,1] [,2] [,3]
#> [1,] 0.7416 0.0554 0.203
#> [2,] 0.1403 0.7937 0.066
#> [3,] 0.1915 0.0975 0.711
#>
#> $subject_gamma[[4]]
#> [,1] [,2] [,3]
#> [1,] 0.6333 0.0932 0.2736
#> [2,] 0.1127 0.6909 0.1964
#> [3,] 0.1058 0.3872 0.5070
#>
#> $subject_gamma[[5]]
#> [,1] [,2] [,3]
#> [1,] 0.7610 0.1833 0.0557
#> [2,] 0.0781 0.8920 0.0300
#> [3,] 0.2269 0.1116 0.6615
#>
#>
#> $subject_emmis
#> $subject_emmis[[1]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.6359 0.3641 0.0000 0.0000
#> [2,] 0.2302 0.2625 0.5073 0.0000
#> [3,] 0.0000 0.0000 0.0326 0.9674
#>
#> $subject_emmis[[2]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.6471 0.3528 0.0000 0.0000
#> [2,] 0.1977 0.2782 0.5240 0.0001
#> [3,] 0.0000 0.0000 0.2446 0.7554
#>
#> $subject_emmis[[3]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.7053 0.2946 0.0000 0.0000
#> [2,] 0.1433 0.0626 0.7940 0.0001
#> [3,] 0.0000 0.0000 0.0274 0.9726
#>
#> $subject_emmis[[4]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.8119 0.1880 0.0000 0.0000
#> [2,] 0.0704 0.0669 0.8627 0.0000
#> [3,] 0.0000 0.0000 0.1557 0.8443
#>
#> $subject_emmis[[5]]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.5968 0.4032 0.0000 0.0000
#> [2,] 0.1023 0.1158 0.7819 0.0000
#> [3,] 0.0000 0.0000 0.1358 0.8642