Bayesian Approach to Latent Class Models

1). Directed acyclic graph:

For Subject $$i$$, the class indicator $$\eta_i$$ are distributed as a categorical random variable with category probabilities $$\boldsymbol{\pi}=(\pi_1, \ldots, \pi_{M_0})'$$. Given a realized value of $$\eta_i$$, say $$j$$ in a particular observation, the response parameters $$\boldsymbol{p}_{j\cdot}=(p_{j1},\ldots, p_{jK})'$$ determine the probability of $$\{P(Y_{ik}=1\mid \eta_i=j,\boldsymbol{p}_{j\cdot})=p_{jk}\}_{k=1}^K$$, one per symptom $$k$$.

2). $$p_{jk}=p_{j+1,k}$$, for $$j=1,\ldots, M_0-1$$. Interpretation: when the depression symptom $$k$$ does not provide information to distinguish subject $$i$$ into one of the $$M_0$$ latent depression classes.

3). Simulate dataset $$D^*$$

M0 <- 3
K  <- 5
N  <- 300

set.seed(20160901)

p_mat <- matrix(c(0.1,0.9,0.1,0.15,0.1,
0.4,0.4,0.45,0.5,0.4,
0.95,0.1,0.9,0.9,0.9),nrow=M0,byrow=TRUE)

pi_vec <- c(0.5,0.3,0.2)

dat <- matrix(NA,nrow=N,ncol=K)
eta <- sample(1:M0,size = N,prob = pi_vec,replace = TRUE)
for (i in 1:N){
for (k in 1:K){
dat[i,k] <- rbinom(1,1,prob = p_mat[eta[i],k])
}
}

# tabulate the frequency of each multivariate binary pattern:
pattern_vec <- apply(dat,1,function(v) paste(v,collapse=""))

pattern_table <- matrix(round(table(pattern_vec)/N,3))*100
pattern_table <- cbind(names(table(pattern_vec)),pattern_table)
colnames(pattern_table) <- c("Pattern","Observed Frequency (100%)")

knitr::kable(
pattern_table, caption = 'Observed frequencies of multivariate binary patterns for $K=5$ depression symptoms (100%)'
)

Observed frequencies of multivariate binary patterns for $$K=5$$ depression symptoms (100%)

Pattern Observed Frequency (100%)
00000 5
00001 1.3
00010 3.3
00011 1.3
00100 2.3
00101 0.7
00110 2
00111 0.7
01000 30.3
01001 1.7
01010 4.7
01011 1
01100 2.7
01101 2
01110 2
01111 0.7
10000 2.7
10001 1.3
10010 2
10011 2
10100 1
10101 3
10110 2.7
10111 16.7
11000 2
11001 1
11010 1.7
11011 0.3
11100 1
11111 1
lor_obs <- function(x,y){
log(sum(x==1 & y==1))+log(sum(x==0 & y==0))-
log(sum(x==1 & y==0))-log(sum(x==0 & y==1))
}

psi_obs_mat <- matrix(NA,nrow=K,ncol=K)

for (i in 1:K){
for (j in 1:K){
if (i!=j){
psi_obs_mat[i,j] <- lor_obs(dat[,i],dat[,j])
}
}
}

# calculate the observed pairwise log odds ratios:
colnames(psi_obs_mat) <- rownames(psi_obs_mat) <- paste("Symptom",1:K,sep=" ")
knitr::kable(
round(psi_obs_mat,2), caption = 'Observed pariwise log odds ratios'
)

Observed pariwise log odds ratios

Symptom 1 Symptom 2 Symptom 3 Symptom 4 Symptom 5
Symptom 1 NA -2.49 1.99 1.86 2.39
Symptom 2 -2.49 NA -1.94 -1.85 -2.01
Symptom 3 1.99 -1.94 NA 1.73 2.23
Symptom 4 1.86 -1.85 1.73 NA 1.71
Symptom 5 2.39 -2.01 2.23 1.71 NA

4). Write out the likelihood

\begin{aligned} L(\boldsymbol{a},\boldsymbol{g}; \boldsymbol{Y}) & = \prod_{i=1}^N\left\{\sum_{j=1}^{M_{fit}}\frac{\exp(a_j)}{\sum_{\ell=1}^{M_{fit}}\exp(a_\ell)}\prod_{k=1}^K \frac{\exp(g_{jk}Y_{ik})}{1+\exp(g_{jk})}\right\}, \end{aligned} where $$a_{M_{fit}}=0$$.

5). Full Conditionals

The full conditional distributions are useful for designing Gibbs sampling algorithms. In the DAG, we identify the children and parents of the parameter under concern and multiply the conditional distributions that invole both the identified parent or child nodes and the parameter.

1. $[g_{jk}\mid \{g_{-j,-k}\}, \boldsymbol{\eta}, \boldsymbol{Y}] \propto [\boldsymbol{Y}\mid \boldsymbol{\eta},\boldsymbol{g}][g_{jk}] \propto \prod_{i=1}^N\prod_{k=1}^K\prod_{j=1}^{M_{fit}}\left\{\frac{\exp(g_{jk}Y_{ik})}{1+\exp(g_{jk})}\right\}^{\boldsymbol{1}\{\eta_i=j\}}\cdot \exp(-2g_{jk}^2/9)$

$[a_j \mid \{a_{-j}\},\boldsymbol{\eta}] \propto [\boldsymbol{\eta}\mid \boldsymbol{a}][a_j] \propto \prod_{i=1}^N \prod_{j=1}^{M_{fit}} \left\{\frac{\exp(a_j)}{\sum_{j=1}^{M_{fit}}\exp(a_j)}\right\}^{\boldsymbol{1}\{\eta_i=j\}}\cdot \exp(-2a^2_j/9)$

1. $[\eta_i \mid \boldsymbol{a},\boldsymbol{g},\boldsymbol{Y}] \propto [Y_i \mid \boldsymbol{\eta}, \boldsymbol{g}][\eta_i \mid \boldsymbol{a}] \propto \prod_{j=1}^{M_{fit}}\left\{\prod_{k=1}^K\frac{\exp(g_{jk}Y_{ik})}{1+\exp(g_{jk})}\cdot \frac{\exp(a_j)}{\sum_\ell \exp(a_\ell)}\right\}^{\boldsymbol{1}\{\eta_i = j\}}$

6). Bayesian Estimates

result_folder <- tempdir()
# fit Bayesian model:
mcmc_options <- list(debugstatus     = TRUE,
n.chains   = 1,
n.itermcmc = 10000,
n.burnin   = 5000,
n.thin     = 1,
result.folder = result_folder,
bugsmodel.dir = result_folder
)

# write .bug model file:
model_bugfile_name <- "model.bug"
filename   <- file.path(mcmc_options$bugsmodel.dir, model_bugfile_name) model_text <- "model{#BEGIN OF MODEL: for (i in 1:N){ for (k in 1:K){ Y[i,k] ~ dbern(p[eta[i],k]) } eta[i] ~ dcat(pi[1:M_fit]) } for (j in 1:(M_fit-1)){ exp0[j] <- exp(a[j]) a[j] ~ dnorm(0,4/9) } exp0[M_fit] <- 1 exp_sorted <- sort(exp0[1:M_fit]) for (j in 1:M_fit){ pi[j] <- exp_sorted[j]/sum(exp_sorted[1:M_fit]) for (k in 1:K){ p[j,k] <- 1/(1+exp(-g[j,k])) g[j,k] ~ dnorm(0,4/9) } } # generate new data: for (i in 1:N){ for (k in 1:K){ new_Y[i,k] ~ dbern(p[new_eta[i],k]) } new_eta[i] ~ dcat(pi[1:M_fit]) } } #END OF MODEL." writeLines(model_text, filename) # run jags: library(rjags) ## Loading required package: coda ## Linked to JAGS 4.2.0 ## Loaded modules: basemod,bugs load.module("glm") ## module glm loaded Y <- as.matrix(dat) M_fit <- 3 N <- 300 K <- 5 in_data <- c("Y","M_fit","N","K") out_parameter <- c("pi","p","new_Y") in_init <- function(){ list(a=c(0,0)) } out <- R2jags::jags2(data = in_data, inits = in_init, parameters.to.save = out_parameter, model.file = filename, working.directory = mcmc_options$result.folder,
n.iter         = as.integer(mcmc_options$n.itermcmc), n.burnin = as.integer(mcmc_options$n.burnin),
n.thin         = as.integer(mcmc_options$n.thin), n.chains = as.integer(mcmc_options$n.chains),
DIC            = FALSE,
clearWD        = FALSE)

We can then obtain the posterior samples from JAGS output:

# obtain the chain histories:
file.path(result_folder,"CODAindex.txt"),
quiet=TRUE)
print_res <- function(x,coda_res) plot(coda_res[,grep(x,colnames(coda_res))])
get_res   <- function(x,coda_res) coda_res[,grep(x,colnames(coda_res))]

For example, we can visualize the chains for $$\pi_1, \pi_2, \pi_3$$:

print_res("pi",res)

Chain histories for the three class probabilities

7). Visualize Posterior Distributions

Here I only show the posterior summaries of the key model parameters, class probabilities $$\boldsymbol{\pi}$$ and response probabilities $$\{p_{jk}\}_{j=1,k=1}^{M_{fit},K}$$. The posterior samples can be summarized as in the previous question.

ind_ord <- order(pi_vec)
retain_ind <- grep("^p",rownames(out$summary)) posterior_table <- cbind(c(pi_vec[ind_ord],p_mat[ind_ord,]),round(out$summary[retain_ind,],3))
colnames(posterior_table)[1] <- "truth"
knitr::kable(
posterior_table, caption = 'Posterior Sample Summaries for Selected Parameters'
)

Posterior Sample Summaries for Selected Parameters

truth mean sd 2.5% 25% 50% 75% 97.5%
pi[1] 0.20 0.308 0.018 0.263 0.299 0.312 0.321 0.330
pi[2] 0.30 0.337 0.011 0.316 0.330 0.335 0.342 0.362
pi[3] 0.50 0.356 0.014 0.336 0.345 0.353 0.364 0.391
p[1,1] 0.95 0.069 0.036 0.014 0.042 0.064 0.090 0.152
p[2,1] 0.40 0.247 0.065 0.120 0.202 0.246 0.290 0.375
p[3,1] 0.10 0.916 0.042 0.819 0.891 0.922 0.948 0.981
p[1,2] 0.10 0.911 0.049 0.798 0.881 0.919 0.948 0.983
p[2,2] 0.40 0.524 0.078 0.377 0.468 0.523 0.578 0.677
p[3,2] 0.90 0.068 0.030 0.020 0.046 0.064 0.085 0.135
p[1,3] 0.90 0.078 0.043 0.013 0.046 0.071 0.103 0.180
p[2,3] 0.45 0.314 0.062 0.192 0.272 0.315 0.355 0.434
p[3,3