# Silence readr's column-specification messages for all read_csv() calls below.
options(readr.show_col_types = FALSE)
# Provides load_data(), used next to parse the Qualtrics export.
source("read_qualtrics_data.R")
d <- load_data("data/pilot/TPSai_17+March+2026_16.59.csv")
# Column names of the TPS-AI items — presumably the scale's item variables;
# used to subset the simulated CFA data below.
cols_tpsai <- d$cols_tpsai
# switch to d$items_data after data collection
cfa_data <- read_csv("data/pilot/simulated_cfa_data.csv") %>%
select(all_of(cols_tpsai))

Confirmatory Factor Analysis (CFA)
Confirmatory factor analysis (CFA) tests whether a hypothesised factor structure — derived from theory and/or EFA results — replicates in the data. Unlike EFA, which lets the data reveal structure freely, CFA is a hypothesis-testing procedure: we specify in advance which items load onto which factors and which cross-loadings are constrained to zero, then evaluate how well that model fits the observed covariance matrix.
We use the WLSMV (weighted least squares, mean- and variance-adjusted) estimator throughout. WLSMV is the recommended estimator for ordinal Likert data because it treats items as ordered-categorical rather than continuous, avoiding the underestimation of factor loadings and the distorted fit indices that arise when standard maximum-likelihood estimation is applied to non-normal ordinal responses.
Build Models
# --- Baseline single-factor models -----------------------------------------
# One general factor per subscale: the most parsimonious structure, against
# which the EFA-derived two-factor models are compared.
trust_1f_model <- cfa(paste("TR =~", paste(d$cols_trust, collapse = " + ")),
                      cfa_data, estimator = "WLSMV", ordered = TRUE)

# Distrust baseline pools the distrust (DT) and company-distrust (CD) items
# onto a single factor.
distrust_1f_model <- cfa(paste("DT =~", paste(d$cols_distrust, collapse = " + "), " + ",
                               paste(d$cols_company, collapse = " + ")),
                         cfa_data, estimator = "WLSMV", ordered = TRUE)

# --- Two-factor models implied by the EFA solution -------------------------
# Item-to-factor assignments (MR1/MR2) taken from the EFA pattern matrix;
# all cross-loadings are fixed to zero.
trust_efa_eq <- '
MR1 =~ TR_02 + TR_04 + TR_06 + TR_09 + TR_10 + TR_12 + TR_13 + TR_14 + TR_15 + TR_16 + TR_18 + TR_20 + TR_22
MR2 =~ TR_01 + TR_03 + TR_05 + TR_07 + TR_08 + TR_11 + TR_17 + TR_19 + TR_21
'
trust_efa_model <- cfa(trust_efa_eq,
                       cfa_data, estimator = "WLSMV", ordered = TRUE)

distrust_efa_eq <- '
MR1 =~ DT_01 + DT_02 + DT_04 + DT_06 + DT_10 + DT_11 + DT_13 + DT_14 + CD_04 + CD_05
MR2 =~ DT_03 + DT_05 + DT_07 + DT_08 + DT_09 + DT_12 + CD_01 + CD_02 + CD_03 + CD_06 + CD_07
'
distrust_efa_model <- cfa(distrust_efa_eq,
cfa_data, estimator = "WLSMV", ordered = TRUE)

Model Fit
We evaluate fit using the indices recommended by Boateng et al. (2018) and Briones (2026). No single index is sufficient; good fit requires convergent evidence across indices.
| Index | Good fit | Acceptable | Poor fit |
|---|---|---|---|
| CFI | ≥ .95 | .90–.94 | < .90 |
| TLI | ≥ .95 | .90–.94 | < .90 |
| RMSEA | ≤ .06 | .06–.08 (.08–.10 mediocre) | > .10 |
| SRMR | ≤ .06 | .06–.08 | > .08 |
# Collect the reported fit indices from a fitted lavaan model into a
# one-row tibble, ready to be row-bound into the summary table.
#
# fit         -- a fitted lavaan object (as returned by cfa()).
# subscale    -- label for the Subscale column (e.g. "Trust").
# model_label -- label for the Model column (e.g. "1F", "EFA").
extract_fit <- function(fit, subscale, model_label) {
  wanted <- c("chisq", "df", "pvalue",
              "cfi", "tli",
              "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
              "srmr")
  fi <- fitMeasures(fit, wanted)
  # Format the 90% RMSEA confidence interval as "[lower, upper]".
  rmsea_ci <- paste0("[", round(fi["rmsea.ci.lower"], 3),
                     ", ", round(fi["rmsea.ci.upper"], 3), "]")
  tibble(
    Subscale = subscale,
    Model = model_label,
    `χ²` = round(fi["chisq"], 2),
    df = fi["df"],
    # p = round(fi["pvalue"], 3),
    CFI = round(fi["cfi"], 3),
    TLI = round(fi["tli"], 3),
    RMSEA = round(fi["rmsea"], 3),
    `90% CI` = rmsea_ci,
    SRMR = round(fi["srmr"], 3)
  )
}
# Assemble the fit-index comparison table: one row per subscale x model.
# ("Distrust" was previously misspelled "Disrust" in the row labels.)
bind_rows(
  extract_fit(trust_1f_model, "Trust", "1F"),
  extract_fit(trust_efa_model, "Trust", "EFA"),
  extract_fit(distrust_1f_model, "Distrust", "1F"),
  extract_fit(distrust_efa_model, "Distrust", "EFA")
) %>%
kable()

| Subscale | Model | χ² | df | CFI | TLI | RMSEA | 90% CI | SRMR |
|---|---|---|---|---|---|---|---|---|
| Trust | 1F | 5696.62 | 209 | 0.656 | 0.619 | 0.296 | [0.29, 0.303] | 0.239 |
| Trust | EFA | 5135.44 | 208 | 0.691 | 0.657 | 0.281 | [0.275, 0.288] | 0.230 |
| Distrust | 1F | 6075.73 | 189 | 0.573 | 0.525 | 0.323 | [0.316, 0.33] | 0.260 |
| Distrust | EFA | 5061.13 | 188 | 0.646 | 0.605 | 0.294 | [0.287, 0.301] | 0.240 |
We can also compare models with a scaled chi-square difference test.
We prefer more complex models when both criteria are met: the difference test is significant (p < .05) and ΔCFI > .01. Using both criteria guards against retaining a complex model purely on the basis of a statistically significant but practically trivial improvement in fit.
# Scaled (Satorra, 2000) chi-square difference test: does the two-factor EFA
# model fit the trust items significantly better than the single-factor model?
lavTestLRT(trust_1f_model, trust_efa_model, model.names = c("1F", "EFA"))
Scaled Chi-Squared Difference Test (method = "satorra.2000")
lavaan->lavTestLRT():
lavaan NOTE: The "Chisq" column contains standard test statistics, not the
robust test that should be reported per model. A robust difference test is
a function of two standard (not robust) statistics.
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
EFA 208 5135.4
1F 209 5696.6 127.28 1.3665 1 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same scaled difference test for the distrust subscale: two-factor EFA
# model vs. single-factor baseline.
lavTestLRT(distrust_1f_model, distrust_efa_model, model.names = c("1F", "EFA"))
Scaled Chi-Squared Difference Test (method = "satorra.2000")
lavaan->lavTestLRT():
lavaan NOTE: The "Chisq" column contains standard test statistics, not the
robust test that should be reported per model. A robust difference test is
a function of two standard (not robust) statistics.
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
EFA 188 5061.1
1F 189 6075.7 145.13 1.8381 1 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1