Forest plots for regression models

Forest plots are not limited to meta-analyses. Any table of point estimates with confidence intervals can be displayed as a forest plot — results from regression models, dose-response analyses, multi-outcome studies, and more.

This vignette shows how to go from a fitted R model to a forest plot in a few lines of code. broom::tidy() extracts parameters; broom is not a hard dependency and may be replaced by any function that returns a data frame with the right columns. marginaleffects powers the causal-inference examples in the final sections.


Simulated cohort

set.seed(42)
n <- 800

cohort <- data.frame(
  age      = round(runif(n, 30, 75)),
  female   = rbinom(n, 1, 0.52),
  bmi      = rnorm(n, 26, 4.5),
  smoker   = rbinom(n, 1, 0.20),
  activity = rbinom(n, 1, 0.45),   # 1 = physically active
  educ     = sample(c("Low", "Medium", "High"), n,
                    replace = TRUE, prob = c(0.25, 0.45, 0.30))
)

# Systolic blood pressure (mmHg): continuous outcome with true signal
cohort$sbp <- 110 +
  0.40 * cohort$age    -
  4.50 * cohort$female +
  0.50 * cohort$bmi    -
  2.00 * cohort$smoker +
  2.50 * cohort$activity +
  rnorm(n, sd = 8)

# Hypertension (SBP ≥ 140): binary outcome
cohort$htn <- as.integer(cohort$sbp >= 140)

# Education as ordered factor (for dose-response example)
cohort$educ <- factor(cohort$educ,
                      levels = c("Low", "Medium", "High"), ordered = FALSE)
cohort$educ <- relevel(cohort$educ, ref = "Low")

Multiple predictors from one model

The simplest workflow: fit a model, call broom::tidy(conf.int = TRUE), and pass the result to forrest(). Column names estimate, conf.low, and conf.high map directly to the estimate, lower, and upper arguments.

fit_lm <- lm(sbp ~ age + female + bmi + smoker + activity, data = cohort)

coefs <- broom::tidy(fit_lm, conf.int = TRUE)
coefs <- coefs[coefs$term != "(Intercept)", ]
coefs$term <- c(
  "Age (per 1 year)",
  "Female sex",
  "BMI (per 1 kg/m\u00b2)",
  "Current smoker",
  "Physically active"
)

# Formatted coefficient + CI text for right-hand column
coefs$coef_ci <- sprintf(
  "%.2f (%.2f, %.2f)",
  coefs$estimate, coefs$conf.low, coefs$conf.high
)

forrest(
  coefs,
  estimate = "estimate",
  lower    = "conf.low",
  upper    = "conf.high",
  label    = "term",
  header   = "Predictor",
  cols     = c("Coef (95% CI)" = "coef_ci"),
  widths   = c(3, 4, 2.5),
  xlab     = "Regression coefficient (95% CI)",
  stripe   = TRUE
)

For logistic regression, set exponentiate = TRUE in broom::tidy() to get odds ratios directly, then set log_scale = TRUE and ref_line = 1 in forrest():

fit_glm <- glm(htn ~ age + female + bmi + smoker + activity,
               data = cohort, family = binomial)

or_coefs <- broom::tidy(fit_glm, conf.int = TRUE, exponentiate = TRUE)
or_coefs <- or_coefs[or_coefs$term != "(Intercept)", ]
or_coefs$term <- c(
  "Age (per 1 year)",
  "Female sex",
  "BMI (per 1 kg/m\u00b2)",
  "Current smoker",
  "Physically active"
)

or_coefs$or_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  or_coefs$estimate, or_coefs$conf.low, or_coefs$conf.high
)

forrest(
  or_coefs,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "term",
  log_scale  = TRUE,
  ref_line   = 1,
  header     = "Predictor",
  cols       = c("OR (95% CI)" = "or_text"),
  widths     = c(3, 4, 2),
  xlab       = "Odds ratio (95% CI)"
)


Same predictor across progressive adjustment models

A common reporting pattern is to show how an association changes as covariates are progressively added.

m1 <- glm(htn ~ activity,
          data = cohort, family = binomial)
m2 <- glm(htn ~ activity + age + female,
          data = cohort, family = binomial)
m3 <- glm(htn ~ activity + age + female + bmi + smoker,
          data = cohort, family = binomial)

extract_activity <- function(mod, label) {
  row         <- broom::tidy(mod, conf.int = TRUE, exponentiate = TRUE)
  row         <- row[row$term == "activity", ]
  row$model   <- label
  row
}

adj_dat <- rbind(
  extract_activity(m1, "Model 1: unadjusted"),
  extract_activity(m2, "Model 2: + age, sex"),
  extract_activity(m3, "Model 3: + BMI, smoking")
)

adj_dat$or_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  adj_dat$estimate, adj_dat$conf.low, adj_dat$conf.high
)

forrest(
  adj_dat,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "model",
  log_scale  = TRUE,
  ref_line   = 1,
  header     = "Adjustment set",
  cols       = c("OR (95% CI)" = "or_text"),
  widths     = c(4, 3.5, 2),
  xlab       = "OR for physical activity on hypertension (95% CI)"
)


Same predictor across multiple outcomes

To compare one exposure against several outcomes, loop over outcomes, extract the exposure row from each model, and stack into a single data frame. Use group to colour by outcome domain.

outcomes <- list(
  list(var = "sbp",      label = "Systolic BP (mmHg)",    domain = "Continuous"),
  list(var = "htn",      label = "Hypertension",          domain = "Binary"),
  list(var = "bmi",      label = "BMI (kg/m\u00b2)",      domain = "Continuous"),
  list(var = "smoker",   label = "Current smoker",        domain = "Binary")
)

rows <- lapply(outcomes, function(o) {
  if (o$domain == "Continuous") {
    fit <- lm(as.formula(paste(o$var, "~ activity + age + female")),
              data = cohort)
    r   <- broom::tidy(fit, conf.int = TRUE)
  } else {
    fit <- glm(as.formula(paste(o$var, "~ activity + age + female")),
               data = cohort, family = binomial)
    r   <- broom::tidy(fit, conf.int = TRUE, exponentiate = TRUE)
  }
  r          <- r[r$term == "activity", ]
  r$outcome  <- o$label
  r$domain   <- o$domain
  r
})

multi_out <- do.call(rbind, rows)
multi_out$est_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  multi_out$estimate,
  multi_out$conf.low,
  multi_out$conf.high
)

forrest(
  multi_out,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "outcome",
  group      = "domain",
  ref_line   = 0,
  legend_pos = "topright",
  header     = "Outcome",
  cols       = c("Estimate (95% CI)" = "est_text"),
  widths     = c(3.5, 3.5, 2.5),
  xlab       = "Effect of physical activity (coef or OR, 95% CI)"
)

Note: because continuous and binary outcomes live on different scales, a single reference line may not be ideal for all rows. Consider splitting into two separate panels, or use ref_line = NULL.


Dose-response (exposure categories)

For categorical exposures (quartiles, tertiles, and so on), the reference category has estimate = NA because there is no contrast to estimate. forrest() renders this row without a CI or point but keeps the label, making the reference position visually clear. Supply a text column to print "1.00 (ref)" in the right-hand panel. Set ref_label = TRUE to have forrest() append " (Ref.)" to the label automatically.

# Quartiles of age
cohort$age_q <- cut(
  cohort$age,
  breaks         = quantile(cohort$age, 0:4 / 4),
  include.lowest = TRUE,
  labels         = c("Q1 (\u226445 y)", "Q2 (45\u201356 y)",
                     "Q3 (56\u201366 y)", "Q4 (>66 y)")
)
cohort$age_q <- relevel(cohort$age_q, ref = "Q1 (\u226445 y)")

fit_q   <- glm(htn ~ age_q + female + bmi + smoker + activity,
               data = cohort, family = binomial)
q_coefs <- broom::tidy(fit_q, conf.int = TRUE, exponentiate = TRUE)
q_coefs <- q_coefs[grep("^age_q", q_coefs$term), ]

q_plot <- rbind(
  data.frame(
    label     = "Q1 (\u226445 y, ref)",
    estimate  = NA_real_,
    conf.low  = NA_real_,
    conf.high = NA_real_,
    or_ci     = "1.00 (ref)",
    is_sum    = FALSE
  ),
  data.frame(
    label     = gsub("^age_q", "", q_coefs$term),
    estimate  = q_coefs$estimate,
    conf.low  = q_coefs$conf.low,
    conf.high = q_coefs$conf.high,
    or_ci     = sprintf("%.2f (%.2f, %.2f)",
                        q_coefs$estimate,
                        q_coefs$conf.low,
                        q_coefs$conf.high),
    is_sum    = FALSE
  )
)

forrest(
  q_plot,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "label",
  is_summary = "is_sum",
  log_scale  = TRUE,
  ref_line   = 1,
  header     = "Age quartile",
  cols       = c("OR (95% CI)" = "or_ci"),
  widths     = c(3, 4, 2),
  xlab       = "OR for hypertension (95% CI)"
)


Marginal effects: g-computation

Parametric g-computation estimates the average treatment effect (ATE) by predicting outcomes under two counterfactual worlds — everyone treated vs. no one treated — and averaging the difference. marginaleffects::avg_comparisons() implements this in one line.

The example below estimates the marginal effect of physical activity on SBP, overall and stratified by sex and education level.

library(marginaleffects)

# Outcome model with treatment × covariate interactions so that
# g-computation captures heterogeneous treatment effects
fit_sbp <- lm(
  sbp ~ activity * (age + female + bmi + smoker),
  data = cohort
)

# Helper: extract one avg_comparisons result into a plot row
make_row <- function(me, label, is_sum = FALSE) {
  data.frame(
    label    = label,
    estimate = me$estimate,
    lower    = me$conf.low,
    upper    = me$conf.high,
    is_sum   = is_sum
  )
}

# Overall ATE
ate_sbp <- avg_comparisons(fit_sbp, variables = "activity")

# ATE by sex: subset newdata to each stratum
sex_rows <- lapply(
  list(`  Female` = 0, `  Male` = 1),
  function(f) avg_comparisons(fit_sbp, variables = "activity",
                               newdata = subset(cohort, female == f))
)

# ATE by education level
educ_rows <- setNames(
  lapply(c("Low", "Medium", "High"), function(e)
    avg_comparisons(fit_sbp, variables = "activity",
                    newdata = subset(cohort, educ == e))),
  c("  Low", "  Medium", "  High")
)

# Stack into a single data frame with a stratum column for section headers.
# No manual header or spacer rows needed.
gcomp_dat <- rbind(
  make_row(ate_sbp, "Overall (ATE)", is_sum = TRUE) |>
    transform(stratum = "Overall"),
  do.call(rbind, lapply(
    list(list(label = "Female", f = 0), list(label = "Male", f = 1)),
    function(s) {
      r <- make_row(avg_comparisons(fit_sbp, variables = "activity",
                                    newdata = subset(cohort, female == s$f)),
                   label = s$label)
      r$stratum <- "Sex"
      r
    }
  )),
  do.call(rbind, lapply(
    list(list(label = "Low", e = "Low"),
         list(label = "Medium", e = "Medium"),
         list(label = "High",   e = "High")),
    function(s) {
      r <- make_row(avg_comparisons(fit_sbp, variables = "activity",
                                    newdata = subset(cohort, educ == s$e)),
                   label = s$label)
      r$stratum <- "Education level"
      r
    }
  ))
)

gcomp_dat$est_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  gcomp_dat$estimate, gcomp_dat$lower, gcomp_dat$upper
)

forrest(
  gcomp_dat,
  estimate   = "estimate",
  lower      = "lower",
  upper      = "upper",
  label      = "label",
  section    = "stratum",
  is_summary = "is_sum",
  cols       = c("Marginal effect (95% CI)" = "est_text"),
  widths     = c(3.5, 4, 3),
  ref_line   = 0,
  header     = "Stratum",
  xlab       = "Marginal effect on SBP (mmHg): physical activity (g-computation)"
)


Marginal effects: inverse probability weighting (IPW)

IPW re-weights observations by the inverse of their propensity score so that the weighted sample resembles a randomised trial with respect to observed confounders. The example below uses marginaleffects to implement the Hajek estimator and compares it to g-computation for five strata (overall + sex + education). Using group and dodge, both sets of estimates are displayed side-by-side in one figure.

# ── Step 1: propensity score model ────────────────────────────────────────
ps_mod       <- glm(activity ~ age + female + bmi + smoker + educ,
                    data = cohort, family = binomial)
cohort$ps    <- predict(ps_mod, type = "response")
cohort$ipw   <- ifelse(cohort$activity == 1,
                       1 / cohort$ps,
                       1 / (1 - cohort$ps))

# Trim extreme weights at the 1st / 99th percentile for numerical stability
cohort$ipw_t <- pmin(pmax(cohort$ipw,
                           quantile(cohort$ipw, 0.01)),
                     quantile(cohort$ipw, 0.99))

# ── Step 2: IPW-weighted outcome model ────────────────────────────────────
fit_sbp_ipw <- lm(
  sbp ~ activity * (age + female + bmi + smoker),
  data = cohort, weights = ipw_t
)

# ── Step 3: build comparison rows — no header rows, just labelled strata ──
# Each stratum appears twice: once for each method.  The dodge layout
# places the two symbols side-by-side so CIs can be read off the same axis.
strata <- list(
  list(label = "Overall",         nd = cohort,                       is_sum = TRUE),
  list(label = "  Female",        nd = subset(cohort, female == 0),  is_sum = FALSE),
  list(label = "  Male",          nd = subset(cohort, female == 1),  is_sum = FALSE),
  list(label = "  Low educ.",     nd = subset(cohort, educ == "Low"), is_sum = FALSE),
  list(label = "  High educ.",    nd = subset(cohort, educ == "High"),is_sum = FALSE)
)

rows <- do.call(rbind, lapply(strata, function(s) {
  # G-computation: standard avg_comparisons on the interaction model
  gc  <- avg_comparisons(fit_sbp,     variables = "activity",
                          newdata = s$nd)
  # IPW (Hajek): weighted outcome model + Hajek normalisation via `wts`
  ipw <- avg_comparisons(fit_sbp_ipw, variables = "activity",
                          wts = "ipw_t", newdata = s$nd)
  rbind(
    data.frame(label = s$label, method = "G-computation",
               estimate = gc$estimate,  lower = gc$conf.low,
               upper    = gc$conf.high, is_sum = s$is_sum),
    data.frame(label = s$label, method = "IPW (Hajek)",
               estimate = ipw$estimate, lower = ipw$conf.low,
               upper    = ipw$conf.high, is_sum = s$is_sum)
  )
}))

# Separate text columns per method for cols_by_group display
rows$text_gc  <- ifelse(rows$method == "G-computation",
                         sprintf("%.2f (%.2f, %.2f)",
                                 rows$estimate, rows$lower, rows$upper), "")
rows$text_ipw <- ifelse(rows$method == "IPW (Hajek)",
                         sprintf("%.2f (%.2f, %.2f)",
                                 rows$estimate, rows$lower, rows$upper), "")

forrest(
  rows,
  estimate      = "estimate",
  lower         = "lower",
  upper         = "upper",
  label         = "label",
  group         = "method",
  is_summary    = "is_sum",
  dodge         = TRUE,
  cols_by_group = TRUE,
  cols          = c("G-comp (95% CI)" = "text_gc",
                    "IPW (95% CI)"    = "text_ipw"),
  widths        = c(3, 3.5, 2.5, 2.5),
  ref_line      = 0,
  legend_pos    = "topright",
  header        = "Subgroup",
  xlab          = "Marginal effect on SBP (mmHg): physical activity (95% CI)"
)


Time-varying exposures and longitudinal studies

Any analysis that produces estimates at multiple time points — distributed lag models, life-course analyses, repeated measures, cumulative exposure windows — yields a data frame where time is just another dimension. The same section / dodge / group building blocks that handle predictor groups and subgroup analyses apply here without modification.

Distributed lag models

A distributed lag model (DLM) estimates the association between an exposure and an outcome separately at each lag. The result is one row per lag per exposure. Use section = "exposure" to group lags under each exposure, and add an overall (cumulative) estimate as an is_summary = TRUE diamond at the bottom of each section.

set.seed(2025)

# Simulate lag-specific estimates for three environmental exposures
# Each exposure has a distinct lag-response shape
make_lags <- function(pattern) {
  se  <- runif(length(pattern), 0.03, 0.06)
  data.frame(
    estimate = pattern + rnorm(length(pattern), sd = 0.02),
    lower    = pattern - 1.96 * se,
    upper    = pattern + 1.96 * se
  )
}

pm25  <- make_lags(c(0.04, 0.11, 0.10, 0.06, 0.03,  0.01,  0.00))
noise <- make_lags(c(0.09, 0.05,  0.02, 0.01, 0.00, -0.01,  0.00))
green <- make_lags(c(-0.02, -0.04, -0.07, -0.09, -0.10, -0.09, -0.08))

lag_labels <- paste("Lag", 0:6, "(weeks)")

dlm_dat <- rbind(
  data.frame(exposure = "PM2.5",        lag = lag_labels, pm25,  is_sum = FALSE),
  data.frame(exposure = "PM2.5",        lag = "Cumulative", estimate = 0.35,
             lower = 0.18, upper = 0.52, is_sum = TRUE),
  data.frame(exposure = "Road noise",   lag = lag_labels, noise, is_sum = FALSE),
  data.frame(exposure = "Road noise",   lag = "Cumulative", estimate = 0.16,
             lower = 0.04, upper = 0.28, is_sum = TRUE),
  data.frame(exposure = "Green space",  lag = lag_labels, green, is_sum = FALSE),
  data.frame(exposure = "Green space",  lag = "Cumulative", estimate = -0.49,
             lower = -0.68, upper = -0.30, is_sum = TRUE)
)

dlm_dat$est_text <- ifelse(
  dlm_dat$is_sum,
  sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper),
  sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper)
)

forrest(
  dlm_dat,
  estimate   = "estimate",
  lower      = "lower",
  upper      = "upper",
  label      = "lag",
  section    = "exposure",
  is_summary = "is_sum",
  ref_line   = 0,
  header     = "Lag",
  cols       = c("Coef (95% CI)" = "est_text"),
  widths     = c(3.5, 4, 2.8),
  xlab       = "Regression coefficient per IQR increase (95% CI)"
)

Life-course analysis: same exposure across developmental periods

A life-course design estimates the same exposure–outcome association at multiple developmental stages. Use section = "life_stage" to organise the stages as named groups, with one row per exposure within each stage.

set.seed(99)

make_row <- function(exposure, life_stage, est, se) {
  data.frame(
    life_stage = life_stage,
    exposure   = exposure,
    estimate   = est  + rnorm(1, sd = 0.01),
    lower      = est  - 1.96 * se,
    upper      = est  + 1.96 * se
  )
}

lc_dat <- rbind(
  make_row("Noise exposure",         "Prenatal",        0.08, 0.03),
  make_row("Green space access",     "Prenatal",       -0.05, 0.04),
  make_row("Traffic-related air",    "Prenatal",        0.06, 0.03),
  make_row("Noise exposure",         "Early childhood", 0.12, 0.04),
  make_row("Green space access",     "Early childhood",-0.09, 0.05),
  make_row("Traffic-related air",    "Early childhood", 0.10, 0.04),
  make_row("Noise exposure",         "Mid-childhood",   0.15, 0.05),
  make_row("Green space access",     "Mid-childhood",  -0.13, 0.05),
  make_row("Traffic-related air",    "Mid-childhood",   0.14, 0.05),
  make_row("Noise exposure",         "Adolescence",     0.09, 0.04),
  make_row("Green space access",     "Adolescence",    -0.07, 0.05),
  make_row("Traffic-related air",    "Adolescence",     0.08, 0.04)
)

forrest(
  lc_dat,
  estimate = "estimate",
  lower    = "lower",
  upper    = "upper",
  label    = "exposure",
  section  = "life_stage",
  group    = "exposure",
  ref_line = 0,
  header   = "Exposure",
  xlab     = "Coefficient per IQR increase in SBP (95% CI)"
)

Life-course analysis: comparing stages side by side (dodge)

When the question is how does the association at one developmental stage compare to another for the same exposure, flip the structure: label is the exposure, group is the life stage, and dodge = TRUE places all stages side by side within each exposure row.

# Same data — one row per exposure × life stage
forrest(
  lc_dat,
  estimate = "estimate",
  lower    = "lower",
  upper    = "upper",
  label    = "exposure",
  group    = "life_stage",
  dodge    = TRUE,
  ref_line = 0,
  header   = "Exposure",
  xlab     = "Coefficient per IQR increase in SBP (95% CI)"
)


Saving plots

save_forrest(
  file   = "regression_forest.pdf",
  plot   = function() {
    forrest(
      coefs,
      estimate = "estimate",
      lower    = "conf.low",
      upper    = "conf.high",
      label    = "term",
      stripe   = TRUE,
      xlab     = "Regression coefficient (95% CI)"
    )
  },
  width  = 9,
  height = 5
)