FAQ + Gallery

Summary Tables

Add a spanning header over the group columns for increased clarity, and modify column headers. bold_labels() formats the labels in bold; labels can instead be italicized with italicize_labels(), or both functions can be combined to produce bold italic labels.

# Summarize age and grade by treatment arm, relabel the statistic columns,
# and group those columns under one spanning header.
trial %>%
  select(trt, age, grade) %>%
  tbl_summary(
    by = trt,
    statistic = all_continuous() ~ "{median} ({p25}, {p75})",
    missing = "no"
  ) %>%
  # each statistic column header: arm name, then its N and percentage
  modify_header(
    all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p)}%)"
  ) %>%
  add_n() %>%
  bold_labels() %>%
  modify_spanning_header(
    all_stat_cols() ~ "**Chemotherapy Treatment**"
  )

Characteristic	N	Chemotherapy Treatment
Characteristic	N	Drug A N = 98 (49%)¹	Drug B N = 102 (51%)¹
Age	189	46 (37, 59)	48 (39, 56)
Grade	200
I		35 (36%)	33 (32%)
II		32 (33%)	36 (35%)
III		31 (32%)	33 (32%)
¹ Median (IQR); n (%)

Show continuous summary statistics on multiple lines. Levels are italicized here using the italicize_levels() function, but the bold_levels() function can be used instead to create bold text, or both functions can be used together to get text that is both bold and in italics.

# Summarize age and marker by treatment arm as "continuous2" variables so
# that each requested statistic is printed on its own row; the level rows
# are italicized.
trial %>%
  select(trt, age, marker) %>%
  tbl_summary(
    by = trt,
    missing = "no",
    type = all_continuous() ~ "continuous2",
    statistic = all_continuous() ~ c(
      "{N_nonmiss}",
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    )
  ) %>%
  italicize_levels()

Characteristic	Drug A, N = 98	Drug B, N = 102
Age
N	91	98
Mean (SD)	47 (15)	47 (14)
Median (IQR)	46 (37, 59)	48 (39, 56)
Range	6, 78	9, 83
Marker Level (ng/mL)
N	92	98
Mean (SD)	1.02 (0.89)	0.82 (0.83)
Median (IQR)	0.84 (0.24, 1.57)	0.52 (0.19, 1.20)
Range	0.00, 3.87	0.01, 3.64

Modify the function that formats the p-values, change variable labels, update the tumor response header, and add a correction for multiple testing.

# Compare age and grade between tumor response groups.
trial %>%
  select(response, age, grade) %>%
  # relabel the response levels; these labels become the column headers
  mutate(
    response = factor(
      response,
      labels = c("No Tumor Response", "Tumor Responded")
    )
  ) %>%
  tbl_summary(
    by = response,
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade"),
    missing = "no"
  ) %>%
  # custom p-value formatter: style_pvalue() with digits = 2
  add_p(pvalue_fun = function(x) style_pvalue(x, digits = 2)) %>%
  # q-values: the correction for multiple testing
  add_q()

Characteristic	No Tumor Response, N = 132¹	Tumor Responded, N = 61¹	p-value²	q-value³
Patient Age	46 (36, 55)	49 (43, 59)	0.091	0.18
Tumor Grade			0.93	0.93
I	46 (35%)	21 (34%)
II	44 (33%)	19 (31%)
III	42 (32%)	21 (34%)
¹ Median (IQR); n (%)
² Wilcoxon rank sum test; Pearson's Chi-squared test
³ False discovery rate correction for multiple testing

Include missing tumor response as column using fct_explicit_na().

# Turn missing tumor response into its own factor level so that it appears
# as a separate column of the summary table.
# NOTE(review): fct_explicit_na() is deprecated as of forcats 1.0.0 in favour
# of fct_na_value_to_level(); confirm the installed forcats version before
# switching.
trial %>%
  select(response, age, grade) %>%
  mutate(
    response = fct_explicit_na(
      factor(response, labels = c("No Tumor Response", "Tumor Responded")),
      na_level = "Missing Response Status"
    )
  ) %>%
  tbl_summary(
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade"),
    by = response
  )

Characteristic	No Tumor Response, N = 132¹	Tumor Responded, N = 61¹	Missing Response Status, N = 7¹
Patient Age	46 (36, 55)	49 (43, 59)	52 (44, 57)
Unknown	7	3	1
Tumor Grade
I	46 (35%)	21 (34%)	1 (14%)
II	44 (33%)	19 (31%)	5 (71%)
III	42 (32%)	21 (34%)	1 (14%)
¹ Median (IQR); n (%)

Report treatment differences between two groups. This is often needed in randomized trials. In this example, we report the difference in tumor response and marker level between two chemotherapy treatments.

# Report the between-arm difference in tumor response and marker level.
trial %>%
  select(response, marker, trt) %>%
  tbl_summary(
    by = trt,
    missing = "no",
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{p}%"
    )
  ) %>%
  add_difference() %>%
  add_n() %>%
  # statistic column headers show only the arm name ...
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  # ... and carry no footnote
  modify_footnote(all_stat_cols() ~ NA)

Characteristic	N	Drug A	Drug B	Difference¹	95% CI^1,2	p-value¹
Tumor Response	193	29%	34%	-4.2%	-18%, 9.9%	0.6
Marker Level (ng/mL)	190	1.02 (0.89)	0.82 (0.83)	0.20	-0.05, 0.44	0.12
¹ Two sample test for equality of proportions; Welch Two Sample t-test
² CI = Confidence Interval

Paired t-test and McNemar’s test. The data is expected in a long format with 2 rows per participant.

# Pretend every patient received both Drug A and Drug B: number the rows
# within each arm so that rows sharing an id form one pair of measurements.
trial_paired <- trial %>%
  select(trt, marker, response) %>%
  group_by(trt) %>%
  mutate(id = row_number()) %>%
  ungroup()

# Incomplete pairs have to be removed before the table can be built.
trial_paired %>%
  # drop every row that contains a missing value
  filter(complete.cases(.)) %>%
  # retain only the ids that still have both of their rows
  group_by(id) %>%
  filter(n() == 2) %>%
  ungroup() %>%
  # summarize by arm (id left out of the table), then add the paired tests
  tbl_summary(include = -id, by = trt) %>%
  add_p(
    group = id,
    test = list(
      marker ~ "paired.t.test",
      response ~ "mcnemar.test"
    )
  )

Characteristic	Drug A, N = 83¹	Drug B, N = 83¹	p-value²
Marker Level (ng/mL)	0.82 (0.22, 1.63)	0.53 (0.18, 1.26)	0.2
Tumor Response	21 (25%)	28 (34%)	0.3
¹ Median (IQR); n (%)
² Paired t-test; McNemar's Chi-squared test with continuity correction

Include p-values comparing all groups to a single reference group.

# Base table: summary statistics for every grade, without p-values
small_trial <- select(trial, grade, age, response)
t0 <- small_trial %>%
  tbl_summary(missing = "no", by = grade) %>%
  modify_header(all_stat_cols() ~ "**{level}**")

# Comparison of grade I with grade II
t1 <- small_trial %>%
  filter(grade %in% c("I", "II")) %>%
  tbl_summary(missing = "no", by = grade) %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. II**")) %>%
  # the summary statistic columns are hidden in this table
  modify_column_hide(all_stat_cols())

# Comparison of grade I with grade III
t2 <- small_trial %>%
  filter(grade %in% c("I", "III")) %>%
  tbl_summary(missing = "no", by = grade) %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. III**")) %>%
  # the summary statistic columns are hidden in this table
  modify_column_hide(all_stat_cols())

# Merge the three tables, then label the two column groups with spanning
# headers
list(t0, t1, t2) %>%
  tbl_merge() %>%
  modify_spanning_header(
    list(
      all_stat_cols() ~ "**Tumor Grade**",
      starts_with("p.value") ~ "**p-values**"
    )
  )

Characteristic	Tumor Grade			p-values
Characteristic	I¹	II¹	III¹	I vs. II²	I vs. III²
Age	47 (37, 56)	48 (37, 57)	47 (38, 58)	0.7	0.5
Tumor Response	21 (31%)	19 (30%)	21 (33%)	>0.9	0.9
¹ Median (IQR); n (%)
² Wilcoxon rank sum test; Fisher's exact test

Add a 95% confidence interval around the mean as an additional column.


# Report mean (SD) for age and marker, then append a confidence interval
# column.
trial %>%
  select(age, marker) %>%
  tbl_summary(
    missing = "no",
    statistic = all_continuous() ~ "{mean} ({sd})"
  ) %>%
  modify_header(stat_0 ~ "**Mean (SD)**") %>%
  add_ci()

Characteristic	Mean (SD)¹	95% CI²
Age	47 (14)	45, 49
Marker Level (ng/mL)	0.92 (0.86)	0.79, 1.0
¹ Mean (SD)
² CI = Confidence Interval

It’s often needed to summarize a continuous variable by one, two, or more categorical variables. The example below shows a table summarizing a continuous variable by two categorical variables. To summarize by more than two categorical variables, use tbl_continuous in conjunction with tbl_strata (see an example of tbl_strata here).

# Summarize marker level by grade (rows) and treatment arm (columns).
trial %>%
  select(trt, grade, marker) %>%
  tbl_continuous(by = trt, variable = marker) %>%
  modify_spanning_header(
    all_stat_cols() ~ "**Treatment Assignment**"
  )

Characteristic	Treatment Assignment
Characteristic	Drug A, N = 98¹	Drug B, N = 102¹
Grade
I	0.96 (0.24, 1.70)	1.05 (0.29, 1.49)
II	0.66 (0.31, 1.23)	0.21 (0.10, 0.94)
III	0.84 (0.17, 1.91)	0.58 (0.35, 1.36)
¹ Marker Level (ng/mL): Median (IQR)

Build a summary table stratified by more than one variable.

# Build one by-treatment summary table per grade and combine them.
trial %>%
  select(trt, grade, age, stage) %>%
  # prefix the grade values so the strata read "Grade I", "Grade II", ...
  mutate(grade = paste("Grade", grade)) %>%
  tbl_strata(
    strata = grade,
    # table-building function applied to each stratum's data (.x)
    .tbl_fun = ~ .x %>%
      tbl_summary(missing = "no", by = trt) %>%
      modify_header(all_stat_cols() ~ "**{level}**")
  )

Characteristic	Grade I		Grade II		Grade III
Characteristic	Drug A¹	Drug B¹	Drug A¹	Drug B¹	Drug A¹	Drug B¹
Age	46 (36, 60)	48 (42, 55)	44 (31, 54)	50 (43, 57)	52 (42, 60)	45 (36, 52)
T Stage
T1	8 (23%)	9 (27%)	14 (44%)	9 (25%)	6 (19%)	7 (21%)
T2	8 (23%)	10 (30%)	8 (25%)	9 (25%)	9 (29%)	10 (30%)
T3	11 (31%)	7 (21%)	5 (16%)	6 (17%)	6 (19%)	8 (24%)
T4	8 (23%)	7 (21%)	5 (16%)	12 (33%)	10 (32%)	8 (24%)
¹ Median (IQR); n (%)

Regression Tables

Include number of observations and the number of events in a univariate regression table.

# Univariate logistic regressions of tumor response on each remaining
# column, with the number of events added to the table.
trial %>%
  select(response, age, grade) %>%
  tbl_uvregression(
    y = response,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE
  ) %>%
  add_nevent()

Characteristic	N	Event N	OR¹	95% CI¹	p-value
Age	183	58	1.02	1.00, 1.04	0.10
Grade	193	61
I			—	—
II			0.95	0.45, 2.00	0.9
III			1.10	0.52, 2.29	0.8
¹ OR = Odds Ratio, CI = Confidence Interval

Include two related models side-by-side with descriptive statistics. We also use the compact table theme that reduces cell padding and font size.

# Logistic regression of tumor response, with exponentiated coefficients
gt_r1 <- glm(response ~ trt + grade, data = trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE)
# Cox regression of time to death, with exponentiated coefficients
gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, data = trial) %>%
  tbl_regression(exponentiate = TRUE)
# Descriptive statistics for the two covariates used in both models
gt_t1 <- trial %>%
  select(trt, grade) %>%
  tbl_summary(missing = "no") %>%
  add_n() %>%
  modify_header(stat_0 ~ "**n (%)**") %>%
  modify_footnote(stat_0 ~ NA_character_)

# Switch to the compact theme before printing the merged table
theme_gtsummary_compact()
#> Setting theme `Compact`
tbl_merge(
  tbls = list(gt_t1, gt_r1, gt_r2),
  tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)

Characteristic	N	n (%)	Tumor Response			Time to Death
Characteristic	N	n (%)	OR¹	95% CI¹	p-value	HR¹	95% CI¹	p-value
Chemotherapy Treatment	200
Drug A		98 (49%)	—	—		—	—
Drug B		102 (51%)	1.21	0.66, 2.24	0.5	1.25	0.86, 1.81	0.2
Grade	200
I		68 (34%)	—	—		—	—
II		68 (34%)	0.94	0.44, 1.98	0.9	1.28	0.80, 2.06	0.3
III		64 (32%)	1.09	0.52, 2.27	0.8	1.69	1.07, 2.66	0.024
¹ OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio

Include the number of events at each level of a categorical predictor.

# Univariate Cox regressions on stage and grade; the N column is hidden and
# the number of events is shown on each level row.
trial %>%
  select(ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    y = Surv(ttdeath, death),
    method = coxph,
    hide_n = TRUE,
    exponentiate = TRUE
  ) %>%
  add_nevent(location = "level")

Characteristic	Event N	HR¹	95% CI¹	p-value
T Stage
T1	24	—	—
T2	27	1.18	0.68, 2.04	0.6
T3	22	1.23	0.69, 2.20	0.5
T4	39	2.48	1.49, 4.14	<0.001
Grade
I	33	—	—
II	36	1.28	0.80, 2.05	0.3
III	43	1.69	1.07, 2.66	0.024
¹ HR = Hazard Ratio, CI = Confidence Interval

Regression model where the covariate remains the same, and the outcome changes.

# Keep treatment as the covariate of every model (x = trt) and let each of
# the other selected columns take the role of outcome.
trial %>%
  select(age, marker, trt) %>%
  tbl_uvregression(
    x = trt,
    method = lm,
    hide_n = TRUE,
    show_single_row = "trt"
  ) %>%
  modify_header(
    list(
      label ~ "**Model Outcome**",
      estimate ~ "**Treatment Coef.**"
    )
  ) %>%
  modify_footnote(
    estimate ~ "Values larger than 0 indicate larger values in the Drug B group."
  )

Model Outcome	Treatment Coef.¹	95% CI²	p-value
Age	0.44	-3.7, 4.6	0.8
Marker Level (ng/mL)	-0.20	-0.44, 0.05	0.12
¹ Values larger than 0 indicate larger values in the Drug B group.
² CI = Confidence Interval

Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().

# Custom tidier that reports Wald confidence intervals.
#
# Arguments:
#   x             a fitted model object accepted by broom::tidy() and
#                 stats::confint.default()
#   exponentiate  if TRUE, estimates and confidence limits are both returned
#                 on the exponentiated scale
#   conf.level    confidence level used for the Wald interval
#   ...           accepted for compatibility with the caller; not used
#                 (presumably the caller, e.g. tbl_regression(tidy_fun = ),
#                 may pass extra arguments -- TODO confirm)
#
# Returns the broom::tidy() output with `conf.low` and `conf.high` columns
# appended.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
  # Wald limits on the coefficient scale, honouring the requested level
  wald_ci <- stats::confint.default(x, level = conf.level)

  # keep the limits on the same scale as the (exponentiated) estimates
  if (isTRUE(exponentiate)) {
    wald_ci <- exp(wald_ci)
  }

  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    # store the confidence limits in a tibble with the expected column names
    wald_ci %>%
      tibble::as_tibble() %>%
      rlang::set_names(c("conf.low", "conf.high"))
  )
}

# Fit the linear model, then tabulate it using the custom tidier defined
# above as my_tidy.
lm(age ~ grade + marker, data = trial) %>%
  tbl_regression(
    tidy_fun = my_tidy
  )

Characteristic	Beta	95% CI¹	p-value
Grade
I	—	—
II	0.64	-4.6, 5.9	0.8
III	2.4	-2.8, 7.6	0.4
Marker Level (ng/mL)	-0.04	-2.6, 2.5	>0.9
¹ CI = Confidence Interval

Use significance stars on estimates with low p-values.

# Univariate Cox regressions on stage and grade, with significance stars
# added to the estimates.
trial %>%
  select(ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death),
    # no trailing comma here: it would pass an extra, empty argument
    exponentiate = TRUE
  ) %>%
  add_significance_stars()

Characteristic	N	HR^1,2	SE²
T Stage	200
T1		—	—
T2		1.18	0.281
T3		1.23	0.295
T4		2.48***	0.260
Grade	200
I		—	—
II		1.28	0.241
III		1.69*	0.232
¹ *p<0.05; **p<0.01; ***p<0.001
² HR = Hazard Ratio, SE = Standard Error

FAQ + Gallery

Frequently Asked Questions

Data Summary Tables

Regression Tables

Summary Tables

Regression Tables