# Subject-level example data: one row per subject. Subject MYCSG-1003 has a
# missing age, which the summary below must count as a missing record.
adsl <- data.frame(
  studyid = c("MYCSG", "MYCSG", "MYCSG", "MYCSG", "MYCSG", "MYCSG"),
  usubjid = c(
    "MYCSG-1001", "MYCSG-1002", "MYCSG-1003",
    "MYCSG-1004", "MYCSG-1006", "MYCSG-1007"
  ),
  trt01pn = c(1, 3, 3, 3, 3, 1),
  age = c(23, 68, NA, 35, 54, 63),
  fasfl = c("Y", "Y", "Y", "Y", "Y", "N"),
  trt01p = c(
    "Dose level 1", "Dose level 3", "Dose level 3",
    "Dose level 3", "Dose level 3", "Dose level 1"
  ),
  stringsAsFactors = FALSE
)
#==============================================================================
# Base R summary (aggregate) with intermediate names + final tidyverse-style names
#==============================================================================
# Summarise age within each trt01pn / trt01p combination.
#
# na.action = na.pass is required: the formula method of aggregate() defaults
# to na.omit, which drops rows with a missing age *before* FUN is called. With
# that default nmiss would always be 0 and nrecs would undercount the records.
stats01 <- aggregate(
  age ~ trt01pn + trt01p,
  data = adsl,
  FUN = function(x) {
    obs <- x[!is.na(x)]
    n_recs <- length(x)
    n_obs <- length(obs)
    has_obs <- n_obs > 0L
    # Quantile definition type 2; yields NA for a group with no observed values.
    quartiles <- quantile(obs, c(0.25, 0.75), type = 2, names = FALSE)
    c(
      tmp_nrecs = n_recs,
      tmp_nmiss = n_recs - n_obs,
      tmp_n = n_obs,
      # Guard the all-missing case: mean() would give NaN and min()/max()
      # would warn and return Inf/-Inf on an empty vector.
      tmp_mean = if (has_obs) mean(obs) else NA_real_,
      tmp_stddev = sd(obs),
      tmp_min = if (has_obs) min(obs) else NA_real_,
      tmp_q1 = quartiles[1],
      tmp_median = median(obs),
      tmp_q3 = quartiles[2],
      tmp_max = if (has_obs) max(obs) else NA_real_
    )
  },
  na.action = na.pass
)
# Flatten the matrix column returned by aggregate() into real columns.
# stringsAsFactors = FALSE keeps trt01p as character on R < 4.0 as well.
stats01 <- do.call(data.frame, c(stats01, list(stringsAsFactors = FALSE)))
# Remove analysis-variable prefix added when flattening (age.)
names(stats01) <- sub("^age\\.", "", names(stats01))
# Map intermediate names to final tidyverse-style names
final_map <- c(
  tmp_nrecs = "nrecs",
  tmp_nmiss = "nmiss",
  tmp_n = "n",
  tmp_mean = "mean",
  tmp_stddev = "stddev",
  tmp_min = "min",
  tmp_q1 = "q1",
  tmp_median = "median",
  tmp_q3 = "q3",
  tmp_max = "max"
)
# Rename only the columns present in final_map; the grouping columns
# (trt01pn, trt01p) have no entry and keep their names.
map_idx <- match(names(stats01), names(final_map))
is_mapped <- !is.na(map_idx)
names(stats01)[is_mapped] <- unname(final_map[map_idx[is_mapped]])