Announcement Icon Online training class for Clinical R programming batch starts on Monday, 02Feb2026. Click here for details.

Character Functions


Lesson Description
-
  • Character functions measure, search, extract, and transform text values; this lesson applies them to date/time strings such as "2025-12-31T23:59".
  • This lesson walks through a simple example and shows the key steps.
  • We will see one way to do this in SAS and two ways in R (tidyverse and base R).

 


/* Build ALLDATA with a single character variable, DTC, read from the        */
/* in-stream records below. The records run from a year-only value up to a   */
/* full date-time with seconds; the last record is a lone period.            */
data alldata;
    /* Read the in-stream (CARDS) records with DSD, '|' as delimiter, TRUNCOVER. */
    infile cards dsd dlm='|' truncover;
    /* Modified list input (colon) with the $20. informat.                     */
    /* NOTE(review): the lone '.' record is presumably read as a missing       */
    /* (blank) DTC - confirm in the output dataset.                            */
    input dtc : $20.;
cards;
2025
2025-01
2025-01-10
2025-02-28
2025-03
2025-12-31
2025-12-31T23
2025-12-31T23:59
2025-12-31T23:59:59
.
;
run;

/* Derive one example variable per SAS character function from DTC in ALLDATA. */
data dates01;
    set alldata;

    /* LENGTH: length of the value in DTC */
    length=length(dtc);
    /* COUNTW: number of words when 'T' is the only delimiter */
    countw=countw(dtc,'T');
    /* SCAN: first 'T'-delimited word (the date part) */
    scan_fword=scan(dtc,1,'T');
    /* COUNTC: number of '-' characters */
    countc=countc(dtc,'-');
    /* FIND: position of the first 'T'; the 'i' modifier ignores case */
    findt=find(dtc,'T','i');
    /* SUBSTR: first 10 characters */
    substr_date=substr(dtc,1,10);
    /* CATX: join the label and DTC with a single blank between them.         */
    /* The delimiter and the label must be separate arguments; written as one */
    /* literal (" ""Event start:") the whole text becomes the delimiter and   */
    /* the label never appears in the result.                                 */
    concat=catx(" ","Event start:",dtc);
    /* TRANSLATE: replace every ':' with '-' (arguments are source, to, from) */
    translate=translate(dtc,'-',':');
    /* TRANWRD: replace every occurrence of '2025' with '20xx' */
    tranwrd=tranwrd(dtc,'2025','20xx');
    /* MISSING: 1 when DTC is missing, otherwise 0 */
    missing=missing(dtc);

run;

 

# Example input: one character column of date/time strings with varying
# precision, plus one missing value.
alldata <- tibble(
  dtc = c(
    "2025",
    "2025-01",
    "2025-01-10",
    "2025-02-28",
    "2025-03",
    "2025-12-31",
    "2025-12-31T23",
    "2025-12-31T23:59",
    "2025-12-31T23:59:59",
    NA
  )
)


# Add one column per SAS character function, each computed from dtc.
dates01 <- mutate(
  alldata,
  # LENGTH(dtc): number of characters
  length = str_length(dtc),
  # COUNTW(dtc, 'T'): pieces separated by "T" = number of "T"s plus one
  countw = 1 + str_count(dtc, "T"),
  # SCAN(dtc, 1, 'T'): first piece before "T"
  scan_fword = word(dtc, start = 1, sep = "T"),
  # COUNTC(dtc, '-'): number of "-" characters
  countc = str_count(dtc, "-"),
  # FIND(dtc, 'T'): start position of the first "T"
  findt = str_locate(dtc, "T")[, "start"],
  # SUBSTR(dtc, 1, 10): first 10 characters
  substr_date = str_sub(dtc, start = 1, end = 10),
  # CATX(" ", ...): label and value joined by a single space
  concat = str_c("Event start:", dtc, sep = " "),
  # TRANSLATE(dtc, '-', ':'): every ":" becomes "-"
  translate = chartr(old = ":", new = "-", x = dtc),
  # TRANWRD(dtc, '2025', '20xx'): replace every "2025"
  tranwrd = str_replace_all(dtc, "2025", "20xx"),
  # MISSING(dtc): TRUE when dtc is NA
  missing = is.na(dtc)
)
# Example input: date/time strings of varying precision, plus one missing value.
alldata <- data.frame(
  dtc = c(
    "2025",
    "2025-01",
    "2025-01-10",
    "2025-02-28",
    "2025-03",
    "2025-12-31",
    "2025-12-31T23",
    "2025-12-31T23:59",
    "2025-12-31T23:59:59",
    NA
  ),
  stringsAsFactors = FALSE
)

dates01 <- alldata

# Number of characters in each string; a missing dtc gives NA.
dates01$length <- nchar(dates01$dtc)

# Rows with a non-missing dtc. strsplit() and gregexpr() are applied to these
# rows only, so the derived columns stay NA where dtc is missing.
idx <- !is.na(dates01$dtc)

# Split once on a literal "T" and reuse the pieces for both columns below.
parts <- strsplit(dates01$dtc[idx], "T", fixed = TRUE)

# Number of pieces when split by "T".
dates01$countw <- NA_integer_
dates01$countw[idx] <- lengths(parts)

# First piece before "T" (the date portion). vapply() pins the result type,
# so an empty selection yields character(0) rather than list().
dates01$scan_fword <- NA_character_
dates01$scan_fword[idx] <- vapply(parts, `[`, character(1), 1L)

# Number of "-" characters. gregexpr() reports "no match" as a single -1,
# which must count as zero rather than one.
dates01$countc <- NA_integer_
dates01$countc[idx] <- vapply(
  gregexpr("-", dates01$dtc[idx], fixed = TRUE),
  function(pos) if (pos[1L] == -1L) 0L else length(pos),
  integer(1)
)

# Position of the first "T"; NA when there is none or dtc is missing.
findt_pos <- regexpr("T", dates01$dtc, fixed = TRUE)

dates01$findt <- ifelse(findt_pos == -1L, NA_integer_, as.integer(findt_pos))

# First 10 characters.
dates01$substr_date <- substr(dates01$dtc, 1, 10)

# Label plus value; kept NA for a missing dtc instead of pasting the text "NA".
dates01$concat <- ifelse(
  is.na(dates01$dtc),
  NA_character_,
  paste("Event start:", dates01$dtc)
)

# Every ":" becomes "-".
dates01$translate <- chartr(":", "-", dates01$dtc)

# Every literal "2025" becomes "20xx".
dates01$tranwrd <- gsub("2025", "20xx", dates01$dtc, fixed = TRUE)

# TRUE where dtc is missing.
dates01$missing <- is.na(dates01$dtc)
  • nchar(), strsplit(), and an apply-family function (sapply()/vapply()) are used to count and extract pieces.
  • gregexpr()/regexpr() find patterns; substr() extracts substrings.
  • paste(), chartr(), and gsub() transform text; is.na() flags missing.