Announcement Icon Online training class for Clinical R programming batch starts on Monday, 02Feb2026. Click here for details.

Retain last non-missing value


Lesson Description
-
  • Sometimes a value is missing at a visit and we want to fill it with the last non-missing value recorded for the same subject ("retain last non-missing value").
  • This lesson walks through a simple example and shows the key steps.
  • We will look at one way to do this in SAS and two ways in R (tidyverse and base R).
*-----------------------------------------------------------------------------;
* Example input: one record per subject visit, a period marks a missing score;
*-----------------------------------------------------------------------------;
data lnms;
    input usubjid visitnum score;
    cards;
101 1 20
101 2 .
101 3 30
102 1 90
102 2 10
102 3 .
102 4 .
;
run;

*-----------------------------------------------------------------------------;
* Order the records by subject and visit (sorted in place);
*-----------------------------------------------------------------------------;
proc sort data=lnms out=lnms;
    by usubjid visitnum;
run;

*=============================================================================;
* Retain last non-missing score within each subject;
*=============================================================================;
data lnms01;
    set lnms;
    by usubjid visitnum;

    * Keep last_score from one iteration of the data step to the next;
    retain last_score;

    * Reset at the first record of each subject so that a score is never
      carried over from the previous subject when the first visit is missing;
    if first.usubjid then last_score = .;

    * Store the original score;
    orig_score = score;

    * Remember a non-missing score, otherwise fill the missing score with
      the last non-missing value seen so far for this subject;
    if not missing(score) then last_score = score;
    else score = last_score;
run;
# ==============================================================================
# Retain last non-missing score (tidyverse approach)
# ==============================================================================
# NOTE: tribble(), mutate(), arrange(), group_by(), ungroup(), fill() and %>%
# come from the tibble, dplyr and tidyr packages; this snippet assumes they
# are already attached (for example via library(tidyverse)).

# Example input: one row per subject visit; NA marks a missing score.
lnms <- tribble(
  ~usubjid, ~visitnum, ~score,
  101, 1, 20,
  101, 2, NA,
  101, 3, 30,
  102, 1, 90,
  102, 2, 10,
  102, 3, NA,
  102, 4, NA
)

# Keep the original score, order visits within subject, then carry the last
# non-missing score forward. Grouping by usubjid stops a value from leaking
# from one subject into the next.
lnms01 <- lnms %>%
  mutate(orig_score = score) %>%
  arrange(usubjid, visitnum) %>%
  group_by(usubjid) %>%
  fill(score, .direction = "down") %>%
  # Drop the grouping so later steps do not silently operate per subject.
  ungroup()
# Example input: one row per subject visit; NA marks a missing score.
lnms <- data.frame(
  usubjid = rep(c(101, 102), times = c(3, 4)),
  visitnum = c(1, 2, 3, 1, 2, 3, 4),
  score = c(20, NA, 30, 90, 10, NA, NA)
)

# Order the rows by subject and visit, then keep a copy of the raw score.
row_order <- with(lnms, order(usubjid, visitnum))
lnms01 <- lnms[row_order, ]
lnms01$orig_score <- lnms01$score

# Within each subject, walk the visits in order and copy the previous
# (already filled) score into any missing position. Leading NAs stay NA
# because there is nothing before them to carry forward.
lnms01$score <- ave(lnms01$score, lnms01$usubjid, FUN = function(values) {
  filled <- values
  for (pos in seq_along(filled)[-1]) {
    if (is.na(filled[pos])) {
      filled[pos] <- filled[pos - 1]
    }
  }
  filled
})
  • ave() runs a custom fill-forward function within each subject.
  • The loop replaces NA with the most recent non-missing value.