Fill NAs in a time series, but only up to a limited number of consecutive values

Here’s another way:

# Running count of observed values: every NA shares the count of the last
# observation before it (0 for leading NAs).
l <- cumsum(!is.na(x))
# Keep the count only for the first 4 positions of each run and zero it beyond
# that; after the + 1, index 1 hits the prepended NA and index k + 1 picks the
# k-th observed value.
c(NA, x[!is.na(x)])[
  replace(l, ave(l, l, FUN = seq_along) > 4, 0) + 1
]
# [1]  1  1  1  1  5  5  5  5 NA NA 11 12 12 12 12 NA NA NA 19 20

Edit: my previous answer required that x contain no duplicate values. The current answer has no such requirement.

benchmarks

# Recycle x to 10,000 elements so the timings below run on a larger input.
x <- rep(x, length.out=1e4)

# Carry the last non-missing value forward, but only across a limited number
# of consecutive NAs.
#
# x:      atomic vector that may contain NAs.
# maxgap: maximum number of consecutive NAs filled after each observed value
#         (default 3, the limit that was previously hard-coded).
#
# Returns a vector of the same length as x; leading NAs and NAs beyond the
# limit stay NA.
plourde <- function(x, maxgap = 3) {
    observed <- !is.na(x)
    # l[i] = number of observed values in x[1:i]; 0 marks leading NAs.
    l <- cumsum(observed)
    # Position of each element within its run (the observed value itself is 1).
    pos <- ave(l, l, FUN = seq_along)
    # Prepending NA makes index 1 mean "leave as NA"; index k + 1 selects the
    # k-th observed value.
    c(NA, x[observed])[replace(l, pos > maxgap + 1, 0) + 1]
}

# Fill NAs with the preceding observation, at most three times per
# observation, by processing each "observation + trailing NAs" group.
#
# x: object accepted by coredata() (presumably zoo::coredata, i.e. a zoo
#    series or plain vector — confirm against the caller).
#
# Returns a flat vector (carrying the names unlist() derives from the group
# labels) with the same number of elements as x.
agstudy <- function(x) {
    # Each group starts at an observed value and holds the NAs that follow it;
    # group "0" collects any leading NAs.
    groups <- split(coredata(x), cumsum(!is.na(x)))
    # lapply rather than sapply: sapply simplifies equal-length groups into a
    # matrix, which unlist() would hand back unchanged instead of flattening.
    filled <- lapply(groups, function(sx) {
        if (length(sx) > 3) {
            # Long group: fill only the three slots after the observation.
            sx[2:4] <- rep(sx[1], 3)
        } else {
            # Short group: every slot takes the leading value.
            sx <- rep(sx[1], length(sx))
        }
        sx
    })
    unlist(filled)
}

# Time both implementations on the same x; the figures below are the recorded
# output of one run (100 evaluations per expression).
# NOTE(review): microbenchmark() and coredata() are not base R — presumably the
# microbenchmark and zoo packages are attached elsewhere; confirm.
microbenchmark(plourde(x), agstudy(x))
# Unit: milliseconds
#        expr   min     lq median     uq   max neval
#  plourde(x)  5.30  5.591  6.409  6.774 57.13   100
#  agstudy(x) 16.04 16.249 16.454 17.516 20.64   100

Leave a Comment