Merge the nearest date and related variables from another data frame by group

Here is a solution that uses only base R:

# For every ID present in both df1 and df2, pair each df1 row with the df2 row
# of the same ID whose dateTarget is closest, yielding one merged data frame
# per ID.
z <- lapply(intersect(df1$ID, df2$ID), function(id) {
  d1 <- subset(df1, ID == id)
  d2 <- subset(df2, ID == id)

  # Position within d2 of the nearest date for each d1 row. vapply() pins the
  # result to an integer vector: which.min() returns integer(0) when every
  # distance is NA (e.g. a missing dateTarget), which sapply() would silently
  # turn into a list column. Such rows get NA and drop out of the inner join
  # below because no d2 row carries an NA index.
  d1$indices <- vapply(seq_len(nrow(d1)), function(i) {
    nearest <- which.min(abs(d2$dateTarget - d1$dateTarget[i]))
    if (length(nearest) == 0L) NA_integer_ else nearest
  }, integer(1))

  # seq_len() rather than 1:nrow(): a zero-row subset (an NA ID survives
  # intersect() but is dropped by subset()) must give integer(0), not c(1, 0).
  d2$indices <- seq_len(nrow(d2))

  # Shared columns other than the keys (here dateTarget) get .x / .y suffixes.
  merge(d1, d2, by = c("ID", "indices"))
})

# Stack the per-ID results and drop the helper join key.
z2 <- do.call(rbind, z)
z2$indices <- NULL

print(z2)

#    ID dateTarget.x Value dateTarget.y ValueMatch
# 1   3   2015-11-14    47   2015-07-06         48
# 2   3   2015-12-08    98   2015-07-06         48
# 3   3   2015-02-22    52   2015-03-09         94
# 4   3   2014-11-17    68   2014-12-15         95
# 5   3   2013-05-30    91   2013-04-01         85
# 6   1   2013-11-04    70   2014-02-21         35
# 7   1   2014-12-29    18   2014-12-06         88
# 8   2   2013-01-14    52   2013-04-08         77
# 9   2   2015-07-29    97   2015-08-01         68
# 10  2   2015-06-15    98   2015-08-01         68

Leave a Comment