Get the indices where the y value of two consecutive time steps have different sign. Use linear interpolation between these points to generate new x values where y is zero.
First, a smaller example to make it easier to get a feeling for the linear interpolation and which points are added to the original data:
# original data
d <- data.frame(x = 1:6,
y = c(-1, 2, 1, 2, -1, 1))
# coerce to data.table
library(data.table)
setDT(d)
# make sure data is ordered by x
setorder(d, x)
# add a grouping variable
# only to keep track of original and interpolated points in this example
d[ , g := "orig"]
# interpolation
d2 = d[ , {
ix = .I[c(FALSE, abs(diff(sign(d$y))) == 2)]
if(length(ix)){
pred_x = sapply(ix, function(i) approx(x = y[c(i-1, i)], y = x[c(i-1, i)], xout = 0)$y)
rbindlist(.(.SD, data.table(x = pred_x, y = 0, g = "new")))} else .SD
}]
d2
# x y grp
# 1 1.000000 -1 orig
# 2 2.000000 2 orig
# 3 3.000000 1 orig
# 4 4.000000 2 orig
# 5 5.000000 -1 orig
# 6 6.000000 1 orig
# 13 1.333333 0 new
# 11 4.666667 0 new
# 12 5.500000 0 new
Plot with original and new points differentiated by color:
ggplot(data = d2, aes(x = x, y = y)) +
geom_area(data = d2[y <= 0], fill = "red", alpha = 0.2) +
geom_area(data = d2[y >= 0], fill = "blue", alpha = 0.2) +
geom_point(aes(color = g), size = 4) +
scale_color_manual(values = c("red", "black")) +
theme_bw()
Apply on OP’s data:
d = as.data.table(orig)
# setorder(d, year)
d2 = d[ , {
ix = .I[c(FALSE, abs(diff(sign(d$afw))) == 2)]
if(length(ix)){
pred_yr = sapply(ix, function(i) approx(afw[c(i-1, i)], year[c(i-1, i)], xout = 0)$y)
rbindlist(.(.SD, data.table(year = pred_yr, afw = 0)))} else .SD}]
ggplot(data = d2, aes(x = year, y = afw)) +
geom_area(data = d2[afw <= 0], fill = "red") +
geom_area(data = d2[afw >= 0], fill = "blue") +
theme_bw()
In reply to @Jason Whythe’s comment, the method above can be modified to account for grouped data. The interpolation is made within each group, and the plot is facetted by group:
# data grouped by 'id'
d = data.table(
id = rep(c("a", "b", "c"), c(6, 5, 4)),
x = as.numeric(c(1:6, 1:5, 1:4)),
y = c(-1, 2, 1, 2, -1, 1,
0, -2, 0, -1, -2,
2, 1, -1, 1.5))
# again, this variable is just added for illustration
d[ , g := "orig"]
d2 = d[ , {
ix = .I[c(FALSE, abs(diff(sign(.SD$y))) == 2)]
if(length(ix)){
pred_x = sapply(ix, function(i) approx(x = d$y[c(i-1, i)], y = d$x[c(i-1, i)], xout = 0)$y)
rbindlist(.(.SD, data.table(x = pred_x, y = 0, g = "new")))} else .SD
}, by = id]
ggplot(data = d2, aes(x = x, y = y)) +
facet_wrap(~ id) +
geom_area(data = d2[y <= 0], fill = "red", alpha = 0.2) +
geom_area(data = d2[y >= 0], fill = "blue", alpha = 0.2) +
geom_point(aes(color = g), size = 4) +
scale_color_manual(values = c("red", "black")) +
theme_bw()
For an alternative base
solution adapted from @kohske‘s answer here (credits to him), see previous edits.