Needed libraries:
library(tidyverse)
library(sf)
library(patchwork)
library(stars)
library(raster)
library(gstat)An interpolation is the process in which we take observations of some parameter data to predict it value in unsampled locations. Very often this is done to convert point observations in some type of continuous distribution, usually a raster.
Any interpolation is a model. Sometimes the model is very simple and is only based on the distance between the observed and predicted locations (e.g. inverse distance weighted interpolation). Other times the models are more complex (e.g. geostatistical models, spatial splines) and may incorporate covariates (e.g. depth, temperature).
Let’s interpolate some cod data from the Icelandic bottom trawl survey. First, lets make an sf object with POINT geometry using the survey data from 2018.
biol <- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_biological.csv") %>%
filter(species == "cod")
cod <- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_stations.csv") %>%
filter(year == 2018) %>%
left_join(biol, by = "id") %>%
mutate(kg = replace_na(kg, 0)) %>%
dplyr::select(id, lon1, lat1, kg, n, duration) %>%
st_as_sf(coords = c("lon1", "lat1"), crs = 4326) %>%
st_transform(3395) # Mercator
iceland <- read_sf("ftp://ftp.hafro.is/pub/data/shapes/iceland_coastline.gpkg") %>%
st_transform(3395)A quick look at the data:
p1 <- ggplot() +
geom_sf(data = cod, aes(size = kg), alpha = 0.3, color = "red", show.legend = "point")
p2 <- ggplot() +
geom_histogram(data = cod, aes(x = kg))
library(patchwork)
p1 + p2A simple way to do an interpolation is to use Voronoi polygons. Each polygon includes all locations that are closest to the included point than to any other point
We can simply assume then that the values of our interpolation variable (cod abundance in our case) are constant in each polygon and equal to the included point.
pts <- cod %>%
st_combine() # To make a MULTIPOINT... but we loose the attributes
chull <- st_convex_hull(pts) %>%
st_buffer(10000) %>%
st_geometry() %>%
st_difference(iceland)
# What happens if we get a convex hull of the cod POINT object?
vor <- st_voronoi(pts) %>%
st_collection_extract() %>%
st_intersection(chull)
# Spatial join with the cod POINT object
vor <- vor %>%
st_as_sf() %>%
st_join(cod)
ggplot() +
geom_sf(data = vor, aes(fill = kg)) +
geom_sf(data = cod, col = "red", size = .5) +
scale_fill_viridis_c(trans = "log1p")
We can rasterize the polygons if needed.
ext <- extent(st_bbox(vor)[c(1, 3, 2, 4)])
crs <- st_crs(vor)[[2]]
vor.sp <- as_Spatial(vor)
target <- raster(ext = ext, crs = crs, res = 1000)
vor_rst <- rasterize(vor.sp, target, field = "kg")
plot(vor_rst)The gstats provides several methods for interpolation. One of the simplest one is the NN interpolation. Here for each cell in the target raster we take the 4 closest datapoints. With idp=0 we indicate that the distance is not taken into account.
gs <- gstat(formula = kg ~ 1, data = cod,
nmax = 4, set = list(idp = 0))
int <- interpolate(target, gs)## [inverse distance weighted interpolation]
plot(int)
Clearly we are interpolating to all the cell in the raster. Probably we
want to remove locations in land, and locations to far away from
sampling locations. One way to do this is to apply a mask.
chull_mask <- rasterize(as_Spatial(chull), target)
plot(chull_mask)int <- mask(int, chull_mask)
plot(int)Here the prediction in each cell is the weighted average of all observations. Weights are computed according to their distance to the interpolation location, elevated to a negative parameter p given by the idp argument.
Greater p values give more influence to the points that are closer.
gs <- gstat(formula = kg ~ 1, data = cod,
set = list(idp = 2.5))
int <- interpolate(target, gs) %>%
mask(chull_mask)## [inverse distance weighted interpolation]
plot(int)gs <- gstat(formula = kg ~ 1, data = cod,
set = list(idp = 5))
int <- interpolate(target, gs) %>%
mask(chull_mask)## [inverse distance weighted interpolation]
plot(int)
If we only use the closest neighbour, we obtain the Voronoi
polygons:
gs <- gstat(formula=kg~1, data = cod, nmax=1, set=list(idp=1))
int <- interpolate(target, gs) %>%
mask(chull_mask)## [inverse distance weighted interpolation]
plot(int)Here we will do a simple demonstration of kriging, a widely used geostatistical interpolation technique.
First let’s load our data…
biol <- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_biological.csv") %>%
filter(species == "cod")
cod <- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_stations.csv") %>%
filter(year == 2018) %>%
left_join(biol, by = "id") %>%
mutate(kg = replace_na(kg, 0)) %>%
dplyr::select(id, lon1, lat1, kg, n, duration) %>%
st_as_sf(coords = c("lon1", "lat1"), crs = 4326) %>%
st_transform(3395) # Mercator
iceland <- read_sf("ftp://ftp.hafro.is/pub/data/shapes/iceland_coastline.gpkg") %>%
st_transform(3395)and create our mask:
pts <- cod %>%
st_combine() # To make a MULTIPOINT... but we loose the attributes
chull <- st_convex_hull(pts) %>%
st_buffer(10000) %>%
st_geometry() %>%
st_difference(iceland)
target <- raster(xmn = -3040079,
xmx = -1173079,
ymn = 9062775,
ymx = 10221775,
crs = "+proj=merc",
res = 1000)
chull_mask <- rasterize(as_Spatial(chull), target)Now let’s compute the variogram, which is a model of how the observed data differ as a function of the distance (and possibly other variables).
ggplot() +
geom_histogram(data = cod, aes(x = kg))# ...very skewed. Let's interpolate in log scale'
# Experimental variogram
vg.exp <- variogram(log1p(kg) ~ 1, data = cod)
# Variogram model
vg.model <- vgm(model = "Exp", psill = 2,
range = 2e5)
# Model fit
vg.fit <- fit.variogram(vg.exp, vg.model)
plot(vg.exp, vg.fit)
Now let’s do the kriging.
# Let's not interpolate too far away from our sampling sites
cod_buf <- st_buffer(cod, 50000) %>%
st_union() %>%
nngeo::st_remove_holes() %>%
st_as_sf()
ggplot() +
geom_sf(data = cod_buf) +
geom_sf(data = iceland, fill = "darkgray") +
geom_sf(data = cod, color = "red") +
theme_bw()# Make an empty raster
proj <- st_crs(cod)[[2]]
target <- raster(xmn = -3080000,
xmx = -1130000,
ymn = 9000000,
ymx = 10260000,
res = 5000,
crs = proj)
mask1 <- rasterize(cod_buf, target)
mask2 <- rasterize(iceland, target)
mask1[mask2 == 1] <- NA
# Create a gstat object
model <- gstat(formula = log1p(kg) ~ 1, data = cod,
model = vg.fit)
kriged <- interpolate(target, model = model)## [using ordinary kriging]
kriged <- mask(kriged, mask1) # Apply the mask
kriged <- expm1(kriged) # Because we modeled in log scale
g <- ggplot() +
geom_stars(data = st_as_stars(kriged)) +
geom_sf(data = iceland) +
scale_fill_viridis_c(trans = "log")Finally, an example used Generalsed Additive Models (GAMs).
library(mgcv)
# We need the cod coordinates
cod_xy <- st_coordinates(cod) %>%
as_tibble()
cod <- bind_cols(cod, cod_xy)
# Fit a GAM with a Tweedie distribution and log link
# ... good choice for a skewed, positive distribution
my.model <- gam(kg ~ s(X, Y), data = cod, family=tw(link=log))
# Now lets get the coordinates for the prediction
locs <- which(chull_mask[] == 1)
pred.xy <- as.data.frame(xyFromCell(chull_mask, locs))
names(pred.xy) <- c("X", "Y") # They need to match the names of the GAM model
predicted <- predict(object = my.model,
newdata = pred.xy,
type = "response")
predicted.rst <- raster(chull_mask) # I get an 1D array
predicted.rst[locs] <- predicted # Put the values in the raster
plot(predicted.rst)