Needed libraries:
library(tidyverse)
library(sf)
library(patchwork)
library(stars)
library(raster)
library(gstat)
An interpolation is the process in which we take observations of some parameter data to predict it value in unsampled locations. Very often this is done to convert point observations in some type of continuous distribution, usually a raster.
Any interpolation is a model. Sometimes the model is very simple and is only based on the distance between the observed and predicted locations (e.g. inverse distance weighted interpolation). Other times the models are more complex (e.g. geostatistical models, spatial splines) and may incorporate covariates (e.g. depth, temperature).
Let’s interpolate some cod data from the Icelandic bottom trawl survey. First, lets make an sf object with POINT geometry using the survey data from 2018.
<- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_biological.csv") %>%
biol filter(species == "cod")
<- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_stations.csv") %>%
cod filter(year == 2018) %>%
left_join(biol, by = "id") %>%
mutate(kg = replace_na(kg, 0)) %>%
::select(id, lon1, lat1, kg, n, duration) %>%
dplyrst_as_sf(coords = c("lon1", "lat1"), crs = 4326) %>%
st_transform(3395) # Mercator
<- read_sf("ftp://ftp.hafro.is/pub/data/shapes/iceland_coastline.gpkg") %>%
iceland st_transform(3395)
A quick look at the data:
<- ggplot() +
p1 geom_sf(data = cod, aes(size = kg), alpha = 0.3, color = "red", show.legend = "point")
<- ggplot() +
p2 geom_histogram(data = cod, aes(x = kg))
library(patchwork)
+ p2 p1
A simple way to do an interpolation is to use Voronoi polygons. Each polygon includes all locations that are closest to the included point than to any other point
We can simply assume then that the values of our interpolation variable (cod abundance in our case) are constant in each polygon and equal to the included point.
<- cod %>%
pts st_combine() # To make a MULTIPOINT... but we loose the attributes
<- st_convex_hull(pts) %>%
chull st_buffer(10000) %>%
st_geometry() %>%
st_difference(iceland)
# What happens if we get a convex hull of the cod POINT object?
<- st_voronoi(pts) %>%
vor st_collection_extract() %>%
st_intersection(chull)
# Spatial join with the cod POINT object
<- vor %>%
vor st_as_sf() %>%
st_join(cod)
ggplot() +
geom_sf(data = vor, aes(fill = kg)) +
geom_sf(data = cod, col = "red", size = .5) +
scale_fill_viridis_c(trans = "log1p")
We can rasterize the polygons if needed.
<- extent(st_bbox(vor)[c(1, 3, 2, 4)])
ext <- st_crs(vor)[[2]]
crs
<- as_Spatial(vor)
vor.sp
<- raster(ext = ext, crs = crs, res = 1000)
target
<- rasterize(vor.sp, target, field = "kg")
vor_rst
plot(vor_rst)
The gstats provides several methods for interpolation. One of the simplest one is the NN interpolation. Here for each cell in the target raster we take the 4 closest datapoints. With idp=0 we indicate that the distance is not taken into account.
<- gstat(formula = kg ~ 1, data = cod,
gs nmax = 4, set = list(idp = 0))
<- interpolate(target, gs) int
## [inverse distance weighted interpolation]
plot(int)
Clearly we are interpolating to all the cell in the raster. Probably we
want to remove locations in land, and locations to far away from
sampling locations. One way to do this is to apply a mask.
<- rasterize(as_Spatial(chull), target)
chull_mask plot(chull_mask)
<- mask(int, chull_mask)
int plot(int)
Here the prediction in each cell is the weighted average of all observations. Weights are computed according to their distance to the interpolation location, elevated to a negative parameter p given by the idp argument.
Greater p values give more influence to the points that are closer.
<- gstat(formula = kg ~ 1, data = cod,
gs set = list(idp = 2.5))
<- interpolate(target, gs) %>%
int mask(chull_mask)
## [inverse distance weighted interpolation]
plot(int)
<- gstat(formula = kg ~ 1, data = cod,
gs set = list(idp = 5))
<- interpolate(target, gs) %>%
int mask(chull_mask)
## [inverse distance weighted interpolation]
plot(int)
If we only use the closest neighbour, we obtain the Voronoi
polygons:
<- gstat(formula=kg~1, data = cod, nmax=1, set=list(idp=1))
gs <- interpolate(target, gs) %>%
int mask(chull_mask)
## [inverse distance weighted interpolation]
plot(int)
Here we will do a simple demonstration of kriging, a widely used geostatistical interpolation technique.
First let’s load our data…
<- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_biological.csv") %>%
biol filter(species == "cod")
<- read_csv("ftp://ftp.hafro.is/pub/data/csv/is_smb_stations.csv") %>%
cod filter(year == 2018) %>%
left_join(biol, by = "id") %>%
mutate(kg = replace_na(kg, 0)) %>%
::select(id, lon1, lat1, kg, n, duration) %>%
dplyrst_as_sf(coords = c("lon1", "lat1"), crs = 4326) %>%
st_transform(3395) # Mercator
<- read_sf("ftp://ftp.hafro.is/pub/data/shapes/iceland_coastline.gpkg") %>%
iceland st_transform(3395)
and create our mask:
<- cod %>%
pts st_combine() # To make a MULTIPOINT... but we loose the attributes
<- st_convex_hull(pts) %>%
chull st_buffer(10000) %>%
st_geometry() %>%
st_difference(iceland)
<- raster(xmn = -3040079,
target xmx = -1173079,
ymn = 9062775,
ymx = 10221775,
crs = "+proj=merc",
res = 1000)
<- rasterize(as_Spatial(chull), target) chull_mask
Now let’s compute the variogram, which is a model of how the observed data differ as a function of the distance (and possibly other variables).
ggplot() +
geom_histogram(data = cod, aes(x = kg))
# ...very skewed. Let's interpolate in log scale'
# Experimental variogram
<- variogram(log1p(kg) ~ 1, data = cod)
vg.exp
# Variogram model
<- vgm(model = "Exp", psill = 2,
vg.model range = 2e5)
# Model fit
<- fit.variogram(vg.exp, vg.model)
vg.fit
plot(vg.exp, vg.fit)
Now let’s do the kriging.
# Let's not interpolate too far away from our sampling sites
<- st_buffer(cod, 50000) %>%
cod_buf st_union() %>%
::st_remove_holes() %>%
nngeost_as_sf()
ggplot() +
geom_sf(data = cod_buf) +
geom_sf(data = iceland, fill = "darkgray") +
geom_sf(data = cod, color = "red") +
theme_bw()
# Make an empty raster
<- st_crs(cod)[[2]]
proj
<- raster(xmn = -3080000,
target xmx = -1130000,
ymn = 9000000,
ymx = 10260000,
res = 5000,
crs = proj)
<- rasterize(cod_buf, target)
mask1 <- rasterize(iceland, target)
mask2
== 1] <- NA
mask1[mask2
# Create a gstat object
<- gstat(formula = log1p(kg) ~ 1, data = cod,
model model = vg.fit)
<- interpolate(target, model = model) kriged
## [using ordinary kriging]
<- mask(kriged, mask1) # Apply the mask
kriged
<- expm1(kriged) # Because we modeled in log scale
kriged
<- ggplot() +
g geom_stars(data = st_as_stars(kriged)) +
geom_sf(data = iceland) +
scale_fill_viridis_c(trans = "log")
Finally, an example used Generalsed Additive Models (GAMs).
library(mgcv)
# We need the cod coordinates
<- st_coordinates(cod) %>%
cod_xy as_tibble()
<- bind_cols(cod, cod_xy)
cod
# Fit a GAM with a Tweedie distribution and log link
# ... good choice for a skewed, positive distribution
<- gam(kg ~ s(X, Y), data = cod, family=tw(link=log))
my.model
# Now lets get the coordinates for the prediction
<- which(chull_mask[] == 1)
locs
<- as.data.frame(xyFromCell(chull_mask, locs))
pred.xy names(pred.xy) <- c("X", "Y") # They need to match the names of the GAM model
<- predict(object = my.model,
predicted newdata = pred.xy,
type = "response")
<- raster(chull_mask) # I get an 1D array
predicted.rst <- predicted # Put the values in the raster
predicted.rst[locs]
plot(predicted.rst)