library(tidyverse)
library(sf)
library(units)
First, lets load some data. The files datras_2018_haul.csv and datras_2018_length.csv contain data from ICES’s DATRAS (the Database of Trawl Surveys) from 2018. We will create an sf object with the total number of Nephrops sampled per tow.
# Load nephrops data from ICES
## get station data
<- read_csv("ftp://ftp.hafro.is/pub/data/csv/datras_2018_haul.csv",
survey guess_max = 2000) %>%
filter(year == 2018) %>%
select(id,
survey,lat = shootlat, # Get starting point
lon = shootlong)
## get length data
<- read_csv("ftp://ftp.hafro.is/pub/data/csv/datras_2018_length.csv") %>%
nephrops filter(latin == "Nephrops norvegicus") %>%
left_join(survey, by = "id") %>%
group_by(id) %>%
summarise(survey = first(survey),
lat = first(lat),
lon = first(lon),
num = sum(n)) %>%
ungroup()
head(nephrops)
## # A tibble: 6 × 5
## id survey lat lon num
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 BITS_2018_1_26HF_TVS_3 BITS 57.8 11.1 8
## 2 BITS_2018_1_26HF_TVS_32 BITS 55.7 10.8 2
## 3 BITS_2018_1_26HF_TVS_4 BITS 57.7 10.8 2
## 4 BITS_2018_4_26HF_TVS_25 BITS 56.0 11.0 6
## 5 BTS_2018_3_END_BT4AI_106 BTS 54.7 -4.15 28
## 6 BTS_2018_3_END_BT4AI_117 BTS 54.5 -3.69 26
Next, let’s convert it into an sf object so we can do spatial operations.
<- nephrops %>%
nephrops st_as_sf(coords = c("lon", "lat"), crs = 4326)
class(nephrops)
## [1] "sf" "tbl_df" "tbl" "data.frame"
Note that we need to specify which columns have the x and y coordinates (always list the x coordinate first), and the coordinate reference system (CRS). The number 4326 is the ESPG code for unprojected data using the WSG84 datum. Use this CRS for latitude and longitude data, unless you know the data has some other CRS.
Now, let’s load the Nephrops functional units. These are spatial regions used for assessment and reporting.
<- read_sf("ftp://ftp.hafro.is/pub/data/shapes/nephrops_fu.gpkg") nfu
Note that the CRS in the nfu object is the same as in the np object. This is a fundamental requirement. If we want to do spatial operations between two datasets, they need to be in the same coordinate system.
Let’s take a look at both objects:
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops, size = 0.3)
## About binary predicates Binary predicates are operations that
test the topological relationship between two geometries,
resulting in a TRUE or FALSE for every combination of features in the
two sf objects used as input.
The sf package provides a family of functions to implement several binary predicates:
Each function tests a relationship between the interior, boundary or exterior of one object with one of the same attributes of the other object. This is based on what is known as the Dimensionally Extended nine-Intersection Model (DE-9IM).
The functions can return either a sparse index list (class
sgbp
), or if we use the ‘sparse = FALSE’ argument, we
obtain a logical matrix.
Let’s find out in which Nephrops functional unit are located the records in the nephrops data:
<- st_within(nephrops, nfu)
test1 class(test1)
## [1] "sgbp" "list"
test1
## Sparse geometry binary predicate list of length 355, where the
## predicate was `within'
## first 10 elements:
## 1: 25
## 2: (empty)
## 3: 19
## 4: (empty)
## 5: 5
## 6: 5
## 7: 5
## 8: 5
## 9: 6
## 10: 6
<- st_within(nephrops, nfu, sparse = FALSE)
test2 class(test2)
## [1] "matrix" "array"
dim(test2)
## [1] 355 30
1, ] test2[
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] TRUE FALSE FALSE FALSE FALSE FALSE
As you can see, in both ways we find out that for example the first record in the Nephrops data is located in the 25th functional unit (which is the Kattegat).
If we use the sparse = FALSE
argument, we can combine
the binary predicate functions with filter()
to filter
records according to the spatial relation with another spatial
object.
As a reminder, we can subset data frames (and sf objects) by using a logical vector, or an expression that generates a logical vector. For example
<- nephrops[nephrops$survey == "BITS", ] nephrops_bits
Or using the tidiverse
<- nephrops %>%
nephrops_bits filter(survey == "BITS")
In a similar way, we can subset a sf object by asking if they relate or not in some way to another sf object. In other words, we can do a spatial subsetting.
<- nfu %>% filter(name == "Kattegat")
nfu_kattegat
# Within the Kattegat
<- nephrops %>%
nephrops_kattegat filter(st_within(x = ., y = nfu_kattegat, sparse = FALSE))
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops_kattegat, size = 0.3) +
theme(legend.position = "none")
# Outside the Kattegat
<- nephrops %>%
nephrops_no_kattegat filter(st_disjoint(x = ., y = nfu_kattegat, sparse = FALSE))
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops_no_kattegat, size = 0.3) +
theme(legend.position = "none")
# At least 200 km from Kattegat
<- set_units(200, km)
distance
<- nephrops %>%
nephrops_near_kattegat filter(st_is_within_distance(x = ., y = nfu_kattegat, sparse = FALSE, dist = distance))
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops_near_kattegat, size = 0.3) +
theme(legend.position = "none")
We can join two sf objects based on areas of shared geographic space
(spatial join, as opposed to shared variables as in non-spatial joins).
For this, we use the st_join
function.
<- nephrops %>%
nephrops st_join(nfu %>% select(nfu = name),
join = st_intersects)
ggplot() +
geom_sf(data = nephrops, aes(color = nfu), size = 0.3) +
theme(legend.position = "none")
And an example with two sets of polygons:
<- read_sf("ftp://ftp.hafro.is/pub/data/shapes/ices_ecoregions.gpkg") %>%
ier select(ecoregion) %>% # 17 polygons
st_make_valid()
<- read_sf("ftp://ftp.hafro.is/pub/data/shapes/ospar.gpkg") %>%
ospar select(subregion) # 50 polygons
<- st_join(ospar, ier) %>%
ospar_ier mutate(label = str_c(subregion,"-", ecoregion))
ggplot() +
geom_sf(data = ospar, col = "red", fill = NA) +
geom_sf(data = ier, col = "blue", fill = NA)
<- st_join(ospar, ier) %>%
ospar_ier mutate(label = str_c(subregion,"-", ecoregion)) # 92 polygons!!!
ggplot() +
geom_sf(data = ospar_ier, aes(fill = subregion))
ggplot() +
geom_sf(data = ospar_ier, aes(fill = ecoregion))
# outcommented - bug
# ospar_ier <- st_join(ospar, ier, largest = TRUE) # 50 polygons
The default predicate in the st_join() function is st_intersects. But others could be used, included:
st_contains_properly
st_contains
st_covered_by
st_covers
st_crosses
st_disjoint
st_equals_exact
st_equals
st_is_within_distance
st_nearest_feature
st_overlaps
st_touches
st_within
Note the warning although coordinates are longitude/latitude, st_intersects assumes that they are planar. This is because the polygons are not in a coordinate projected system.
Number of Nephrops records per nfu
<- nephrops %>%
nephrops_count st_join(nfu) %>%
count(name)
nephrops_count
## Simple feature collection with 24 features and 2 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -14.876 ymin: 36.2343 xmax: 12.619 ymax: 60.733
## Geodetic CRS: WGS 84
## # A tibble: 24 × 3
## name n geometry
## * <chr> <int> <MULTIPOINT [°]>
## 1 Biscay North 10 ((-3.8914 47.4293), (-4.7403 47.8189), …
## 2 Botney Gut – Silver Pit 16 ((4.8863 54.1291), (4.5336 54.2468), (4…
## 3 Cantabrian Sea 2 ((-5.0075 43.6372), (-2.2275 43.556))
## 4 Celtic Sea – Labadie 10 ((-7.3009 49.6217), (-7.1894 50.4339), …
## 5 Celtic Sea – the Smalls 5 ((-6.3856 51.0943), (-6.5103 51.239), (…
## 6 Devil’s Hole 5 ((1.4454 57.2137), (0.2778 56.3815), (1…
## 7 Farn Deeps 12 ((-1.1537 55.3711), (-1.117 55.397), (-…
## 8 Firth of Clyde + Sound of Jura 8 ((-5.321 55.278), (-5.184 55.305), (-5.…
## 9 Fladen Ground 22 ((0.5712 60.2098), (0.5788 60.267), (1.…
## 10 Gulf of Cadiz 15 ((-6.9763 36.6735), (-7.0602 36.7323), …
## # … with 14 more rows
Note that the result of this is another sf object, with geometry type MULTIPOINT.