library(tidyverse)
library(sf)
library(units)First, lets load some data. The files datras_2018_haul.csv and datras_2018_length.csv contain data from ICES’s DATRAS (the Database of Trawl Surveys) from 2018. We will create an sf object with the total number of Nephrops sampled per tow.
# Load nephrops data from ICES
## get station data
survey <- read_csv("ftp://ftp.hafro.is/pub/data/csv/datras_2018_haul.csv",
guess_max = 2000) %>%
filter(year == 2018) %>%
select(id,
survey,
lat = shootlat, # Get starting point
lon = shootlong)
## get length data
nephrops <- read_csv("ftp://ftp.hafro.is/pub/data/csv/datras_2018_length.csv") %>%
filter(latin == "Nephrops norvegicus") %>%
left_join(survey, by = "id") %>%
group_by(id) %>%
summarise(survey = first(survey),
lat = first(lat),
lon = first(lon),
num = sum(n)) %>%
ungroup()
head(nephrops)## # A tibble: 6 × 5
## id survey lat lon num
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 BITS_2018_1_26HF_TVS_3 BITS 57.8 11.1 8
## 2 BITS_2018_1_26HF_TVS_32 BITS 55.7 10.8 2
## 3 BITS_2018_1_26HF_TVS_4 BITS 57.7 10.8 2
## 4 BITS_2018_4_26HF_TVS_25 BITS 56.0 11.0 6
## 5 BTS_2018_3_END_BT4AI_106 BTS 54.7 -4.15 28
## 6 BTS_2018_3_END_BT4AI_117 BTS 54.5 -3.69 26
Next, let’s convert it into an sf object so we can do spatial operations.
nephrops <- nephrops %>%
st_as_sf(coords = c("lon", "lat"), crs = 4326)
class(nephrops)## [1] "sf" "tbl_df" "tbl" "data.frame"
Note that we need to specify which columns have the x and y coordinates (always list the x coordinate first), and the coordinate reference system (CRS). The number 4326 is the ESPG code for unprojected data using the WSG84 datum. Use this CRS for latitude and longitude data, unless you know the data has some other CRS.
Now, let’s load the Nephrops functional units. These are spatial regions used for assessment and reporting.
nfu <- read_sf("ftp://ftp.hafro.is/pub/data/shapes/nephrops_fu.gpkg")Note that the CRS in the nfu object is the same as in the np object. This is a fundamental requirement. If we want to do spatial operations between two datasets, they need to be in the same coordinate system.
Let’s take a look at both objects:
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops, size = 0.3)
## About binary predicates Binary predicates are operations that
test the topological relationship between two geometries,
resulting in a TRUE or FALSE for every combination of features in the
two sf objects used as input.
The sf package provides a family of functions to implement several binary predicates:
Each function tests a relationship between the interior, boundary or exterior of one object with one of the same attributes of the other object. This is based on what is known as the Dimensionally Extended nine-Intersection Model (DE-9IM).
The functions can return either a sparse index list (class
sgbp), or if we use the ‘sparse = FALSE’ argument, we
obtain a logical matrix.
Let’s find out in which Nephrops functional unit are located the records in the nephrops data:
test1 <- st_within(nephrops, nfu)
class(test1)## [1] "sgbp" "list"
test1## Sparse geometry binary predicate list of length 355, where the
## predicate was `within'
## first 10 elements:
## 1: 25
## 2: (empty)
## 3: 19
## 4: (empty)
## 5: 5
## 6: 5
## 7: 5
## 8: 5
## 9: 6
## 10: 6
test2 <- st_within(nephrops, nfu, sparse = FALSE)
class(test2)## [1] "matrix" "array"
dim(test2)## [1] 355 30
test2[1, ]## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] TRUE FALSE FALSE FALSE FALSE FALSE
As you can see, in both ways we find out that for example the first record in the Nephrops data is located in the 25th functional unit (which is the Kattegat).
If we use the sparse = FALSE argument, we can combine
the binary predicate functions with filter() to filter
records according to the spatial relation with another spatial
object.
As a reminder, we can subset data frames (and sf objects) by using a logical vector, or an expression that generates a logical vector. For example
nephrops_bits <- nephrops[nephrops$survey == "BITS", ]Or using the tidiverse
nephrops_bits <- nephrops %>%
filter(survey == "BITS")In a similar way, we can subset a sf object by asking if they relate or not in some way to another sf object. In other words, we can do a spatial subsetting.
nfu_kattegat<- nfu %>% filter(name == "Kattegat")
# Within the Kattegat
nephrops_kattegat <- nephrops %>%
filter(st_within(x = ., y = nfu_kattegat, sparse = FALSE))
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops_kattegat, size = 0.3) +
theme(legend.position = "none")# Outside the Kattegat
nephrops_no_kattegat <- nephrops %>%
filter(st_disjoint(x = ., y = nfu_kattegat, sparse = FALSE))
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops_no_kattegat, size = 0.3) +
theme(legend.position = "none")# At least 200 km from Kattegat
distance <- set_units(200, km)
nephrops_near_kattegat <- nephrops %>%
filter(st_is_within_distance(x = ., y = nfu_kattegat, sparse = FALSE, dist = distance))
ggplot() +
geom_sf(data = nfu, aes(fill = name), alpha = 0.5) +
geom_sf(data = nephrops_near_kattegat, size = 0.3) +
theme(legend.position = "none")We can join two sf objects based on areas of shared geographic space
(spatial join, as opposed to shared variables as in non-spatial joins).
For this, we use the st_join function.
nephrops <- nephrops %>%
st_join(nfu %>% select(nfu = name),
join = st_intersects)
ggplot() +
geom_sf(data = nephrops, aes(color = nfu), size = 0.3) +
theme(legend.position = "none")
And an example with two sets of polygons:
ier <- read_sf("ftp://ftp.hafro.is/pub/data/shapes/ices_ecoregions.gpkg") %>%
select(ecoregion) %>% # 17 polygons
st_make_valid()
ospar <- read_sf("ftp://ftp.hafro.is/pub/data/shapes/ospar.gpkg") %>%
select(subregion) # 50 polygons
ospar_ier <- st_join(ospar, ier) %>%
mutate(label = str_c(subregion,"-", ecoregion))
ggplot() +
geom_sf(data = ospar, col = "red", fill = NA) +
geom_sf(data = ier, col = "blue", fill = NA)ospar_ier <- st_join(ospar, ier) %>%
mutate(label = str_c(subregion,"-", ecoregion)) # 92 polygons!!!
ggplot() +
geom_sf(data = ospar_ier, aes(fill = subregion))ggplot() +
geom_sf(data = ospar_ier, aes(fill = ecoregion))# outcommented - bug
# ospar_ier <- st_join(ospar, ier, largest = TRUE) # 50 polygonsThe default predicate in the st_join() function is st_intersects. But others could be used, included:
st_contains_properly
st_contains
st_covered_by
st_covers
st_crosses
st_disjoint
st_equals_exact
st_equals
st_is_within_distance
st_nearest_feature
st_overlaps
st_touches
st_within
Note the warning although coordinates are longitude/latitude, st_intersects assumes that they are planar. This is because the polygons are not in a coordinate projected system.
Number of Nephrops records per nfu
nephrops_count <- nephrops %>%
st_join(nfu) %>%
count(name)
nephrops_count## Simple feature collection with 24 features and 2 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -14.876 ymin: 36.2343 xmax: 12.619 ymax: 60.733
## Geodetic CRS: WGS 84
## # A tibble: 24 × 3
## name n geometry
## * <chr> <int> <MULTIPOINT [°]>
## 1 Biscay North 10 ((-3.8914 47.4293), (-4.7403 47.8189), …
## 2 Botney Gut – Silver Pit 16 ((4.8863 54.1291), (4.5336 54.2468), (4…
## 3 Cantabrian Sea 2 ((-5.0075 43.6372), (-2.2275 43.556))
## 4 Celtic Sea – Labadie 10 ((-7.3009 49.6217), (-7.1894 50.4339), …
## 5 Celtic Sea – the Smalls 5 ((-6.3856 51.0943), (-6.5103 51.239), (…
## 6 Devil’s Hole 5 ((1.4454 57.2137), (0.2778 56.3815), (1…
## 7 Farn Deeps 12 ((-1.1537 55.3711), (-1.117 55.397), (-…
## 8 Firth of Clyde + Sound of Jura 8 ((-5.321 55.278), (-5.184 55.305), (-5.…
## 9 Fladen Ground 22 ((0.5712 60.2098), (0.5788 60.267), (1.…
## 10 Gulf of Cadiz 15 ((-6.9763 36.6735), (-7.0602 36.7323), …
## # … with 14 more rows
Note that the result of this is another sf object, with geometry type MULTIPOINT.