Concept

Data connect

Code
library(tidyverse)
library(patchwork)
library(arrow)
library(duckdb)
ais <- 
  open_dataset("/home/haf/einarhj/stasi/fishydata/data//ais/trail") |> 
  # Not in harbour
  filter(.cid > 0,                           # .cid positive -> not in harbour
         (whack == FALSE | is.na(whack))) |> # need to fix this
  # Fishing occurred on date
  filter(between(year, 2008, 2024)) |> 
  select(.cid, vid, time, speed, lon, lat, t1, t2, agf_gid, gid_trip, dd, dt) |> 
  filter(gid_trip == 6)                      # Bottom fish trawl

Gridding

Using speed alone

Code
dx <- 0.25
ais |> 
  mutate(speed = speed %/% dx * dx + dx/2) |> 
  count(vid, speed) |> 
  collect() ->
  s
s |> 
  mutate(speed = ifelse(speed > 15, 15, speed)) |> 
  group_by(speed) |> 
  reframe(n = sum(n)) |>
  ggplot(aes(speed, n / 1e6)) +
  theme_bw() +
  geom_col() +
  scale_x_continuous(breaks = seq(0, 20, by = 1)) +
  labs(x = "Speed [kn]", y = "Number of pings [millions]",
       caption = "Fish trawl")

So one could e.g try to use an arbitrary cuttoff for speed between 2.6 and 5.5 as an indicator of actual trawl fishing.

Code
# cap data - just to make for a nicer map
rb_cap <- function(x, Q = 0.975) {
  q = quantile(x, Q)
  ifelse(x > q, q, x)
}

dx <- 0.005
dy <- dx / 2   # grid resolution approximately 275 x 275 meters
g <- 
  ais |> 
  filter(between(speed, 2.625, 5.500)) |> 
  mutate(x = lon %/% dx * dx + dx/2,
         y = lat %/% dy * dy + dy/2) |> 
  group_by(x, y) |> 
  summarise(dt = sum(dt, na.rm = TRUE) / 60,  # effort in minutes
            .groups = "drop") |> 
  collect()
p1 <- 
  g |> 
  filter(between(x, -30, -20),
         between(y, 64.5, 67)) |> 
  mutate(dt = rb_cap(dt)) |> 
  ggplot(aes(x, y, fill = dt)) +
  theme_dark() +
  geom_tile() +
  scale_fill_viridis_c(direction = -1, guide = "none") +
  coord_quickmap(expand = 0) +
  scale_x_continuous(NULL, NULL) +
  scale_y_continuous(NULL, NULL) +
  labs(subtitle = "Speed alone",
       caption = "Speed cutoff: 2.625-5.500")
p1

In the above we are getting quite a lot of false positives as can be observed by the implied fishing inside the 12 miles, even within the fjords.

Using logbook time interval alone

Let’s instead filter by start (t1) and end (t2) of haul as registered in the logbooks:

Code
g <- 
  ais |> 
  filter(between(time, t1, t2)) |> 
  mutate(x = lon %/% dx * dx + dx/2,
         y = lat %/% dy * dy + dy/2) |> 
  group_by(x, y) |> 
  summarise(dt = sum(dt, na.rm = TRUE) / 60,
            .groups = "drop") |> 
  collect()
p2 <- 
  g |> 
  filter(between(x, -30, -20),
         between(y, 64.5, 67)) |> 
  mutate(dt = rb_cap(dt)) |> 
  ggplot(aes(x, y, fill = dt)) +
  theme_dark() +
  geom_tile() +
  scale_fill_viridis_c(direction = -1, guide = "none") +
  coord_quickmap(expand = 0) +
  scale_x_continuous(NULL, NULL) +
  scale_y_continuous(NULL, NULL) +
  labs(subtitle = "Logbooks start end time")
p1 + p2

So we are getting bit less of false positives close to shore as expected. But captains are known to forget to report at least the end of tow time, often making a hindsight guess. So let’s try a combo of both speed and time filter.

Both speed and time as filter

Code
g <- 
  ais |> 
  filter(between(speed, 2.625, 5.500)) |> 
  filter(between(time, t1, t2)) |> 
  mutate(x = lon %/% dx * dx + dx/2,
         y = lat %/% dy * dy + dy/2) |> 
  group_by(x, y) |> 
  summarise(dt = sum(dt, na.rm = TRUE) / 60,
            .groups = "drop") |> 
  collect()
p3 <- 
  g |> 
  filter(between(x, -30, -20),
         between(y, 64.5, 67)) |> 
  mutate(dt = rb_cap(dt)) |> 
  ggplot(aes(x, y, fill = dt)) +
  theme_dark() +
  geom_tile() +
  scale_fill_viridis_c(direction = -1, guide = "none") +
  coord_quickmap(expand = 0) +
  scale_x_continuous(NULL, NULL) +
  scale_y_continuous(NULL, NULL) +
  labs(subtitle = "Speed and logbooks",
       caption = "Speed cutoff: 2.625-5.500")
p1 + p2 + p3

Getting closer where we want to be. Now we do not see much data inside the 12 miles zones or nor the fjords. Except in Ísafjörður, but those splots are wrongly recorded shrimp trawls.

Another thing to note is that the data at this resolution is becoming granular, although we know that each tow track is a continuum. So either one has to grid using a lower resolution, create some spatial smoother, or make some interpolations.

Turning things into an raster

Code
library(terra)
r <- 
  g |> 
  filter(dt > 1,              # at least 1 minutes of effort per pixel
         between(x, -30, -10),
         between(y, 62, 68.5)) |> 
  mutate(dt = rb_cap(dt, 0.99)) |> 
  rast(type = "xyz",
       crs = "epsg:4326")
plot(r)

Quick mapping - bottom fish trawl

Code
library(stars)
library(tmap)
# library(tmap.mapgl)
tmap_mode("view")
values(r) <- log(values(r))  # just to get some contrast in the map
s <- r |> st_as_stars()
tm_shape(s) +
  tm_raster("dt", palette = "-inferno")

Let’s do the whole thing over all in one go, now for longlines

Code
open_dataset("/home/haf/einarhj/stasi/fishydata/data//ais/trail") |> 
  # Not in harbour
  filter(.cid > 0,                           # .cid positive -> not in harbour
         (whack == FALSE | is.na(whack))) |> # need to fix this
  # Fishing occurred on date
  filter(between(year, 2008, 2024)) |> 
  filter(gid_trip %in% c(12, 13, 21)) |>    # longlines
  filter(between(speed, s1, s2)) |>    # use what is already in the data
  filter(between(time, t1, t2)) |> 
  mutate(x = lon %/% dx * dx + dx/2,
         y = lat %/% dy * dy + dy/2) |> 
  group_by(x, y) |> 
  summarise(dt = sum(dt, na.rm = TRUE) / 60,
            .groups = "drop") |> 
  collect() |> 
  filter(dt > 1,              # at least 1 minutes of effort per pixel
         between(x, -30, -10),
         between(y, 62, 68.5)) |> 
  mutate(dt = rb_cap(dt, 0.99)) |> 
  mutate(dt = log(dt)) |> 
  rast(type = "xyz",
       crs = "epsg:4326") |> 
  st_as_stars() |> 
  tm_shape() +
  tm_raster("dt", palette = "-inferno")

… more adds - jiggers

Code
open_dataset("/home/haf/einarhj/stasi/fishydata/data//ais/trail") |> 
  # Not in harbour
  filter(.cid > 0,                           # .cid positive -> not in harbour
         (whack == FALSE | is.na(whack))) |> # need to fix this
  # Fishing occurred on date
  filter(between(year, 2008, 2024)) |> 
  filter(gid_trip %in% c(14)) |>    # jiggers
  filter(between(speed, s1, s2)) |>    # use what is already in the data
  filter(between(time, t1, t2)) |> 
  mutate(x = lon %/% dx * dx + dx/2,
         y = lat %/% dy * dy + dy/2) |> 
  group_by(x, y) |> 
  summarise(dt = sum(dt, na.rm = TRUE) / 60,
            .groups = "drop") |> 
  collect() |> 
  filter(dt > 1,              # at least 1 minutes of effort per pixel
         between(x, -30, -10),
         between(y, 62, 68.5)) |> 
  mutate(dt = rb_cap(dt, 0.99)) |> 
  mutate(dt = log(dt)) |> 
  rast(type = "xyz",
       crs = "epsg:4326") |> 
  st_as_stars() |> 
  tm_shape() +
  tm_raster("dt", palette = "-inferno")

… try hex

Code
# https://urbandatapalette.com/post/2021-08-tessellation-
h <- 
  g |> 
  filter(dt > 1,              # at least 1 minutes of effort per pixel
         between(x, -30, -10),
         between(y, 62, 68.5)) |> 
  mutate(dt = rb_cap(dt, 0.99)) |> 
  st_as_sf(coords = c("x", "y"),
           crs = 4326) |> 
  slice(1:199) |> 
  st_make_grid(c(dx, dy), what = "polygons", square = FALSE)
h <-
  h |> 
  st_as_sf() |> 
  mutate(n = 1)
tm_shape(h) +
  tm_fill(
    col = "n",
    palette = "Reds",
    style = "cont",
    #title = "Number of collisions",
    id = "n",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of collisions: " = "n"
    ))