interpolation.Rmd
NOTE:
library(data.table)
library(sf)
library(traipse)
library(tidyverse)
# code snippet example, needs further work
tmp <-
out.eh %>%
filter(vessel_id == 1) %>%
group_by(vessel_id, tid) %>%
mutate(x = track_intermediate(lon, lat,
date = time_stamp, duration = 2)) %>%
ungroup()
tmp %>%
unnest(x)
hb <-
ramb::read_is_harbours() %>%
filter(iso2a == "IS",
# filter out some smaller harbours, cause a nuisance downstream
!hid %in% c("HRI", "ASS", "HAU", "GRE", "MJH", "MJO", "AED", "HJA")) %>%
mutate(hid = 1:n())
d <-
ramb::read_is_survey_tracks() %>%
mutate(.rid = 1:n()) %>%
arrange(vid, time) %>%
select(.rid, everything()) %>%
st_as_sf(coords = c("lon", "lat"),
crs = 4326,
remove = FALSE) %>%
st_join(hb %>% select(hid)) %>%
st_drop_geometry() %>%
# here check for wacky points
group_by(vid) %>%
mutate(pings = n(),
sd = ifelse(pings > 3, track_distance(lon, lat), NA_real_),
ss = ifelse(pings > 3, track_speed(lon, lat, time), NA_real_),
st = track_time(time),
duration = sum(st) / 60, # duration in minutues
whacky = ramb::rb_whacky_speed(lon, lat, time)) %>%
ungroup()
# define trips, then interpolate
d <-
d %>%
group_by(vid) %>%
mutate(inh = !is.na(hid),
tid = ramb::rb_trip(inh)) %>%
ungroup()
d %>%
filter(vid == 1131, tid == 1) %>%
filter(between(lon, -22, -10),
between(lat, 66.25, 70)) %>%
group_by(vid, tid) %>%
mutate(x = track_intermediate(lon, lat,
date = time, duration = 10*60)) %>%
ungroup() %>%
unnest(x) %>%
ggplot() +
geom_point(aes(int_x, int_y), colour = "blue", size = 0.7) +
geom_point(aes(lon, lat), colour = "red", size = 0.3) +
coord_quickmap()
Data:
hbs <- readRDS("~/ass/2021/WKSSFGEO/github/WKSSFGEO/data/harbours.rds")
#> Error in gzfile(file, "rb"): cannot open the connection
githubURL1 <- "https://raw.githubusercontent.com/ices-eg/WKSSFGEO/main/data-examples/example_data_AIS.csv"
d <-
readr::read_csv(githubURL1) %>%
unique(by = c("vessel_id", "time_stamp")) %>% # Remove duplicates
mutate(vessel_id = str_sub(vessel_id, 4) %>% as.integer(),
.rid = 1:n())
Jeppe:
devtools::source_url("https://raw.githubusercontent.com/ices-eg/WKSSFGEO/main/R-dev/jepol/define_trips_pol.R")
devtools::source_url("https://raw.githubusercontent.com/ices-eg/WKSSFGEO/main/R-dev/jepol/add_harbours.R")
# devtools::source_url("https://raw.githubusercontent.com/ices-eg/WKSSFGEO/main/R-dev/jepol/interpolate_ais.R")
system.time(out.jp <-
readr::read_csv(githubURL1) %>%
unique(by = c("vessel_id", "time_stamp")) %>% # Remove duplicates
mutate(vessel_id = str_sub(vessel_id, 4) %>% as.integer(),
.rid = 1:n()) %>%
setDT() %>%
add_harbours(hbs) %>%
define_trips_pol(min_dur = 0.0001, max_dur = 50e6,
split_trips = FALSE, preserve_all = TRUE))
#> Error: object 'hbs' not found
Einar:
rb_trip <- function(x) {
tibble::tibble(x = x) %>%
dplyr::mutate(tid = dplyr::if_else(x != dplyr::lag(x), 1L, 0L, 1L),
tid = ifelse(x, -tid, tid)) %>%
dplyr::group_by(x) %>%
dplyr::mutate(tid = cumsum(tid)) %>%
dplyr::ungroup(x) %>%
dplyr::pull(tid)
}
system.time(out.eh <-
readr::read_csv(githubURL1) %>%
unique(by = c("vessel_id", "time_stamp")) %>% # Remove duplicates
mutate(vessel_id = str_sub(vessel_id, 4) %>% as.integer(),
.rid = 1:n()) %>%
sf::st_as_sf(coords = c("lon", "lat"),
crs = 4326,
remove = FALSE) %>%
sf::st_join(hbs %>% mutate(hid = 1:n()) %>% select(hid)) %>%
sf::st_drop_geometry() %>%
group_by(vessel_id) %>%
mutate(inh = !is.na(hid),
tid = rb_trip(inh)) %>%
ungroup())
#> Error: object 'hbs' not found
# Pad harbour id, adding harbour id left to the first ping of a trip and harbour id entered
# to the last ping of the trip.
rb_pad_harbour <- function(tid, hid) {
tibble::tibble(.tid = tid,
.hid = hid) %>%
dplyr::mutate(.hid = dplyr::case_when(.tid != dplyr::lag(.tid) & .tid > 0 ~ dplyr::lag(.hid),
.tid != dplyr::lead(.tid) & .tid > 0 ~ dplyr::lead(.hid),
TRUE ~ .hid)) %>%
dplyr::pull(.hid)
}
out.eh %>%
group_by(vessel_id) %>%
mutate(hid = rb_pad_harbour(tid, hid)) %>%
filter(tid > 0) %>%
group_by(vessel_id, tid) %>%
slice_tail(n = 1) %>%
view()
#> Error: object 'out.eh' not found
Number of records:
tibble(original = nrow(d),
jp = nrow(out.jp),
eh = nrow(out.eh))
#> Error: object 'out.jp' not found
Any vessel difference?:
full_join(out.eh %>% count(vessel_id, name = "n.eh"),
out.jp %>% count(vessel_id, name = "n.jp"))
#> Error: object 'out.eh' not found
So vessels 5 and 8 not returned in the jp-workflow.
out.eh %>%
filter(vessel_id %in% c(5, 8)) %>%
count(vessel_id, inh)
#> Error: object 'out.eh' not found
Reason seemms the jp-flow does not return records if all pings are in harbour. This may be intentional - should possibly be mentioned in the help file that this takes place.
Number of pings per trip:
full_join(out.eh %>% mutate(tid = ifelse(tid < 0, NA, tid)) %>% count(vessel_id, tid, name = "n.eh"),
out.jp %>% separate(trip_id, c("dummy", "tid"), convert = TRUE) %>% count(vessel_id, tid, name = "n.jp")) %>%
mutate(diff = n.eh - n.jp) %>%
knitr::kable()
#> Error: object 'out.eh' not found
So difference is in all cases except vessel 7, trip 8 one less ping in the eh-algorithm compared with the jp-algoritm.
tmp <-
out.eh %>%
select(.rid, vessel_id, time_stamp, hid.eh = hid, inh, tid.eh = tid) %>%
full_join(out.jp %>% as_tibble() %>% select(.rid, hid.jp = SI_HARB, tid.jp = trip_id) %>% mutate(in.jp = TRUE)) %>%
mutate(in.jp = replace_na(in.jp, TRUE)) %>%
separate(tid.jp, c("dummy", "tid.jp"), convert = TRUE) %>%
select(-dummy)
#> Error: object 'out.eh' not found
The reason for the difference of one is that the jp-algorithm includes the first harbour ping at the end of a trip as being part of the trip. Case in point:
What about vessel 7, trip 8?
tmp %>%
filter(vessel_id == 7, tid.jp == 8) %>%
data.table()
#> Error: object 'tmp' not found
Seems like the last record for this trip did not terminate in a harbour, as illustrated here (actually it does not even start in a harbour):