Code
# setup ------------------------------------------------------------------------
## libraries -------------------------------------------------------------------
library(vmstools)
library(tidyverse)
library(duckdb)
## ad-hoc functions ------------------------------------------------------------
### ramb functions
source("https://raw.githubusercontent.com/einarhjorleifsson/ramb/refs/heads/main/R/format.R")
source("https://raw.githubusercontent.com/einarhjorleifsson/ramb/refs/heads/main/R/sum_across.R")
### datacall functions - can not source global.R, here a datacall trip_assign as a temporary gist
source("https://gist.githubusercontent.com/einarhjorleifsson/deee46b493f1f95e95e905c15f1c2432/raw/e2d2a0a8241ac8f9c86c3971de677ff260d7319d/rb_trip_assign.R")
<- function(code_type, from_file = TRUE) {
rb_getCodeList if(from_file) {
::read_rds(paste0(here::here("data/icesVocab"), "/", code_type, ".rds"))
readrelse {
} ::getCodeList(code_type)
icesVocab
}
}<- rb_getCodeList("GearType")$Key
iv_gear <- rb_getCodeList("TargetAssemblage")$Key
iv_target <- rb_getCodeList("Metier6_FishingActivity")$Key
iv_met6
# Data pre-processing-----------------------------------------------------------
# Use eflalo and tacsat in the vmstools package as a demo
# Only do some minimal stuff here
## logbooks --------------------------------------------------------------------
data(eflalo)
# consolidate
<-
eflalop |>
eflalo as_tibble() |>
rb_sum_across("LE_KG", remove = TRUE) |>
rb_sum_across("LE_EURO", remove = TRUE) |>
mutate(D_DATIM = rb_create_timestamp(FT_DDAT, FT_DTIME),
L_DATIM = rb_create_timestamp(FT_LDAT, FT_LTIME),
# LE_CDAT = dmy(LE_CDAT), # should be, but ...
.after = FT_REF) |>
select(-c(FT_DDAT, FT_DTIME, FT_LDAT, FT_LTIME)) |>
rename(LE_MET6 = LE_MET_level6)
# NEW
<-
eflalop |>
eflalop # only needed if one wants to do additional testing
separate(LE_MET6, into = c(".m6gear", ".m6target", ".m6mesh", ".m6rest"), sep = "_", remove = FALSE, extra = "merge") |>
separate(.m6mesh, into = c(".m1", ".m2"), sep = "-", remove = FALSE, fill = "right", convert = TRUE) |>
separate(LE_ID, into = c(".etid", ".egear", ".eir"), sep = "-", remove = FALSE) |>
# TODO: Deal with .m1 or .m2 being characters
#mutate(.m2 = case_when(is.na(.m2) & as.character(.m1) == "0" ~ "0",
# .default = .m2)) |>
mutate(
checks = case_when( # datacall
# Catch record outlier - code pending # 1.3.2
::duplicated(paste(VE_REF, LE_ID, LE_CDAT)) ~ "01 duplicated events", # 1.3.3
baseis.na(D_DATIM) | is.na(L_DATIM) ~ "02 impossible time", # 1.3.4
year(D_DATIM) == (year(L_DATIM) - 1) ~ "03 new years trip", # 1.3.5
> L_DATIM ~ "04 departure before arrival", # 1.3.6
D_DATIM # "05 overlapping trips - code pending # 1.3.7
!LE_GEAR %in% iv_gear ~ "06 gear (metier 4) invalid", # 1.4.1
!LE_MET6 %in% iv_met6 ~ "07 metier 6 invalid", # 3.5.5
# Addendum, needs discussions
!between(dmy(LE_CDAT), as_date(D_DATIM), as_date(L_DATIM)) ~ "ok - 08 catch date not within trip",
# metier level 6 component checks
!= .m6gear ~ "ok - 01 met6 gear different from gear",
LE_GEAR !.m6target %in% iv_target ~ "ok - 10 met6 target invalid",
# event id component checks
!= .etid ~ "ok - 11 event id tripid different from tripid",
FT_REF != .egear ~ "ok - 12 event id gear different from gear",
LE_GEAR != .eir ~ "ok - 13 event id rectangle different from rectangle",
LE_RECT # Pending solution prior to case_when applied
#!between(mesh, as.integer(.m1), .m2) ~ "11 mesh size not in the met6 range",
.default = "ok")
)|>
eflalop count(checks) |>
mutate('%' = round(n / sum(n) * 100, 2)) |>
::kable(caption = "Logbooks: All records not starting with 'ok' will be filtered downstream") knitr
checks | n | % |
---|---|---|
01 duplicated events | 3 | 0.07 |
07 metier 6 invalid | 741 | 16.33 |
ok | 3775 | 83.17 |
ok - 01 met6 gear different from gear | 20 | 0.44 |
Code
<-
eflalop |>
eflalop select(-c(.etid, .egear, .eir,
|>
.m6gear, .m6target, .m6mesh, .m1, .m2, .m6rest)) filter(str_starts(checks, "ok")) |>
select(-checks)
# End of NEW
# create a tidy trip table
# eflalo is not a tidy structure - can be split into a tidy trip table and then
# a detail event table. tverse pipe code flow becomes more succinct downstream
<-
trips |>
eflalop group_by(VE_COU, VE_REF, FT_REF) |>
mutate(weight = sum(LE_KG),
price = sum(LE_EURO)) |>
ungroup() |>
select(VE_COU, VE_REF, VE_LEN, VE_KW, VE_TON, FT_REF, D_DATIM, L_DATIM, weight, price) |>
distinct(VE_COU, VE_REF, FT_REF, .keep_all = TRUE)
<- trips |> arrow::to_duckdb() # for later
duck_trips # The detail table, remove vessel and trip variables except those used as keys
<-
events |>
eflalop select(VE_COU, VE_REF, FT_REF, starts_with("LE_")) |>
# reduce the clutter for this demo test
::remove_empty(which = "cols")
janitor<- events |> arrow::to_duckdb() # for later
duck_events
## trails ----------------------------------------------------------------------
data(tacsat)
<-
tacsat |>
tacsat as_tibble() |>
mutate(SI_DATIM = ramb::rb_create_timestamp(SI_DATE, SI_TIME),
.after = VE_REF) |>
# select(-c(SI_DATE, SI_TIME)) |> assign_trip only works if these are available
arrange(VE_COU, VE_REF, SI_DATIM) |>
distinct(VE_COU, VE_REF, SI_DATIM, .keep_all = TRUE) |>
arrange(VE_COU, VE_COU, SI_DATIM) |>
mutate(.pid = 1L:n(),
.before = VE_COU)
if(FALSE) {
<- 5 * 60 # Interval threshold minimum [units: seconds]
it_min <-
tacsat |>
tacsat # TODO: There are different ways to skin a cat - here calculating sequential statistics
# group_by(cid, vid, time) |>
# see:
# mutate(dt = as.numeric(difftime(lead(time), time, units = "sec"))) |>
# fill(dt, .direction = "down") |>
# ungroup() |>
mutate(
checks =
case_when( # datacall
# will argue that these comes first (now done in 2.3.2)
is.na(VE_COU) ~ "00 1 no country id",
is.na(VE_REF) ~ "00 2 no vessel id",
is.na(SI_DATIM) ~ "00 3 no time",
is.na(SI_LONG) ~ "00 4 no lon",
is.na(SI_LATI) ~ "00 5 no lat",
# Would put question mark to this, one can derive speed
is.na(SI_SP) ~ "00 6 no speed",
# TODO: Outside ICES area", # 1.2.1
::duplicated(paste(VE_COU, VE_REF, SI_LONG, SI_LATI, SI_DATIM)) ~ "02 duplicates", # 1.2.2
base# This already inclusive in check 01, should be dropped
!between(SI_LONG, -180, 180) | !between(SI_LATI, -90, 90) ~ "03 coordinates out of bound", # 1.2.3
c(it_min, diff(unclass(SI_DATIM))) < it_min ~ "04 time interval exceeds threshhold", # 1.2.4
# TODO: Points in harbour # 1.2.5
.default = "ok")
)
|>
tacsat count(checks) |>
mutate('%' = round(n / sum(n) * 100, 2)) |>
::kable(caption = "VMS: All records not 'ok' will be filtered downstream")
knitr
<-
tacsat |>
tacsat filter(str_starts(checks, "ok")) |>
select(-c(checks))
}
<- tacsat
trails
# Analysis ---------------------------------------------------------------------
## Add trip information to pings -----------------------------------------------
### datacallway ----------------------------------------------------------------
<-
tacsatp mergeEflalo2Tacsat(eflalop |> as.data.frame(),
|> as.data.frame())
tacsat # NOTE: even information added downstream - separated to clarify thinking
<- c("VE_LEN", "VE_KW")
cols for (col in cols) {
<- eflalo[[col]][match(tacsatp$FT_REF, eflalo$FT_REF)]
tacsatp[[col]]
}### myway ----------------------------------------------------------------------
<-
trails |>
trails left_join(trips,
by = join_by(VE_COU, VE_REF, between(SI_DATIM, D_DATIM, L_DATIM)),
relationship = "many-to-one") |>
select(-c(D_DATIM, L_DATIM))
<- trails |> arrow::to_duckdb() # for later
duck_trails
### comparisons ----------------------------------------------------------------
identical(tacsatp$.pid, trails$.pid)
[1] FALSE
Code
<- tacsatp |> arrange(.pid)
tacsatp identical(tacsatp$VE_REF, replace_na(trails$VE_REF, "0")) # not really needed
[1] TRUE
Code
identical(tacsatp$FT_REF, replace_na(trails$FT_REF, "0")) # not really needed
[1] TRUE
Code
# TODO: check these
identical(tacsatp$VE_LEN, trails$VE_LEN)
[1] FALSE
Code
identical(tacsatp$VE_KW, trails$VE_KW)
[1] FALSE
Code
# check non-identicals
data(eflalo) # reload eflalo
<-
comparison |>
tacsatp as_tibble() |>
select(.pid, VE_COU, VE_REF, FT_REF, length_dc = VE_LEN, kw_dc = VE_KW) |>
left_join(trails |> select(.pid, VE_COU, VE_REF, FT_REF, length_mw = VE_LEN, kw_mw = VE_KW)) |>
filter(length_dc != length_mw | kw_dc != kw_mw) |>
select(-c(.pid, FT_REF)) |>
distinct()
|>
comparison left_join(eflalo |>
filter(VE_REF %in% unique(comparison$VE_REF)) |>
select(VE_REF, VE_LEN, VE_KW) |>
distinct()) |>
::kable(caption = "Assigning vessel info to pings - discrepancies\n'_dc': datacall, '_mw: myway, original from eflalo\nSeems like myway same as the original") knitr
VE_COU | VE_REF | length_dc | kw_dc | length_mw | kw_mw | VE_LEN | VE_KW |
---|---|---|---|---|---|---|---|
Atlantis | 729 | 24.73 | 221 | 25.00000 | 221 | 25.00000 | 221 |
Atlantis | 915 | 24.50 | 349 | 24.00000 | 221 | 24.00000 | 221 |
Atlantis | 864 | 25.27 | 184 | 17.52212 | 111 | 17.52212 | 111 |
Code
# myway seems correct, so something to check in the datacall flow
## Add gear, mesh and met6 to pings --------------------------------------------
# this is ambiguously called trip assign in the datacall flow
### datacallway ----------------------------------------------------------------
# Now add the event info - this step needed so that next step works - could be simplified
<- c("LE_GEAR", "LE_MSZ", "LE_RECT", "LE_MET6")
cols for (col in cols) {
<- eflalop[[col]][match(tacsatp$FT_REF, eflalop$FT_REF)]
tacsatp[[col]]
}
library(data.table) # needed in the trip_assign
<- trip_assign(tacsatp, eflalop, col = "LE_GEAR", haul_logbook = F)
tacsatpa_LE_GEAR # Here and subsequently, replaced %!in% with %in%, moving the ! upfront
<- rbindlist(list(tacsatp[!tacsatp$FT_REF %in% tacsatpa_LE_GEAR$FT_REF,], tacsatpa_LE_GEAR), fill = T)
tacsatp
<- trip_assign(tacsatp, eflalop, col = "LE_MSZ", haul_logbook = F)
tacsatpa_LE_MSZ <- rbindlist(list(tacsatp[!tacsatp$FT_REF %in% tacsatpa_LE_MSZ$FT_REF,], tacsatpa_LE_MSZ), fill = T)
tacsatp
<- trip_assign(tacsatp, eflalop, col = "LE_RECT", haul_logbook = F)
tacsatpa_LE_RECT <- rbindlist(list(tacsatp[!tacsatp$FT_REF %in% tacsatpa_LE_RECT$FT_REF,], tacsatpa_LE_RECT), fill = T)
tacsatp
<- trip_assign(tacsatp, eflalop, col = "LE_MET6", haul_logbook = F)
tacsatpa_LE_MET <- rbindlist(list(tacsatp[!tacsatp$FT_REF %in% tacsatpa_LE_MET$FT_REF,], tacsatpa_LE_MET), fill = T)
tacsatp
### myway ----------------------------------------------------------------------
# 1. For each fishing day, add the values of gear, mesh and met6 from the
# highest event catch record within the day (no summation done a priori)
# 2. For non-fishing days, add the values of gear, mesh and met6 from the
# highest trip catches
<-
highest_event_catch_per_day |>
events arrange(desc(LE_KG)) |>
group_by(VE_COU, VE_REF, FT_REF, LE_CDAT) |>
slice(1) |>
ungroup() |>
select(VE_COU, VE_REF, FT_REF, LE_CDAT, LE_GEAR, LE_MSZ, LE_MET6, LE_RECT)
<-
highest_trip_catches |>
events group_by(VE_COU, VE_REF, FT_REF, LE_GEAR, LE_MSZ, LE_MET6, LE_RECT) |>
summarise(LE_KG = sum(LE_KG),
.groups = "drop") |>
arrange(desc(LE_KG)) |>
group_by(VE_COU, VE_REF, FT_REF) |>
slice(1) |>
ungroup() |>
select(VE_COU, VE_REF, FT_REF, .LE_GEAR = LE_GEAR, .LE_MSZ = LE_MSZ,
.LE_MET6 = LE_MET6, .LE_RECT = LE_RECT)
<- trails
trails_copy <-
trails |>
trails left_join(highest_event_catch_per_day,
by = join_by(VE_COU, VE_REF, FT_REF, SI_DATE == LE_CDAT),
relationship = "many-to-one") |>
left_join(highest_trip_catches,
by = join_by(VE_COU, VE_REF, FT_REF),
relationship = "many-to-one") |>
mutate(LE_GEAR = case_when(is.na(LE_GEAR) & !is.na(.LE_GEAR) ~ .LE_GEAR,
.default = LE_GEAR),
LE_MSZ = case_when(is.na(LE_MSZ) & !is.na(.LE_MSZ) ~ .LE_MSZ,
.default = LE_MSZ),
LE_MET6 = case_when(is.na(LE_MET6) & !is.na(.LE_MET6) ~ .LE_MET6,
.default = LE_MET6),
LE_RECT = case_when(is.na(LE_RECT) & !is.na(.LE_RECT) ~ .LE_RECT,
.default = LE_RECT)) |>
select(-c(.LE_GEAR, .LE_MSZ, .LE_MET6, .LE_RECT))
### comparisons ----------------------------------------------------------------
identical(tacsatp$.pid, trails$.pid)
[1] FALSE
Code
<- tacsatp |> arrange(.pid)
tacsatp identical(tacsatp$.pid, trails$.pid)
[1] TRUE
Code
identical(tacsatp$FT_REF, replace_na(trails$FT_REF, "0"))
[1] TRUE
Code
identical(tacsatp$LE_MET6, trails$LE_MET6)
[1] TRUE
Code
# TODO: check these
# so some work needed, do not get identical
identical(tacsatp$LE_GEAR, trails$LE_GEAR)
[1] FALSE
Code
identical(tacsatp$LE_MSZ, trails$LE_MSZ)
[1] FALSE
Code
identical(tacsatp$LE_RECT, trails$LE_RECT)
[1] FALSE
Code
<-
comparison |>
tacsatp as_tibble() |>
select(.pid, VE_COU, VE_REF, FT_REF, LE_GEAR, LE_MSZ, LE_MET6, LE_RECT) |>
left_join(trails |> select(.pid, VE_COU, VE_REF, FT_REF,
.LE_GEAR = LE_GEAR, .LE_MSZ = LE_MSZ,
.LE_MET6 = LE_MET6, .LE_RECT = LE_RECT)) |>
# reorder
select(.pid, VE_COU, VE_REF, FT_REF, LE_GEAR, .LE_GEAR, LE_MSZ, .LE_MSZ, LE_MET6, .LE_MET6, LE_RECT, .LE_RECT)
|>
comparison #filter(LE_GEAR != .LE_GEAR | LE_MSZ != .LE_MSZ | LE_MET6 != .LE_MET6) |>
mutate(gear = if_else(LE_GEAR == .LE_GEAR, "yes", "no", "yes_na"),
mesh = if_else(LE_MSZ == .LE_MSZ, "yes", "no", "yes_na"),
met6 = if_else(LE_MET6 == .LE_MET6, "yes", "no", "yes_na"),
rect = if_else(LE_RECT == .LE_RECT, "yes", "no", "yes_na")) |>
count(gear, mesh, met6, rect) |>
mutate('%' = round(n / sum(n) * 100, 2)) |>
::kable(caption = "Assigning events info to pings - discrepancies between datacall vs. myway\nNeeds a revisit, at least the rectangles") knitr
gear | mesh | met6 | rect | n | % |
---|---|---|---|---|---|
no | yes | yes | no | 33 | 0.04 |
yes | no | yes | no | 841 | 0.98 |
yes | yes | yes | no | 28628 | 33.29 |
yes | yes | yes | yes | 38377 | 44.63 |
yes_na | yes_na | yes_na | yes_na | 18109 | 21.06 |
Code
# For now let's look at the event records were gear and rectangles are not the same (33 records)
<-
problem_trips |>
comparison #filter(LE_GEAR != .LE_GEAR | LE_MSZ != .LE_MSZ | LE_MET6 != .LE_MET6) |>
filter(LE_GEAR != .LE_GEAR & LE_RECT != .LE_RECT) |>
select(VE_COU, VE_REF, FT_REF) |>
distinct()
<-
problem_events |>
problem_trips left_join(events |> select(VE_REF, FT_REF, LE_CDAT, LE_GEAR, LE_RECT, LE_KG)) |>
arrange(LE_CDAT, desc(LE_KG))
|> knitr::kable(caption = "Events table, original data") problem_events
VE_COU | VE_REF | FT_REF | LE_CDAT | LE_GEAR | LE_RECT | LE_KG |
---|---|---|---|---|---|---|
Atlantis | 1527 | 270892 | 29/04/1800 | TBB | 36F5 | 688.5230 |
Atlantis | 1527 | 270892 | 29/04/1800 | OTB | 34F4 | 373.7451 |
Atlantis | 1527 | 270892 | 29/04/1800 | TBB | 34F4 | 319.9824 |
Code
|>
problem_events inner_join(trails) |>
arrange(SI_DATIM) |>
select(.pid, SI_DATIM, VE_REF, FT_REF, LE_CDAT, SI_DATE, LE_GEAR, LE_RECT, LE_KG) |>
#distinct(VE_REF, FT_REF, LE_CDAT, SI_DATE, LE_GEAR, LE_RECT, LE_KG, .keep_all = TRUE) |>
::kable(caption = "Discrepancies - myway") knitr
.pid | SI_DATIM | VE_REF | FT_REF | LE_CDAT | SI_DATE | LE_GEAR | LE_RECT | LE_KG |
---|---|---|---|---|---|---|---|---|
3696 | 1800-04-28 09:46:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3699 | 1800-04-28 11:42:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3702 | 1800-04-28 13:36:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3705 | 1800-04-28 15:32:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3708 | 1800-04-28 17:28:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3711 | 1800-04-28 19:22:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3714 | 1800-04-28 21:18:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3716 | 1800-04-28 23:12:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | TBB | 36F5 | 688.523 |
3718 | 1800-04-29 01:08:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3720 | 1800-04-29 03:04:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3722 | 1800-04-29 04:58:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3724 | 1800-04-29 06:54:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3726 | 1800-04-29 08:48:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3728 | 1800-04-29 10:44:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3731 | 1800-04-29 14:34:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3734 | 1800-04-29 18:24:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3736 | 1800-04-29 20:20:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | TBB | 36F5 | 688.523 |
3739 | 1800-04-30 00:10:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3741 | 1800-04-30 02:06:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3744 | 1800-04-30 05:56:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3746 | 1800-04-30 07:50:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3748 | 1800-04-30 09:46:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3750 | 1800-04-30 11:42:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3752 | 1800-04-30 13:36:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3754 | 1800-04-30 15:32:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3756 | 1800-04-30 17:28:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3760 | 1800-04-30 19:22:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3764 | 1800-04-30 21:18:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3768 | 1800-04-30 23:12:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | TBB | 36F5 | 688.523 |
3772 | 1800-05-01 01:08:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | TBB | 36F5 | 688.523 |
3776 | 1800-05-01 03:02:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | TBB | 36F5 | 688.523 |
3780 | 1800-05-01 04:58:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | TBB | 36F5 | 688.523 |
3784 | 1800-05-01 06:54:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | TBB | 36F5 | 688.523 |
Code
|>
problem_events inner_join(tacsatp) |>
arrange(SI_DATIM) |>
select(.pid, SI_DATIM, VE_REF, FT_REF, LE_CDAT, SI_DATE, LE_GEAR, LE_RECT, LE_KG) |>
#distinct(VE_REF, FT_REF, LE_CDAT, SI_DATE, LE_GEAR, LE_RECT, LE_KG, .keep_all = TRUE) |>
::kable(caption = "Discrepancies - datacall") knitr
.pid | SI_DATIM | VE_REF | FT_REF | LE_CDAT | SI_DATE | LE_GEAR | LE_RECT | LE_KG |
---|---|---|---|---|---|---|---|---|
3696 | 1800-04-28 09:46:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3699 | 1800-04-28 11:42:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3702 | 1800-04-28 13:36:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3705 | 1800-04-28 15:32:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3708 | 1800-04-28 17:28:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3711 | 1800-04-28 19:22:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3714 | 1800-04-28 21:18:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3716 | 1800-04-28 23:12:00 | 1527 | 270892 | 29/04/1800 | 28/04/1800 | OTB | 34F4 | 373.7451 |
3718 | 1800-04-29 01:08:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3720 | 1800-04-29 03:04:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3722 | 1800-04-29 04:58:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3724 | 1800-04-29 06:54:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3726 | 1800-04-29 08:48:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3728 | 1800-04-29 10:44:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3731 | 1800-04-29 14:34:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3734 | 1800-04-29 18:24:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3736 | 1800-04-29 20:20:00 | 1527 | 270892 | 29/04/1800 | 29/04/1800 | OTB | 34F4 | 373.7451 |
3739 | 1800-04-30 00:10:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3741 | 1800-04-30 02:06:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3744 | 1800-04-30 05:56:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3746 | 1800-04-30 07:50:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3748 | 1800-04-30 09:46:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3750 | 1800-04-30 11:42:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3752 | 1800-04-30 13:36:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3754 | 1800-04-30 15:32:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3756 | 1800-04-30 17:28:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3760 | 1800-04-30 19:22:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3764 | 1800-04-30 21:18:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3768 | 1800-04-30 23:12:00 | 1527 | 270892 | 29/04/1800 | 30/04/1800 | OTB | 34F4 | 373.7451 |
3772 | 1800-05-01 01:08:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | OTB | 34F4 | 373.7451 |
3776 | 1800-05-01 03:02:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | OTB | 34F4 | 373.7451 |
3780 | 1800-05-01 04:58:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | OTB | 34F4 | 373.7451 |
3784 | 1800-05-01 06:54:00 | 1527 | 270892 | 29/04/1800 | 01/05/1800 | OTB | 34F4 | 373.7451 |
Code
# TODO: Check why something is wrotten in the State of Denmark
print("There is a pending issue, the ways do not give the same results")
[1] "There is a pending issue, the ways do not give the same results"
Code
# Fireworks - running via duckdb -----------------------------------------------
<-
duck_highest_event_catch_per_day |>
duck_events arrange(desc(LE_KG)) |>
group_by(VE_COU, VE_REF, FT_REF, LE_CDAT) |>
filter(row_number()==1) |> # instead of slice
ungroup() |>
select(VE_COU, VE_REF, FT_REF, LE_CDAT, LE_GEAR, LE_MSZ, LE_MET6, LE_RECT)
<-
duck_highest_trip_catches |>
duck_events group_by(VE_COU, VE_REF, FT_REF, LE_GEAR, LE_MSZ, LE_MET6, LE_RECT) |>
summarise(LE_KG = sum(LE_KG),
.groups = "drop") |>
arrange(desc(LE_KG)) |>
group_by(VE_COU, VE_REF, FT_REF) |>
filter(row_number()==1) |> # instead of slice
ungroup() |>
select(VE_COU, VE_REF, FT_REF, .LE_GEAR = LE_GEAR, .LE_MSZ = LE_MSZ,
.LE_MET6 = LE_MET6, .LE_RECT = LE_RECT)
<-
duck_trails |>
duck_trails left_join(duck_highest_event_catch_per_day,
by = join_by(VE_COU, VE_REF, FT_REF, SI_DATE == LE_CDAT)) |>
# relationship = "many-to-one") |>
left_join(duck_highest_trip_catches,
by = join_by(VE_COU, VE_REF, FT_REF)) |>
# relationship = "many-to-one") |>
mutate(LE_GEAR = case_when(is.na(LE_GEAR) & !is.na(.LE_GEAR) ~ .LE_GEAR,
.default = LE_GEAR),
LE_MSZ = case_when(is.na(LE_MSZ) & !is.na(.LE_MSZ) ~ .LE_MSZ,
.default = LE_MSZ),
LE_MET6 = case_when(is.na(LE_MET6) & !is.na(.LE_MET6) ~ .LE_MET6,
.default = LE_MET6),
LE_RECT = case_when(is.na(LE_RECT) & !is.na(.LE_RECT) ~ .LE_RECT,
.default = LE_RECT)) |>
select(-c(.LE_GEAR, .LE_MSZ, .LE_MET6, .LE_RECT))