# Maritime identification digits (MID) lookup table, read from parquet.
# Two helper columns prepend "98" and "99" to each MID; further down they are
# compared with the first five characters of all-numeric globalids in order to
# classify likely incomplete mmsi signals.
MID <-
  here::here("data/auxillary/maritime_identification_digits.parquet") |>
  nanoparquet::read_parquet() |>
  mutate(
    MID_child = paste0("98", MID),
    MID_aid = paste0("99", MID)
  )
## Call sign prefix - flag state
# Dropping the "TF" prefix is critical: the stk localid and globalid fields
# contain a lot of messy values involving TF.
cs.prefix <-
  here::here("data/auxillary/callsign_prefix.parquet") |>
  nanoparquet::read_parquet() |>
  filter(cs_prefix != "TF")
# Hand-curated character vector of globalid values that the classification
# further down labels as "fixed" (matched with `%in%`, so duplicates are
# harmless and matching is exact, including trailing spaces).
# NOTE(review): presumably these are stationary transmitters (base stations,
# buoys, test units) rather than vessels -- confirm with the data owner.
# NOTE(review): entries containing "?" look like mis-encoded characters; they
# presumably mirror the raw values as stored, so do not "repair" them without
# checking the data.
fixed <-
c("Surtseyja", "Straumnes", "Steinanes", "Haganes_K", "Bakkafjar",
"Laugardal", "BorgfjE P", "Gemlufall", "Straumduf", "Eyrarhlid",
"Hvalnes", "Straumm?l", "V_Blafj P", "Isafj.dju", "Rey?arfjo",
"VidarfjAI", "KLIF AIS", "VadlahAIS", "Hafell_AI", "TIND_AIS", "Storhof?i",
"Helguv", "Laugarb.f", "Grimseyja", "G.skagi", "Grindavik", "Hornafjar",
"Flateyjar", "Kogurdufl", "Blakkur.d", "Bakkafjor", "Hvalbakur", "SUGANDI_A",
"TJALDANES", "Snaefj1",
"Snaefj2", "Lande", "Sjomsk", "TJALD.NES", "illvid_p", "BLAKKSNES", "V_Sfell B",
"HOF", "Illvi?rah", "Miðfegg P", "BASE11", "Borgarfj ",
"V_Hofsos", "V_Hofsos ", "Arnarfjor", "Trackw", "SUGANDAFJ",
"BORGARÍS", "BORGARIS", "BORGARIS0", "BORGARIS1",
"ThverfAIS",
"TEST",
"Hvannadal", "Tjaldanes", "BorglAIS", "HvalnAIS", "Midf_AIS",
"Hellish A", "GreyAIS", "Berufjor?",
"Baeir", "Frodarh_A", "Onundarfj", "HusavikAI", "Haukadals",
"Drangsnes", "Hofdahusa", "Djupiv-AI", "Dyrafjor?", "Faskru?sf",
"Fossfjor?", "Hvestudal", "Hringsdal", "Bakkafj-d", "Mulagong",
"Grnipa P", "Haenuvk P", "Bolafj P", "Ennish P", "Grimsey P",
"Frodarh P", "Haoxl B", "Hafell P", "Vidarfj P", "Djupiv P",
"Blafj P", "Sigmundar", "Tjnes P", "Sfell P", "Hellish P",
"Gvkurfj P", "Klif P", "Thverfj B", "Klif B", "Grimsey B",
"Frodarh B", "Hvalnes P", "Haoxl P", "Grnipa B", "Illvidh P",
"FLATEYRI_", "Hellish B", "Husavik B", "Hofsos P", "Faskra?sf",
"Husavik P", "Tjornes P", "Thorbj B", "Borgarh-P", "Baeir B",
"VadlahP", "Thverfj P", "Dalvik P", "Godat-P", "HafellAIS",
"Bjolfur P", "Ennish B", "Thorbj P", "Hraunh P", "Gufusk P",
"Lambhgi P", "Fri?rik A", "Baeir P", "Flatey du", "Fellsgg1P",
"Fellsgg2P", "Akurtr B", "Midfell-P", "Horgargru", "Borgarl P",
"Haenuvk B", "Gagnhdi P", "Hvalnes B", "HVestudal", "Gildruh B",
"Sfell B", "Gagnhdi B", "BorgfjE B", "Spolur-P", "Klakkur P",
"KOLBEINSE", "Stykkh P", "Tjnes B", "Kvigindis", "Dufl_GRV_",
"Fell P", "Steinny-P", "Stokksn P", "Tjorn P", "Kopasker",
"Akreyri P", "Grima P", "Dalatgi B", "ThverfjP", "Rifssker_",
"Dalatgi P", "Tjorn B", "Kolmuli_K", "Vattarnes", "Thorbjorn",
"Husavik", "Hafranes_", "Drangaj_P", "Hrisey", "Hofsos",
"Midfegg P", "Midf P", "Gufunes P", "Mi?fegg P", "Dalvík P",
"Dalvik", "Borgfj E", "Straumn-A", "Talknaf P",
"Steinny", "TILK", "ThverfjP1", "Heidar-P", "Vadlaheid",
"Talknaf B", "BLAKK_AIS", "Mork-P", "VPN_Bauja",
"PF7567", "Daltat", "AEDEY AIS", "Borgfj E",
# should really use the mobile id here, at least it is easier
# because that is how things are checked iteratively
"2515036", "2311200", "2311400", "2573900", "2311500",
"2515071", "25150051", "2314000",
"251510120", # Skipstjóraskólinn
"231140005",
"231140003",
"251999898",
"231140004",
"231140006",
"231140001",
"231140002",
"251513130",
"xxx5",
"103984",
"Borgfj E",
"Borgfj E ",
"BLAKK_OLD",
"Gufunes B",
"Blondos P",
"Mork P",
"Va?lahei?"
)
# globalid values that the classification further down labels as "kvi".
# NOTE(review): presumably sea-pen / fish-farm transmitters -- confirm.
kvi <- c(
  "Eyri_Kvi_",
  "Kvi_Skotu",
  "Kvi_Baeja",
  "Bjarg_Kvi",
  "Sjokvi-4",
  "Sjokvi-3",
  "Kvi-0 Hri",
  "Sjokvi-2",
  "Kvi_Sande",
  "Kvi_Fenri",
  "Sjokvi",
  "Y.Kofrady"
)
# globalid values that the classification further down labels as "hafro".
# Matching is exact, so the variant with a trailing space is listed separately.
# NOTE(review): presumably research-institute instruments / buoys -- confirm.
hafro <- c(
  "Hafro_Str",
  "Hafro_O2_",
  "Hafro_CO2",
  "Hafro_H11",
  "Hafro_H20",
  "Hafro_Hva",
  "Hafro_duf",
  "AfmHafro_",
  "Afm_Hafro",
  "Hafro_W.V",
  "Hafro_W.V ",
  "afm_Hafro",
  "Rannsokn_"
)
#' Drop missing values from a vector
#'
#' @param x A vector.
#' @return `x` with every element for which `is.na()` is `TRUE` removed;
#'   the remaining elements keep their order (and names, if any).
v_drop_na <- function(x) {
  is_present <- !is.na(x)
  x[is_present]
}
# Icelandic vessel table read from parquet; its cs, mmsi, uid and vid columns
# serve as lookup sets in the globalid classification further down.
v <-
  here::here("data/vessels/vessels_iceland.parquet") |>
  nanoparquet::read_parquet()
# Classify every non-missing globalid in `mobile` into a coarse type and
# tabulate the number of rows per type, most frequent first.
#
# case_when() assigns the FIRST rule that matches, so the order of the rules is
# significant: "no trail" and the hand-curated lists come first, then exact
# matches against the Icelandic vessel table `v`, then pattern-based guesses.
#
# NOTE(review): `g` is matched as text; `%in%` coerces v$mmsi / v$uid to
# character if they are numeric -- confirm the column types so that coercion
# cannot cause missed matches.
mobile |>
  # Number of trail records per mobileid. The join key is spelled out so the
  # join cannot silently switch keys if another shared column appears.
  left_join(
    trail |> count(mobileid, name = "pings") |> collect(),
    by = join_by(mobileid)
  ) |>
  rename(g = globalid) |>
  filter(!is.na(g)) |>
  mutate(
    type = case_when(
      # no matching row in the trail counts
      is.na(pings) ~ "no trail",
      # hand-curated lists
      g %in% fixed ~ "fixed",
      g %in% kvi ~ "kvi",
      g %in% hafro ~ "hafro",
      # exact match against the vessel table
      g %in% v_drop_na(v$cs) ~ "cs_isl",
      g %in% v_drop_na(v$mmsi) ~ "mmsi_isl",
      g %in% v_drop_na(v$uid) ~ "uid_isl",
      # starts with a known call sign prefix, is not purely numeric and is not
      # a "MOB_" id
      str_sub(g, 1, 2) %in% cs.prefix$cs_prefix &
        !numbers_only(str_trim(g)) &
        !str_starts(g, "MOB_") ~ "cs",
      # numeric ids whose first five characters are "98" + MID or "99" + MID
      numbers_only(g) & str_sub(g, 1, 5) %in% MID$MID_child ~ "mmsi_child",
      numbers_only(g) & str_sub(g, 1, 5) %in% MID$MID_aid ~ "mmsi_aid",
      # any other nine-character numeric id
      numbers_only(g) & nchar(g) == 9 ~ "mmsi",
      # globalid is a vessel id (vid), either repeated in localid or paired
      # with an "unknown" localid ("unkown" is a misspelling matched on purpose)
      g == localid & g %in% as.character(v_drop_na(v$vid)) ~ "vid-vid",
      tolower(localid) %in% c("unkown", "unknown") &
        g %in% as.character(v_drop_na(v$vid)) ~ "unknown-vid",
      # first four characters are a known call sign or vid, with a suffix
      str_sub(g, 1, 4) %in% v_drop_na(v$cs) &
        str_detect(toupper(g), "NET") ~ "cs_NET",
      str_sub(g, 1, 4) %in% v_drop_na(v$cs) & str_ends(g, "2") ~ "cs_2",
      # NA dropped here as well, consistent with the other vid lookups
      str_sub(g, 1, 4) %in% as.character(v_drop_na(v$vid)) &
        str_detect(toupper(g), "NET") ~ "vid_NET",
      # remaining ids that merely start with a known call sign prefix
      str_sub(g, 1, 2) %in% cs.prefix$cs_prefix ~ "cs_prefix",
      .default = "something else"
    )
  ) |>
  count(type) |>
  arrange(desc(n)) |>
  knitr::kable(caption = "Non-exhaustive classification of globalid - number of observations")