Replace amlrPinnipeds mentions with tamatoamlr. Temporary filters to remove any 2023/24 data. Use surveyed_pst instead of surveyed_san_telmo
us-amlr · smwoodman · Feb 29, 2024 · 4bae361
1 parent c9169ce
commit 4bae361
Show file tree

Hide file tree

Showing 11 changed files with 313 additions and 277 deletions.
diff --git a/.github/workflows/data-to-dwc.yml b/.github/workflows/data-to-dwc.yml
@@ -11,8 +11,10 @@ jobs:
     runs-on: windows-latest
     # runs-on: ubuntu-latest #errors out when installing 'units' package
     steps:
-      - uses: actions/checkout@v3
-      - uses: r-lib/actions/setup-r@v2
+      - uses: actions/checkout@v4
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: '4.3.2'
       - uses: r-lib/actions/setup-renv@v2
 
       - name: Generate data # Run the script

diff --git a/R/data_to_dwc.R b/R/data_to_dwc.R
@@ -1,4 +1,4 @@
-# Convert CS-PHOC dataset to Darwin Core-compliant tables
+# Convert CS-PHOC dataset to Darwin Core compliant records
 
 library(dplyr)
 library(readr)
@@ -7,7 +7,7 @@ library(worrms)
 library(here)
 
 
-# Read data---------------------------------------------------------------------
+# Read CSV data-----------------------------------------------------------------
 x.header <- read.csv(here("data", "manuscript", "cs-phoc-headers.csv"))
 x.count <- read.csv(here("data", "manuscript", "cs-phoc-counts.csv"))
 
@@ -30,8 +30,8 @@ event <- x.header %>%
                         paste(census_date_start, census_date_end, sep = "/")),  
     # anything that cannot be mapped to Darwin Core terms goes to dynamicProperties
     dynamicProperties = sprintf(
-      '{"research_program": "%s", "surveyed_san_telmo": %s}', 
-      research_program, if_else(surveyed_san_telmo, "true", "false")
+      '{"research_program": "%s", "surveyed_pst": %s}', 
+      research_program, if_else(surveyed_pst, "true", "false")
     ),
     # add recommended Darwin Core terms: https://dwc.tdwg.org/terms/#event
     decimalLongitude = "-60.77",
@@ -50,7 +50,7 @@ event <- x.header %>%
   rename(eventID = header_id) %>%
   # fields that cannot be mapped to Darwin Core
   select(-c(season_name, census_days, census_date_start, census_date_end, 
-            surveyed_san_telmo, research_program))
+            surveyed_pst, research_program))
 
 stopifnot(
   !any(is.na(event)), 

diff --git a/R/import_amlr.R b/R/import_amlr.R
@@ -15,7 +15,7 @@ library(readxl)
 library(odbc)
 library(here)
 library(hms)
-library(amlrPinnipeds)
+library(tamatoamlr)
 
 
 ### Connect to database and read in relevant tables

diff --git a/R/import_inach.R b/R/import_inach.R
@@ -5,7 +5,7 @@
 library(tidyverse)
 library(readxl)
 library(lubridate)
-library(amlrPinnipeds)
+library(tamatoamlr)
 library(here)
 
 con <- amlr_dbConnect(Database = "AMLR_PINNIPEDS")

diff --git a/R/manuscript_data.R b/R/manuscript_data.R
@@ -5,7 +5,7 @@
 library(dplyr)
 library(readr)
 library(here)
-library(amlrPinnipeds)
+library(tamatoamlr)
 library(worrms)
 
 
@@ -23,18 +23,26 @@ cs.header.orig <- tbl(con, "vCensus_Phocid_Header") %>%
   ungroup() %>% 
   select(header_id, census_phocid_header_id, season_name, 
          census_date_start, census_date_end, census_days, 
-         surveyed_san_telmo, research_program)
+         # TODO: temporary until column name can be updated in database
+         surveyed_pst = surveyed_san_telmo, 
+         research_program) %>% 
+  # TODO: temporary to avoid including half of 2023/24 data
+  filter(census_date_start < as.Date("2023-07-01"))
 
 cs.header <- cs.header.orig %>% select(-census_phocid_header_id)
 
 stopifnot(
-  nrow(cs.header) == nrow(collect(tbl(con, "census_phocid_header")))
+  nrow(cs.header) == nrow(collect(tbl(con, "census_phocid_header")) %>% 
+                            # TODO: temporary to avoid including half of 2023/24 data
+                            filter(census_date_start < as.Date("2023-07-01")))
 )
 
 
 cs.wide <- tbl(con, "vCensus_Phocid") %>% 
   arrange(census_date, species, location_group) %>% 
   rename(header_id = census_phocid_header_id) %>% 
+  # TODO: temporary to avoid including half of 2023/24 data
+  filter(census_date < as.Date("2023-07-01")) %>% 
   collect() %>% 
   select(header_id, observer, census_date, location_group, species, 
          ends_with("_count")) %>% 
@@ -66,7 +74,7 @@ amlr.header <- cs.header %>% filter(research_program == "USAMLR")
 
 cs.core.agg <- cs.wide %>% 
   # Filter for core locations, sum by header_id/species
-  filter(location_group %in% c(amlrPinnipeds::csphoc.core.location.groups)) %>% 
+  filter(location_group %in% c(tamatoamlr::csphoc.core.location.groups)) %>% 
   select(-location_group) %>% 
   group_by(header_id, location = loc.core, species) %>% 
   summarise(across(ends_with("_count"), sum_count),  
@@ -88,7 +96,7 @@ cs.core.complete <- cs.core.agg %>%
 
 
 ### Filter for san telmo locations, group/summarise, and complete
-pst.header <- cs.header %>% filter(surveyed_san_telmo)
+pst.header <- cs.header %>% filter(surveyed_pst)
 
 cs.pst.complete <- cs.wide %>% 
   filter(location_group %in% c(loc.pst)) %>% 
@@ -114,7 +122,7 @@ matched_taxa <- bind_rows(matched_taxa_tibbles) %>%
 stopifnot(nrow(matched_taxa) == 4)
 
 cs.core.pst <- bind_rows(cs.core.complete, cs.pst.complete)%>%
-  amlrPinnipeds::total_count() %>%
+  tamatoamlr::total_count() %>%
   arrange(header_id, location, species) %>%
   relocate(location, .after = header_id) %>%
   relocate(total_count, .before = ad_female_count) %>% 
@@ -149,7 +157,7 @@ write_csv(cs.core.pst, here("data", "manuscript", "cs-phoc-counts.csv"), na = ""
 #   all(cs.header$header_id %in% cs.core.pst$header_id),
 #   all(cs.core.pst$header_id %in% cs.header$header_id),
 #   (nrow(cs.core.pst)) ==
-#     (4 * nrow(cs.header) + 4 * sum(cs.header$surveyed_san_telmo))
+#     (4 * nrow(cs.header) + 4 * sum(cs.header$surveyed_pst))
 # )
 
 

diff --git a/R/manuscript_figures.R b/R/manuscript_figures.R
@@ -10,7 +10,7 @@ library(rnaturalearthhires)
 library(here)
 library(viridis)
 library(stringr)
-library(amlrPinnipeds)
+library(tamatoamlr)
 library(glue)
 library(readr)
 library(lubridate)
@@ -372,7 +372,7 @@ date.test %>%
 # Core census locations
 
 # ### Dot charts - Core locations
-# for (i in amlrPinnipeds::pinniped.phocid.sp) {
+# for (i in tamatoamlr::pinniped.phocid.sp) {
 #   print(i)
 #   if (i == 'Elephant seal') i <- "Southern elephant seal"
 #   i.toplot <- count.toplot.core %>% filter(species_common == i)
@@ -395,7 +395,7 @@ date.test %>%
 # 
 # 
 # ### Line charts - Core locations
-# for (i in amlrPinnipeds::pinniped.phocid.sp) {
+# for (i in tamatoamlr::pinniped.phocid.sp) {
 #   print(i)
 #   if (i == 'Elephant seal') i <- "Southern elephant seal"
 #   i.toplot <- count.toplot.core %>% filter(species_common == i) 
@@ -439,7 +439,7 @@ date.test %>%
 # 
 # 
 # ### Dot counts - Core + PST locations
-# for (i in amlrPinnipeds::pinniped.phocid.sp) {
+# for (i in tamatoamlr::pinniped.phocid.sp) {
 #   print(i)
 #   if (i == 'Elephant seal') i <- "Southern elephant seal"
 #   i.toplot <- count.toplot.combo %>% filter(species_common == i)
@@ -462,7 +462,7 @@ date.test %>%
 # 
 # 
 # ### Line charts - Core + PST locations
-# for (i in amlrPinnipeds::pinniped.phocid.sp) {
+# for (i in tamatoamlr::pinniped.phocid.sp) {
 #   print(i)
 #   if (i == 'Elephant seal') i <- "Southern elephant seal"
 #   i.toplot <- count.toplot.combo %>% filter(species_common == i)

diff --git a/R/old-data_combine.R b/R/old-data_combine.R
@@ -7,7 +7,7 @@
 #-------------------------------------------------------------------------------
 library(tidyverse)
 library(here)
-library(amlrPinnipeds)
+library(tamatoamlr)
 library(waldo)
 
 # Get sums of each count for two data frames, to confirm numbers are consistent

diff --git a/R/smw_qc.R b/R/smw_qc.R
@@ -6,7 +6,7 @@ library(DBI)
 library(lubridate)
 library(ggplot2)
 library(googlesheets4)
-library(amlrPinnipeds)
+library(tamatoamlr)
 
 #-------------------------------------------------------------------------------
 ### Code to check if all species have records for all header records

diff --git a/data/manuscript/cs-phoc-headers.csv b/data/manuscript/cs-phoc-headers.csv
@@ -1,4 +1,4 @@
-header_id,season_name,census_date_start,census_date_end,census_days,surveyed_san_telmo,research_program
+header_id,season_name,census_date_start,census_date_end,census_days,surveyed_pst,research_program
 185,1997/98,1997-12-10,1997-12-10,1,FALSE,INACH
 186,1997/98,1997-12-18,1997-12-18,1,FALSE,INACH
 187,1997/98,1997-12-24,1997-12-24,1,FALSE,INACH