Skip to content

Commit

Permalink
Replace amlrPinnipeds mentions with tamatoamlr. temporary filters to …
Browse files Browse the repository at this point in the history
…remove any 2023/24 data. Use surveyed_pst instead of surveyed_san_telmo
  • Loading branch information
smwoodman committed Feb 29, 2024
1 parent c9169ce commit 4bae361
Show file tree
Hide file tree
Showing 11 changed files with 313 additions and 277 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/data-to-dwc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ jobs:
runs-on: windows-latest
# runs-on: ubuntu-latest #errors out when installing 'units' package
steps:
- uses: actions/checkout@v3
- uses: r-lib/actions/setup-r@v2
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
with:
r-version: '4.3.2'
- uses: r-lib/actions/setup-renv@v2

- name: Generate data # Run the script
Expand Down
10 changes: 5 additions & 5 deletions R/data_to_dwc.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Convert CS-PHOC dataset to Darwin Core-compliant tables
# Convert CS-PHOC dataset to Darwin Core compliant records

library(dplyr)
library(readr)
Expand All @@ -7,7 +7,7 @@ library(worrms)
library(here)


# Read data---------------------------------------------------------------------
# Read CSV data-----------------------------------------------------------------
x.header <- read.csv(here("data", "manuscript", "cs-phoc-headers.csv"))
x.count <- read.csv(here("data", "manuscript", "cs-phoc-counts.csv"))

Expand All @@ -30,8 +30,8 @@ event <- x.header %>%
paste(census_date_start, census_date_end, sep = "/")),
# anything that cannot be mapped to Darwin Core terms goes to dynamicProperties
dynamicProperties = sprintf(
'{"research_program": "%s", "surveyed_san_telmo": %s}',
research_program, if_else(surveyed_san_telmo, "true", "false")
'{"research_program": "%s", "surveyed_pst": %s}',
research_program, if_else(surveyed_pst, "true", "false")
),
# add recommended Darwin Core terms: https://dwc.tdwg.org/terms/#event
decimalLongitude = "-60.77",
Expand All @@ -50,7 +50,7 @@ event <- x.header %>%
rename(eventID = header_id) %>%
# fields that cannot be mapped to Darwin Core
select(-c(season_name, census_days, census_date_start, census_date_end,
surveyed_san_telmo, research_program))
surveyed_pst, research_program))

stopifnot(
!any(is.na(event)),
Expand Down
2 changes: 1 addition & 1 deletion R/import_amlr.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ library(readxl)
library(odbc)
library(here)
library(hms)
library(amlrPinnipeds)
library(tamatoamlr)


### Connect to database and read in relevant tables
Expand Down
2 changes: 1 addition & 1 deletion R/import_inach.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
library(tidyverse)
library(readxl)
library(lubridate)
library(amlrPinnipeds)
library(tamatoamlr)
library(here)

con <- amlr_dbConnect(Database = "AMLR_PINNIPEDS")
Expand Down
22 changes: 15 additions & 7 deletions R/manuscript_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
library(dplyr)
library(readr)
library(here)
library(amlrPinnipeds)
library(tamatoamlr)
library(worrms)


Expand All @@ -23,18 +23,26 @@ cs.header.orig <- tbl(con, "vCensus_Phocid_Header") %>%
ungroup() %>%
select(header_id, census_phocid_header_id, season_name,
census_date_start, census_date_end, census_days,
surveyed_san_telmo, research_program)
# TODO: temporary until column name can be updated in database
surveyed_pst = surveyed_san_telmo,
research_program) %>%
# TODO: temporary to avoid including half of 2023/24 data
filter(census_date_start < as.Date("2023-07-01"))

cs.header <- cs.header.orig %>% select(-census_phocid_header_id)

stopifnot(
nrow(cs.header) == nrow(collect(tbl(con, "census_phocid_header")))
nrow(cs.header) == nrow(collect(tbl(con, "census_phocid_header")) %>%
# TODO: temporary to avoid including half of 2023/24 data
filter(census_date_start < as.Date("2023-07-01")))
)


cs.wide <- tbl(con, "vCensus_Phocid") %>%
arrange(census_date, species, location_group) %>%
rename(header_id = census_phocid_header_id) %>%
# TODO: temporary to avoid including half of 2023/24 data
filter(census_date < as.Date("2023-07-01")) %>%
collect() %>%
select(header_id, observer, census_date, location_group, species,
ends_with("_count")) %>%
Expand Down Expand Up @@ -66,7 +74,7 @@ amlr.header <- cs.header %>% filter(research_program == "USAMLR")

cs.core.agg <- cs.wide %>%
# Filter for core locations, sum by header_id/species
filter(location_group %in% c(amlrPinnipeds::csphoc.core.location.groups)) %>%
filter(location_group %in% c(tamatoamlr::csphoc.core.location.groups)) %>%
select(-location_group) %>%
group_by(header_id, location = loc.core, species) %>%
summarise(across(ends_with("_count"), sum_count),
Expand All @@ -88,7 +96,7 @@ cs.core.complete <- cs.core.agg %>%


### Filter for San Telmo locations, group/summarise, and complete
pst.header <- cs.header %>% filter(surveyed_san_telmo)
pst.header <- cs.header %>% filter(surveyed_pst)

cs.pst.complete <- cs.wide %>%
filter(location_group %in% c(loc.pst)) %>%
Expand All @@ -114,7 +122,7 @@ matched_taxa <- bind_rows(matched_taxa_tibbles) %>%
stopifnot(nrow(matched_taxa) == 4)

cs.core.pst <- bind_rows(cs.core.complete, cs.pst.complete)%>%
amlrPinnipeds::total_count() %>%
tamatoamlr::total_count() %>%
arrange(header_id, location, species) %>%
relocate(location, .after = header_id) %>%
relocate(total_count, .before = ad_female_count) %>%
Expand Down Expand Up @@ -149,7 +157,7 @@ write_csv(cs.core.pst, here("data", "manuscript", "cs-phoc-counts.csv"), na = ""
# all(cs.header$header_id %in% cs.core.pst$header_id),
# all(cs.core.pst$header_id %in% cs.header$header_id),
# (nrow(cs.core.pst)) ==
# (4 * nrow(cs.header) + 4 * sum(cs.header$surveyed_san_telmo))
# (4 * nrow(cs.header) + 4 * sum(cs.header$surveyed_pst))
# )


Expand Down
10 changes: 5 additions & 5 deletions R/manuscript_figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ library(rnaturalearthhires)
library(here)
library(viridis)
library(stringr)
library(amlrPinnipeds)
library(tamatoamlr)
library(glue)
library(readr)
library(lubridate)
Expand Down Expand Up @@ -372,7 +372,7 @@ date.test %>%
# Core census locations

# ### Dot charts - Core locations
# for (i in amlrPinnipeds::pinniped.phocid.sp) {
# for (i in tamatoamlr::pinniped.phocid.sp) {
# print(i)
# if (i == 'Elephant seal') i <- "Southern elephant seal"
# i.toplot <- count.toplot.core %>% filter(species_common == i)
Expand All @@ -395,7 +395,7 @@ date.test %>%
#
#
# ### Line charts - Core locations
# for (i in amlrPinnipeds::pinniped.phocid.sp) {
# for (i in tamatoamlr::pinniped.phocid.sp) {
# print(i)
# if (i == 'Elephant seal') i <- "Southern elephant seal"
# i.toplot <- count.toplot.core %>% filter(species_common == i)
Expand Down Expand Up @@ -439,7 +439,7 @@ date.test %>%
#
#
# ### Dot counts - Core + PST locations
# for (i in amlrPinnipeds::pinniped.phocid.sp) {
# for (i in tamatoamlr::pinniped.phocid.sp) {
# print(i)
# if (i == 'Elephant seal') i <- "Southern elephant seal"
# i.toplot <- count.toplot.combo %>% filter(species_common == i)
Expand All @@ -462,7 +462,7 @@ date.test %>%
#
#
# ### Line charts - Core + PST locations
# for (i in amlrPinnipeds::pinniped.phocid.sp) {
# for (i in tamatoamlr::pinniped.phocid.sp) {
# print(i)
# if (i == 'Elephant seal') i <- "Southern elephant seal"
# i.toplot <- count.toplot.combo %>% filter(species_common == i)
Expand Down
2 changes: 1 addition & 1 deletion R/old-data_combine.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#-------------------------------------------------------------------------------
library(tidyverse)
library(here)
library(amlrPinnipeds)
library(tamatoamlr)
library(waldo)

# Get sums of each count for two data frames, to confirm numbers are consistent
Expand Down
2 changes: 1 addition & 1 deletion R/smw_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ library(DBI)
library(lubridate)
library(ggplot2)
library(googlesheets4)
library(amlrPinnipeds)
library(tamatoamlr)

#-------------------------------------------------------------------------------
### Code to check if all species have records for all header records
Expand Down
2 changes: 1 addition & 1 deletion data/manuscript/cs-phoc-headers.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
header_id,season_name,census_date_start,census_date_end,census_days,surveyed_san_telmo,research_program
header_id,season_name,census_date_start,census_date_end,census_days,surveyed_pst,research_program
185,1997/98,1997-12-10,1997-12-10,1,FALSE,INACH
186,1997/98,1997-12-18,1997-12-18,1,FALSE,INACH
187,1997/98,1997-12-24,1997-12-24,1,FALSE,INACH
Expand Down
Loading

1 comment on commit 4bae361

@smwoodman
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressing #1

Please sign in to comment.