Merge pull request #64 from roaldarbol/dev

v0.4
roaldarbol · Nov 12, 2024 · bd30cde · bd30cde
2 parents dde8f7f + a43591a
commit bd30cde
Show file tree

Hide file tree

Showing 29 changed files with 1,899 additions and 19,723 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: animovement
 Type: Package
 Title: An R toolbox for analysing animal movement across space and time
-Version: 0.3.0
+Version: 0.4.0
 Authors@R: 
     person(
       "Mikkel", 

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,7 @@ export(calculate_kinematics)
 export(calculate_statistics)
 export(calculate_straightness)
 export(clean_kinematics)
+export(does_file_have_expected_headers)
 export(ensure_file_has_expected_headers)
 export(ensure_file_has_headers)
 export(group_every)
@@ -28,6 +29,7 @@ import(dplyr)
 import(rhdf5)
 import(tidyr)
 import(tidyselect)
+import(vroom)
 importFrom(circular,circular)
 importFrom(circular,is.circular)
 importFrom(cli,cli_abort)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# animovement 0.4.0
+
+Added readers for AnimalTA (`read_animalta`) and idtracker.ai (`read_idtracker`).
+
 # animovement 0.3.0
 
 Has added the ability to read centroid tracking from Bonsai files through `read_bonsai()`.

diff --git a/R/read_animalta.R b/R/read_animalta.R
@@ -4,9 +4,76 @@
 #' `r lifecycle::badge('experimental')`
 #'
 #' @param path An AnimalTA data frame
+#' @param with_roi Were one or more ROIs used?
+#'
+#' @import dplyr
+#' @import vroom
+#' @importFrom janitor clean_names
 #'
 #' @return a movement dataframe
 #' @export
-read_animalta <- function(path) {
-  cli::cli_abort("`read_animalta` has not yet been implemented. Coming soon!")
+read_animalta <- function(path, with_roi = FALSE) {
+  # Inspect headers
+  if (with_roi == FALSE){
+    validate_files(
+      path,
+      expected_suffix = "csv",
+      expected_headers = c("X", "Y", "Time")
+      )
+    data <- read_animalta_no_roi(path)
+  } else {
+    validate_files(
+      path,
+      expected_suffix = "csv",
+      expected_headers = c("Time", "X_Arena0_Ind0", "Y_Arena0_Ind0")
+    )
+    data <- read_animalta_with_roi(path)
+  }
+  data <- data |>
+    dplyr::mutate(keypoint = factor("centroid")) |>
+    dplyr::relocate("keypoint", .after = "individual")
+  return(data)
+}
+
+#' @inheritParams read_animalta
+#' @keywords internal
+read_animalta_no_roi <- function(path){
+  data <- vroom::vroom(
+    path,
+    delim = ";",
+    show_col_types = FALSE
+  ) |>
+  janitor::clean_names() |>
+    dplyr::mutate(frame = as.numeric(.data$frame),
+                  time = as.numeric(.data$time)) |>
+    dplyr::rename(individual = "ind") |>
+    dplyr::mutate(individual = factor(.data$individual)) |>
+    dplyr::select(-c("frame", "arena"))
+  attributes(data)$spec <- NULL
+  attributes(data)$problems <- NULL
+  return(data)
+}
+
+#' @inheritParams read_animalta
+#' @import tidyr
+#' @keywords internal
+read_animalta_with_roi <- function(path){
+  data <- vroom::vroom(
+    path,
+    delim = ";",
+    show_col_types = FALSE
+  ) |>
+    janitor::clean_names()
+
+  data <- data |>
+    tidyr::pivot_longer(cols = 3:ncol(data),
+                        names_to = c("coordinate", "individual", "arena"),
+                        names_sep = "_",
+                        values_to = "val") |>
+    tidyr::pivot_wider(id_cols = c("time", "individual", "arena"),
+                       names_from = "coordinate",
+                       values_from = "val") |>
+    tidyr::unite("individual", c("individual", "arena")) |>
+    dplyr::mutate(individual = factor(.data$individual))
+  return(data)
 }
diff --git a/R/read_idtracker.R b/R/read_idtracker.R
@@ -3,10 +3,132 @@
 #' @description
 #' `r lifecycle::badge('experimental')`
 #'
-#' @param data An idtracker.ai data frame
+#' @param path Path to an idtracker.ai data frame
+#' @param path_probabilities Path to a csv file with probabilities. Only needed if you are reading csv files as they are included in h5 files.
+#' @param version idtracker.ai version. Currently only v6 output is implemented
+#'
+#' @import dplyr
+#' @import tidyr
+#' @import rhdf5
+#' @importFrom vroom vroom
+#' @importFrom janitor clean_names
 #'
 #' @return a movement dataframe
 #' @export
-read_idtracker <- function(data) {
-  cli::cli_abort("`read_idtracker` has not yet been implemented. Coming soon!")
+read_idtracker <- function(path, path_probabilities = NULL, version = 6) {
+  # Needs to check the file extension
+  # If probabilites are given, extension needs to be csv
+  validate_files(path, expected_suffix = c("csv", "h5"))
+  if (!is.null(path_probabilities) & .get_file_ext(path) == "h5"){
+    cli::cli_warn("You supplied a h5 file and probabilities in csv; the h5 data already contains the probabilities, so we only the h5 data.")
+  }
+  if (.get_file_ext(path) == "csv"){
+    data <- read_idtracker_csv(path, path_probabilities, version = version)
+  } else if(.get_file_ext(path) == "h5"){
+    data <- read_idtracker_h5(path, version = version)
+  }
+
+  return(data)
+}
+
+#' @inheritParams read_idtracker
+#' @keywords internal
+read_idtracker_csv <- function(path, path_probabilities, version = 6){
+  data <- vroom::vroom(
+    path,
+    delim = ",",
+    show_col_types = FALSE
+  ) |>
+    suppressMessages() |>
+    janitor::clean_names()
+
+  data <- data |>
+    tidyr::pivot_longer(cols = 2:ncol(data),
+                        names_to = c("coordinate", "individual"),
+                        names_sep = "(?<=[A-Za-z])(?=[0-9])",
+                        values_to = "val"
+                        ) |>
+    tidyr::pivot_wider(id_cols = c("seconds", "individual"),
+                       names_from = "coordinate",
+                       values_from = "val") |>
+    dplyr::rename(time = "seconds") |>
+    dplyr::mutate(individual = factor(.data$individual))
+
+  if (!is.null(path_probabilities)){
+    probs <- read_idtracker_probabilities(path_probabilities)
+    data <- dplyr::left_join(data, probs, by = c("individual", "time"))
+  }
+
+  # Convert NaN to NA
+  data <- data |>
+    dplyr::mutate(dplyr::across(dplyr::everything(), ~ifelse(is.nan(.), NA, .))) |>
+    dplyr::mutate(individual = factor(.data$individual),
+                  keypoint = factor("centroid")) |>
+    dplyr::relocate("keypoint", .after = "individual")
+
+  return(data)
+}
+
+#' @inheritParams read_idtracker
+#' @keywords internal
+read_idtracker_probabilities <- function(path){
+  data <- vroom::vroom(
+    path,
+    delim = ",",
+    show_col_types = FALSE
+  ) |>
+    suppressMessages() |>
+    janitor::clean_names()
+
+  data <- data |>
+    tidyr::pivot_longer(cols = 2:ncol(data),
+                        names_to = c("placeholder", "individual"),
+                        names_sep = "(?<=[A-Za-z])(?=[0-9])",
+                        values_to = "confidence"
+    ) |>
+    dplyr::select(-"placeholder") |>
+    dplyr::rename(time = "seconds")
+}
+
+#' @inheritParams read_idtracker
+#' @keywords internal
+read_idtracker_h5 <- function(path, version = version){
+  traj_dimensions <- rhdf5::h5ls(path) |>
+    dplyr::as_tibble(.name_repair = "unique") |>
+    dplyr::filter(.data$name == "trajectories") |>
+    dplyr::pull(dim) |>
+    strsplit(" x ")
+
+  n_individuals <- traj_dimensions[[1]][2] |> as.numeric()
+
+  data <- data.frame()
+  for (i in 1:n_individuals){
+    trajectories <- rhdf5::h5read(path, "trajectories")[,i,] |>
+      t() |>
+      dplyr::as_tibble(.name_repair = "unique") |>
+      suppressMessages() |>
+      dplyr::rename(x = "...1",
+                    y = "...2")
+
+    probs <- rhdf5::h5read(path, "id_probabilities")[,i,] |>
+      dplyr::as_tibble(.name_repair = "unique") |>
+      dplyr::rename(confidence = "value")
+
+    data_temp <- dplyr::bind_cols(trajectories, probs) |>
+      dplyr::mutate(individual = factor(i),
+                    keypoint = factor("centroid"),
+                    time = row_number())
+
+    data <- dplyr::bind_rows(data, data_temp)
+  }
+
+  data <- data |>
+    # Convert NaN to NA
+    dplyr::mutate(dplyr::across(dplyr::everything(), ~ifelse(is.nan(.), NA, .))) |>
+    dplyr::relocate("keypoint", .before = "x") |>
+    dplyr::relocate("individual", .before = "keypoint") |>
+    dplyr::relocate("time", .before = "individual") |>
+    dplyr::mutate(individual = factor(.data$individual),
+                  keypoint = factor(.data$keypoint))
+  return(data)
 }
diff --git a/R/read_trackball.R b/R/read_trackball.R
@@ -37,7 +37,7 @@ read_trackball <- function(
     distance_scale = NULL,
     distance_unit = NULL,
     verbose = FALSE) {
-  validate_files(paths, expected_suffix = "csv", expected_headers = c("x", "y", "time"))
+  validate_files(paths, expected_suffix = "csv") #expected_headers = c("x", "y", "time")
   validate_trackball(paths, setup, col_time)
   n_sensors <- length(paths)
 
@@ -80,7 +80,7 @@ read_trackball <- function(
 #' @keywords internal
 read_opticalflow <- function(path, col_time, verbose = FALSE) {
   # Read file
-  if (ensure_file_has_expected_headers(path, c("x", "y", "time"))) {
+  if (does_file_have_expected_headers(path, c("x", "y", "time"))) {
     data <- vroom::vroom(
       path,
       delim = ",",

diff --git a/R/validator_files.R b/R/validator_files.R
@@ -24,6 +24,7 @@ validate_files <- function(
     expected_permission = "r",
     expected_suffix = NULL,
     expected_headers = NULL) {
+
   # Perform checks on all supplied paths
   for (p in path) {
     ensure_is_not_dir(p)
@@ -32,8 +33,6 @@ validate_files <- function(
     if (!is.null(expected_suffix)) {
       ensure_file_has_expected_suffix(p, expected_suffix)
     }
-
-    # Check file headers
     if (!is.null(expected_headers)){
       ensure_file_has_headers(p)
       ensure_file_has_expected_headers(p, expected_headers)
@@ -94,20 +93,42 @@ ensure_file_has_headers <- function(path) {
   df <- vroom::vroom(
     path,
     n_max = 10,
-    delim = ",",
+    # delim = ",",
     show_col_types = FALSE,
     .name_repair = "unique"
   ) |>
     suppressMessages()
   has_headers <- ncol(df) > 1
-  return(has_headers)
+  if (has_headers != TRUE) {
+    cli::cli_abort("Expected file headers(es), but found none.")
+  }
+  # return(has_headers)
 }
 
 #' Ensure file has expected headers
 #' @inheritParams validate_files
 #' @keywords internal
 #' @export
 ensure_file_has_expected_headers <- function(path, expected_headers = c("x", "y", "time")) {
+  df <- vroom::vroom(
+    path,
+    n_max = 10,
+    # delim = ",",
+    show_col_types = FALSE,
+    .name_repair = "unique"
+  ) |>
+    suppressMessages()
+  has_correct_headers <- all(expected_headers %in% names(df))
+  if (has_correct_headers != TRUE) {
+    cli::cli_abort("Expected the following file headers: {expected_headers}, but they were not present.")
+  }
+}
+
+#' Check whether file has expected headers
+#' @inheritParams validate_files
+#' @keywords internal
+#' @export
+does_file_have_expected_headers <- function(path, expected_headers = c("x", "y", "time")) {
   df <- vroom::vroom(
     path,
     n_max = 10,

diff --git a/R/validator_trackball.R b/R/validator_trackball.R
@@ -57,7 +57,8 @@ ensure_identical_suffix <- function(paths) {
 #' @inheritParams validate_trackball
 #' @keywords internal
 ensure_header_match <- function(path, col_time) {
-  if (!ensure_file_has_headers(path) & is.character(col_time)) {
+  does_file_have_expected_headers(path)
+  if (is.character(col_time)) {
     cli::cli_abort("`col_time` is a string ({col_time}), but the file doesn't have named headers. Either use a column number or provide a file with named headers.")
   }
 }
diff --git a/man/calculate_kinematics.Rd b/man/calculate_kinematics.Rd
diff --git a/man/does_file_have_expected_headers.Rd b/man/does_file_have_expected_headers.Rd
diff --git a/man/read_animalta.Rd b/man/read_animalta.Rd
diff --git a/man/read_idtracker.Rd b/man/read_idtracker.Rd