Skip to content

Commit

Permalink
Merge pull request #70 from sipss/feat-autophase
Browse files Browse the repository at this point in the history
autophase
  • Loading branch information
zeehio authored Jun 9, 2024
2 parents 036c457 + 96e54fe commit 9b1d566
Show file tree
Hide file tree
Showing 55 changed files with 454 additions and 269 deletions.
19 changes: 13 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: AlpsNMR
Type: Package
Title: Automated spectraL Processing System for NMR
Version: 4.7.0
Date: 2023-02-16
Version: 4.7.1
Date: 2024-06-02
Encoding: UTF-8
Authors@R: c(
person(given = "Ivan",
Expand Down Expand Up @@ -39,7 +39,12 @@ Authors@R: c(
person(given = "Nestlé Institute of Health Sciences",
role = "cph"),
person(given = "Institute for Bioengineering of Catalonia",
role = "cph")
role = "cph"),
person(given = "Miller",
family = "Jack",
email = "[email protected]",
role=c("ctb"),
comment = c(ORCID = "0000-0002-6258-1299", "Autophase wrapper, ASICS export"))
)
Description: Reads Bruker NMR data directories both zipped and unzipped.
It provides automated and efficient signal processing for untargeted
Expand All @@ -54,7 +59,7 @@ License: MIT + file LICENSE
URL: https://sipss.github.io/AlpsNMR/, https://github.com/sipss/AlpsNMR
BugReports: https://github.com/sipss/AlpsNMR/issues
LazyData: FALSE
Depends: R (>= 4.2), future (>= 1.10.0)
Depends: R (>= 4.2)
Imports:
utils,
generics,
Expand Down Expand Up @@ -85,8 +90,9 @@ Imports:
ggplot2 (>= 3.1.0),
baseline (>= 1.2-1),
vctrs (>= 0.3.0),
BiocParallel
BiocParallel (>= 1.34.0)
Suggests:
ASICS,
BiocStyle,
ChemoSpec,
cowplot,
Expand All @@ -96,6 +102,7 @@ Suggests:
ggrepel (>= 0.8.0),
gridExtra,
knitr,
NMRphasing,
plotly (>= 4.7.1),
progressr,
SummarizedExperiment,
Expand All @@ -105,6 +112,6 @@ Suggests:
zip (>= 2.0.4)
biocViews: Software, Preprocessing, Visualization, Classification,
Cheminformatics, Metabolomics, DataImport
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export(new_nmr_dataset_1D)
export(new_nmr_dataset_peak_table)
export(nmr_align)
export(nmr_align_find_ref)
export(nmr_autophase)
export(nmr_baseline_estimation)
export(nmr_baseline_removal)
export(nmr_baseline_threshold)
Expand Down Expand Up @@ -130,14 +131,14 @@ export(rename)
export(save_files_to_rDolphin)
export(save_profiling_output)
export(tidy)
export(to_ASICS)
export(to_ChemoSpec)
export(validate_nmr_dataset)
export(validate_nmr_dataset_1D)
export(validate_nmr_dataset_family)
export(validate_nmr_dataset_peak_table)
importFrom(dplyr,filter)
importFrom(dplyr,rename)
importFrom(future,plan)
importFrom(generics,tidy)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# AlpsNMR 4.7.1 (2024-06-02)

- Added `nmr_autophase()` for automated phase correction using the NMRphasing package (#68).
- Added `to_ASICS` function to export dataset for ASICS quantification (#68).

# AlpsNMR 4.1.6 (2023-02-16)

- Download improvements:
Expand Down
1 change: 0 additions & 1 deletion R/import_jdx.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ process_block <- function(lines, metadata_only = FALSE) {
#' @keywords internal
#' @noRd
read_jdx <- function(file_names, metadata_only = FALSE) {
warn_future_to_biocparallel()
output <- BiocParallel::bplapply(
X = file_names,
FUN = function(file_name) {
Expand Down
111 changes: 111 additions & 0 deletions R/nmr_autophase.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#' @title Rephase 1D NMR data
#' @description
#' Use phasing algorithms to rephase data in the spectral domain.
#'
#' This function may improve autophasing processing from instrument vendors. It
#' wraps the [NMRphasing::NMRphasing()] function, to automatically rephase spectra,
#' allowing you to choose from a number of algorithms
#' of which `NLS`, `MPC_DANM` and `SPC_DANM` are the most recent.
#'
#' Rephasing should happen before any spectra interpolation.
#'
#' Please use the `all_components = TRUE` when calling [nmr_read_samples()] in order
#' to load the complex spectra and fix NMR phasing correctly.
#'
#' @param dataset An [nmr_dataset] object
#' @param method The autophasing method. See [NMRphasing::NMRphasing()] for details.
#' @param withBC `NMRphasing::NMRphasing` may perform a baseline correction using modified polynomial fitting. By default
#' AlpsNMR offers other baseline estimation methods and better visualization of its effect, so AlpsNMR by default
#' disables the baseline correction offered by NMRphasing.
#' @param ... Other parameters passed on to [NMRphasing::NMRphasing()].
#' @return A (hopefully better phased) [nmr_dataset] object, with updated real and imaginary parts.
#' @examples
#' if (requireNamespace("NMRphasing", quietly=TRUE)) {
#' # Helpers to create a dataset:
#' lorentzian <- function(x, x0, gamma, A) {
#' A * (1 / (pi * gamma)) * ((gamma^2) / ((x - x0)^2 + gamma^2))
#' }
#' x <- seq(from=1, to=2, length.out = 300)
#' y <- lorentzian(x, 1.3, 0.01, 1) + lorentzian(x, 1.6, 0.01, 1)
#' dataset <- new_nmr_dataset(
#' metadata = list(external = data.frame(NMRExperiment = "10")),
#' data_fields = list(data_1r = list(y)),
#' axis = list(list(x))
#' )
#' # Autophase, interpolate and plot:
#' dataset <- nmr_autophase(dataset, method = "NLS")
#' dataset <- nmr_interpolate_1D(dataset, axis = c(min = 1, max = 2, by = 0.01))
#' plot(dataset)
#' }
#' @export
nmr_autophase <- function(dataset,
method = c("NLS", "MPC_DANM", "MPC_EMP", "SPC_DANM", "SPC_EMP", "SPC_AAM", "SPC_DSM"),
withBC = FALSE, ...) {
require_pkgs("NMRphasing")
if (!"data_1i" %in% names(c(dataset))) {
cli::cli_warn(c(
"!" = "nmr_autophase() performs better with access to the whole complex NMR spectra",
"i" = "Please read the dataset using {.code all_components=TRUE}.",
"i" = "See {.fun AlpsNMR::nmr_autophase} for a full example"
))
}

if (inherits(dataset, "nmr_dataset_1D")) {
cli::cli_abort(c(
"x" = "nmr_autophase() expects non-interpolated spectra",
"i" = "Please use nmr_autophase() before calling nmr_interpolate_1D()",
"i" = "See {.fun AlpsNMR::nmr_autophase} for a full example"
))
}

method <- match.arg(method)

real_list_of_spectra <- dataset$data_1r
imag_list_of_spectra <- dataset$data_1i
absorptionOnly <- FALSE
if (is.null(imag_list_of_spectra)) {
imag_list_of_spectra <- vector(mode="list", length=dataset$num_samples)
} else {
any_imag_missing <- purrr::map_lgl(imag_list_of_spectra, is.null)
any_imag_missing <- any_imag_missing[any_imag_missing]
if (length(any_imag_missing) > 0) {
if (length(any_imag_missing < 7)) {
miss_sample_names <- paste0(names(any_imag_missing), collapse = ", ")
msg <- "Samples without imaginary component: {miss_sample_names}"
} else {
miss_sample_names <- paste0(names(utils::head(any_imag_missing, n=5)), collapse = ", ")
msg <- "Samples without imaginary component: {miss_sample_names} and {length(any_imag_missing)-5} more"
}
cli::cli_warn(c(
"!" = "{length(any_imag_missing)}/{dataset$num_samples} samples have a missing imaginary spectrum",
"i" = msg,
"i" = "Estimating autophase using only the absorption"
))
}
}

real_imag_lists <- BiocParallel::bpmapply(
FUN = function(real, imag, ...) {
if (!is.null(imag)) {
absorptionOnly <- FALSE
to_phase <- complex(
real = real,
imaginary = imag
)
} else {
absorptionOnly <- TRUE
to_phase <- real
}
phased <- NMRphasing::NMRphasing(to_phase, absorptionOnly = TRUE, ...)
list(real = Re(phased), imag = Im(phased))
},
real_list_of_spectra, imag_list_of_spectra,
MoreArgs = list(method = method, withBC = withBC, ...),
SIMPLIFY = FALSE
)
dataset[["data_1r"]] <- purrr::map(real_imag_lists, "real")
if ("data_1i" %in% c(dataset)) {
dataset[["data_1i"]] <- purrr::map(real_imag_lists, "imag")
}
dataset
}
3 changes: 1 addition & 2 deletions R/nmr_data_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,7 @@ split_build_perform <- function(train_test_subset,
do_cv <- function(dataset, y_column, identity_column, train_evaluate_model,
train_test_subsets, train_evaluate_model_args_iter = NULL, ..., .enable_parallel = TRUE) {
if (.enable_parallel) {
warn_future_to_biocparallel()
fun <- mymapply
fun <- BiocParallel::bpmapply
} else {
fun <- mapply
}
Expand Down
1 change: 0 additions & 1 deletion R/nmr_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@ nmr_read_samples_bruker <-
if (length(sample_names) == 0) {
stop("No samples to load")
}
warn_future_to_biocparallel()
list_of_samples <- BiocParallel::bplapply(
X = sample_names,
FUN = function(sampl, pulse_sequence, metadata_only, ...) {
Expand Down
7 changes: 3 additions & 4 deletions R/nmr_detect_peaks_align.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ NULL
#' @param baselineThresh All peaks with intensities below the thresholds are excluded. Either:
#' - A numeric vector of length the number of samples. Each number is a threshold for that sample
#' - A single number. All samples use this number as baseline threshold.
#' - `NULL`. If that's the case, a default function is used ([nmr_baseline_threshold()])
#' - `NULL`. If that's the case, a default function is used ([nmr_baseline_threshold()]), which assumes
#' that there is no signal in the region 9.5-10 ppm.
#' @inheritParams speaq::detectSpecPeaks
#' @param range_without_peaks A numeric vector of length two with a region without peaks, only used when `baselineThresh = NULL`
#' @param fit_lorentzians If `TRUE`, fit a lorentzian to each detected peak, to infer its inflection points. For now disabled for backwards compatibility.
Expand Down Expand Up @@ -175,8 +176,7 @@ nmr_detect_peaks <- function(nmr_dataset,
)
}

warn_future_to_biocparallel()
peakList <- mymapply(
peakList <- BiocParallel::bpmapply(
FUN = callDetectSpecPeaks,
X = data_matrix_to_list,
baselineThresh = baselineThresh,
Expand Down Expand Up @@ -493,7 +493,6 @@ nmr_detect_peaks_tune_snr <- function(ds,
ds1 <- filter(ds, NMRExperiment == !!NMRExperiment)
names(SNR_thresholds) <- SNR_thresholds

warn_future_to_biocparallel()
peaks_detected_list <- BiocParallel::bplapply(
X = SNR_thresholds,
FUN = function(SNR.Th, nmr_dataset, ...) {
Expand Down
50 changes: 30 additions & 20 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,26 +228,6 @@ progress_bar_end <- function(pb) {
}
}

#' @importFrom future plan
warn_future_to_biocparallel <- function() {
# REMOVE THIS WARNING >ONE YEAR AFTER IT'S BEEN RELEASED to BIOCONDUCTOR
current_plan <- future::plan()
if (inherits(current_plan, "sequential")) {
return()
}
rlang::warn(
message = c(
"AlpsNMR now uses BiocParallel instead of future for parallellization",
"i" = "If you used plan(multisession) or any other plan(), consider removing all plan() calls and use:\n library(BiocParallel)\n register(SnowParam(workers = 3), default = TRUE)",
"i" = "You just need to place that code once, typically at the beginning of your script"
),
class = "AlpsNMR-future-to-biocparallel-warning",
.frequency = "once",
.frequency_id = "future-to-biocparallel"
)
return()
}


#' Convert to ChemoSpec Spectra class
#' @param nmr_dataset An [nmr_dataset_1D] object
Expand Down Expand Up @@ -288,6 +268,36 @@ to_ChemoSpec <- function(nmr_dataset, desc = "A nmr_dataset", group = NULL) {
return(Spectra)
}

#' @title Export data for the ASICS spectral quantification library
#' @description
#' Exports the spectra matrix, sample names and chemical shift axis into
#' an ASICS Spectra object.
#'
#' @param dataset An [nmr_dataset_1D] object
#' @inheritDotParams ASICS::createSpectra -spectra
#' @return An [ASICS::Spectra-class] object
#' @examples
#' if (requireNamespace("ASICS", quietly=TRUE)) {
#' nsamp <- 3
#' npoints <- 300
#' metadata <- list(external = data.frame(
#' NMRExperiment = paste0("Sample", seq_len(nsamp))
#' ))
#' dataset <- new_nmr_dataset_1D(
#' ppm_axis = seq(from = 0.2, to = 10, length.out = npoints),
#' data_1r = matrix(runif(nsamp * npoints), nrow = nsamp, ncol = npoints),
#' metadata = metadata
#' )
#' forAsics <- to_ASICS(dataset)
#' #ASICS::ASICS(forAsics)
#' }
#' @export
to_ASICS <- function(dataset, ...) {
require_pkgs("ASICS")
spectra_matrix <- t(nmr_data(dataset))
ASICS::createSpectra(spectra_matrix, ...)
}


abort_if_not <- function(condition, ...) {
if (!condition) {
Expand Down
Loading

0 comments on commit 9b1d566

Please sign in to comment.