Skip to content

Commit

Permalink
feat: functional principal component analysis (#45)
Browse files Browse the repository at this point in the history
* feat: added functional principal component analysis

* feat: implement train_dt and predict_dt methods for FPCA

* test: use expect_pipeop test for all implemented pipeops

* docs(readme): rebuild readme

* docs: add missing tf package for fpca pipeop

* fix: remove todo_comment_linter from defaults in lintr config

* fix: missing stats import and fix tests

* docs: param docs for pca

* docs: more description

* refactor: styling

* refactor: replace pcr var name with pc

* docs(fpca): correct title and name

* docs(readme): build readme

* docs(readme): build readme

* docs: fix docs

* fix(fpca): make .args in invoke a list

* docs: remove format section for fpca

* docs for fpca

* readme [skip ci]

---------

Co-authored-by: Sebastian Fischer <[email protected]>
  • Loading branch information
m-muecke and sebffischer authored Mar 28, 2024
1 parent 84fa4a4 commit 27dfc44
Show file tree
Hide file tree
Showing 21 changed files with 333 additions and 42 deletions.
7 changes: 3 additions & 4 deletions .lintr
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
linters: linters_with_defaults(
# lintr defaults: https://github.com/jimhester/lintr#available-linters
# lintr defaults: https://lintr.r-lib.org/reference/default_linters.html
# the following setup changes/removes certain linters
assignment_linter = NULL, # do not force using <- for assignments
object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names
cyclocomp_linter = NULL, # do not check function complexity
commented_code_linter = NULL, # allow code in comments
todo_comment_linter = NULL, # allow todo in comments
line_length_linter = line_length_linter(120),
object_length_linter = object_length_linter(40)
line_length_linter = line_length_linter(120L),
object_length_linter = object_length_linter(40L)
)
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Collate:
'PipeOpFDAFlatten.R'
'PipeOpFDAInterpol.R'
'PipeOpFDASmooth.R'
'PipeOpFPCA.R'
'TaskClassif_phoneme.R'
'TaskRegr_dti.R'
'TaskRegr_fuel.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(PipeOpFDAExtract)
export(PipeOpFDAFlatten)
export(PipeOpFDAInterpol)
export(PipeOpFDASmooth)
export(PipeOpFPCA)
import(R6)
import(checkmate)
import(data.table)
Expand Down
23 changes: 6 additions & 17 deletions R/PipeOpFDAExtract.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#' @export
#' @examples
#' library(mlr3pipelines)
#'
#' task = tsk("fuel")
#' po_fmean = po("fda.extract", features = "mean")
#' task_fmean = po_fmean$train(list(task))[[1L]]
Expand All @@ -57,6 +58,7 @@ PipeOpFDAExtract = R6Class("PipeOpFDAExtract",
#' Identifier of resulting object, default is `"fda.extract"`.
#' @param param_vals (named `list`)\cr
#' List of hyperparameter settings, overwriting the hyperparameter settings that would
#' otherwise be set during construction. Default `list()`.
initialize = function(id = "fda.extract", param_vals = list()) {
param_set = ps(
drop = p_lgl(tags = c("train", "predict", "required")),
Expand Down Expand Up @@ -156,14 +158,7 @@ PipeOpFDAExtract = R6Class("PipeOpFDAExtract",
})
fextractor = make_fextractor(features)

features = map(
cols,
function(col) {
x = dt[[col]]
invoke(fextractor, x = x, left = left, right = right)
}
)

features = map(cols, function(col) invoke(fextractor, x = dt[[col]], left = left, right = right))
features = unlist(features, recursive = FALSE)
features = set_names(features, feature_names)
features = as.data.table(features)
Expand All @@ -188,19 +183,15 @@ make_fextractor = function(features) {
upper = interval[[2L]]

if (is.na(lower) || is.na(upper)) {
res = map(features, function(f) {
rep(NA_real_, length(x)) # no observation in the given interval [left, right]
})
res = map(features, function(f) rep(NA_real_, length(x))) # no observation in the given interval [left, right]
return(res)
}

values = tf::tf_evaluations(x)
arg = args[lower:upper]
res = map(seq_along(x), function(i) {
value = values[[i]]
map(features, function(f) {
f(arg = arg, value = value[lower:upper])
})
map(features, function(f) f(arg = arg, value = value[lower:upper]))
})
return(transform_list(res))
}
Expand All @@ -217,9 +208,7 @@ make_fextractor = function(features) {
if (is.na(lower) || is.na(upper)) {
rep(NA_real_, length(features)) # no observation in the given interval [left, right]
} else {
map(features, function(f) {
f(arg = arg[lower:upper], value = value[lower:upper])
})
map(features, function(f) f(arg = arg[lower:upper], value = value[lower:upper]))
}
})
transform_list(res)
Expand Down
1 change: 1 addition & 0 deletions R/PipeOpFDAFlatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' @export
#' @examples
#' library(mlr3pipelines)
#'
#' task = tsk("fuel")
#' pop = po("fda.flatten")
#' task_flat = pop$train(list(task))
Expand Down
3 changes: 2 additions & 1 deletion R/PipeOpFDAInterpol.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@
#' @export
#' @examples
#' library(mlr3pipelines)
#'
#' task = tsk("fuel")
#' pop = po("fda.interpol")
#' task_interpol = pop$train(list(task))[[1]]
#' task_interpol = pop$train(list(task))[[1L]]
#' task_interpol$data()
PipeOpFDAInterpol = R6Class("PipeOpFDAInterpol",
inherit = mlr3pipelines::PipeOpTaskPreprocSimple,
Expand Down
1 change: 1 addition & 0 deletions R/PipeOpFDASmooth.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#' @export
#' @examples
#' library(mlr3pipelines)
#'
#' task = tsk("fuel")
#' po_smooth = po("fda.smooth", method = "rollmean", args = list(k = 5))
#' task_smooth = po_smooth$train(list(task))[[1L]]
Expand Down
91 changes: 91 additions & 0 deletions R/PipeOpFPCA.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' @title Functional Principal Component Analysis
#' @name mlr_pipeops_fda.fpca
#'
#' @description
#' This `PipeOp` applies a functional principal component analysis (FPCA) to functional columns and then
#' extracts the principal components as features. This is done using a (truncated) weighted SVD.
#'
#' To apply this `PipeOp` to irregular data, convert it to a regular grid first using [`PipeOpFDAInterpol`].
#'
#' For more details, see [`tfb_fpc()`][tf::tfb_fpc], which is called internally.
#'
#'
#' @section Parameters:
#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as the following parameters:
#' * `pve` :: `numeric(1)` \cr
#'   The proportion of variance explained (between `0` and `1`) that should be retained. Default is `0.995`.
#' * `n_components` :: `integer(1)` \cr
#'   The maximum number of principal components to extract. If fewer components are available, all of
#'   them are extracted. This parameter is initialized to `Inf`.
#'
#' @section Naming:
#' The new names generally append a `_pc_{number}` to the corresponding column name.
#' If a column was called `"x"` and there are three principal components, the corresponding
#' new columns will be called `"x_pc_1", "x_pc_2", "x_pc_3"`.
#'
#' @export
#' @examples
#' library(mlr3pipelines)
#'
#' task = tsk("fuel")
#' po_fpca = po("fda.fpca")
#' task_fpca = po_fpca$train(list(task))[[1L]]
#' task_fpca$data()
PipeOpFPCA = R6Class("PipeOpFPCA",
  inherit = mlr3pipelines::PipeOpTaskPreproc,
  public = list(
    #' @description Initializes a new instance of this Class.
    #' @param id (`character(1)`)\cr
    #'   Identifier of resulting object, default is `"fda.fpca"`.
    #' @param param_vals (named `list`)\cr
    #'   List of hyperparameter settings, overwriting the hyperparameter settings that would
    #'   otherwise be set during construction. Default `list()`.
    initialize = function(id = "fda.fpca", param_vals = list()) {
      param_set = ps(
        pve = p_dbl(default = 0.995, lower = 0, upper = 1, tags = "train"),
        n_components = p_int(1L, special_vals = list(Inf), tags = c("train", "required"))
      )
      # `Inf` means "keep every component that is available"
      param_set$set_values(n_components = Inf)

      super$initialize(
        id = id,
        param_set = param_set,
        param_vals = param_vals,
        packages = c("mlr3fda", "mlr3pipelines", "tf"),
        feature_types = "tfd_reg",
        tags = "fda"
      )
    }
  ),
  private = list(
    # Fits the FPC basis for every functional column, stores the fitted columns in the state
    # (they are needed again in `.predict_dt()`) and returns one numeric column per extracted component.
    .train_dt = function(dt, levels, target) {
      pars = self$param_set$get_values(tags = "train")
      n_components = pars$n_components
      # `n_components` is only used for the extraction below and is not forwarded to `tfb_fpc()`
      fpc_args = remove_named(pars, "n_components")

      dt = map_dtc(dt, function(x) invoke(tf::tfb_fpc, data = x, .args = fpc_args))
      self$state = list(fpc = dt)

      dt = imap_dtc(dt, function(col, nm) private$.pc_scores(col, nm, n_components))
      unnest(dt, colnames(dt))
    },

    # Rebases every functional column onto the column of the same name stored during training via
    # `tf::tf_rebase()` and extracts the components exactly like `.train_dt()` does.
    .predict_dt = function(dt, levels) {
      pars = self$param_set$get_values()
      n_components = pars$n_components

      dt = imap_dtc(dt, function(col, nm) {
        fpc = tf::tf_rebase(col, self$state$fpc[[nm]], arg = tf::tf_arg(col))
        private$.pc_scores(fpc, nm, n_components)
      })
      unnest(dt, colnames(dt))
    },

    # Shared by training and prediction: for every element of the FPC column `fpc`, returns a named
    # list with at most `n_components` entries called `<nm>_pc_1`, `<nm>_pc_2`, ...
    # The first entry of each element is skipped; presumably it is the coefficient of the mean
    # function rather than a component score -- TODO confirm against the tf documentation.
    .pc_scores = function(fpc, nm, n_components) {
      map(fpc, function(coefs) {
        # clamp at 0 so that an element with fewer than two entries yields no components; the
        # previous `2:min(...)` produced a descending index (and thereby bogus values) in that case
        n_pc = max(min(n_components, length(coefs) - 1L), 0L)
        pc = as.list(coefs[seq_len(n_pc) + 1L])
        set_names(pc, sprintf("%s_pc_%d", nm, seq_along(pc)))
      })
    }
  )
)

# Registers the class under the key "fda.fpca", the key used with `po()` in the examples of this file.
# NOTE(review): `register_po()` is not defined in this file; presumably it lives in zzz.R, which
# would be why the @include directive below is needed -- confirm.
#' @include zzz.R
register_po("fda.fpca", PipeOpFPCA)
2 changes: 1 addition & 1 deletion R/TaskRegr_dti.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ load_task_dti = function(id = "dti") {
rcst = tf::tfd(dti$rcst, arg = seq(0L, 1L, length.out = 55L)),
sex = dti$sex
)
dti = na.omit(dti)
dti = stats::na.omit(dti)
b = as_data_backend(dti)

task = TaskRegr$new(
Expand Down
1 change: 0 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ Package Website: [dev](https://mlr3fda.mlr-org.com/)
Extending mlr3 to functional data.

<!-- badges: start -->
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
[![RCMD Check](https://github.com/mlr-org/mlr3fda/actions/workflows/rcmdcheck.yaml/badge.svg)](https://github.com/mlr-org/mlr3fda/actions/workflows/rcmdcheck.yaml)
[![CRAN status](https://www.r-pkg.org/badges/version/mlr3fda)](https://CRAN.R-project.org/package=mlr3fda)
[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3)
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ Extending mlr3 to functional data.

<!-- badges: start -->

[![Lifecycle:
experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
[![RCMD
Check](https://github.com/mlr-org/mlr3fda/actions/workflows/rcmdcheck.yaml/badge.svg)](https://github.com/mlr-org/mlr3fda/actions/workflows/rcmdcheck.yaml)
[![CRAN
Expand Down Expand Up @@ -127,6 +125,7 @@ glrn$predict(task, row_ids = ids$test)
|:-------------------------------------------------------------------------------|:-------------------------------------------------|:---------------------------------------------------|:--------------------|
| [fda.extract](https://mlr3fda.mlr-org.com/reference/mlr_pipeops_fda.extract) | Extracts Simple Features from Functional Columns | [tf](https://cran.r-project.org/package=tf) | fda, data transform |
| [fda.flatten](https://mlr3fda.mlr-org.com/reference/mlr_pipeops_fda.flatten) | Flattens Functional Columns | [tf](https://cran.r-project.org/package=tf) | fda, data transform |
| [fda.fpca](https://mlr3fda.mlr-org.com/reference/mlr_pipeops_fda.fpca) | Functional Principal Component Analysis | [tf](https://cran.r-project.org/package=tf) | fda, data transform |
| [fda.interpol](https://mlr3fda.mlr-org.com/reference/mlr_pipeops_fda.interpol) | Interpolate Functional Columns | [tf](https://cran.r-project.org/package=tf) | fda, data transform |
| [fda.smooth](https://mlr3fda.mlr-org.com/reference/mlr_pipeops_fda.smooth) | Smoothing Functional Columns | [tf](https://cran.r-project.org/package=tf), stats | fda, data transform |

Expand Down
4 changes: 3 additions & 1 deletion man/mlr_pipeops_fda.extract.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/mlr_pipeops_fda.flatten.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

100 changes: 100 additions & 0 deletions man/mlr_pipeops_fda.fpca.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/mlr_pipeops_fda.interpol.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/mlr_pipeops_fda.smooth.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 27dfc44

Please sign in to comment.