Skip to content

Commit

Permalink
maskPed() gains arguments ids, markerNames, alleleLabels
Browse files Browse the repository at this point in the history
  • Loading branch information
magnusdv committed Oct 31, 2023
1 parent d64a879 commit 356ba6e
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 52 deletions.
103 changes: 60 additions & 43 deletions R/ped_mask.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
#' Mask and unmask pedigree datasets
#'
#' The `maskPed()` function replaces the individual IDs, marker names and allele
#' labels with generic sequences like 1, 2, ... For markers with stepwise
#' mutation models, the allelic ladder is simply translated to start at 1,
#' thereby preserving the intra-allelic differences.
#' names with generic labels, and randomly changes their internal order. For
#' markers with stepwise mutation models, the allelic ladder is simply
#' translated to start at 1, thereby preserving the intra-allelic differences.
#'
#' It should be noted that when the masking procedure is applied to a dataset
#' using publicly available frequency databases, the result cannot be considered
#' to be fully anonymised. (In theory, one could deduce the original marker
#' names and alleles from the frequencies.)
#' Note that in order to preserve likelihoods, the allele frequencies are not
#' modified. Thus, if the data uses a publicly available frequency databases,
#' the result cannot be considered to be fully anonymised, since one could (at
#' least in theory) deduce the original marker names and alleles from the
#' frequencies.)
#'
#' @param x A `ped` object or a list of such.
#' @param ids (Optional) A named character with the new IDs, written as `c(old = new,
#' ...)`. By default: 1, 2, ... .
#' @param markerNames (Optional) A named character with the new marker names (and order),
#' written as `c(old = new, ...)`. By default: M1, M2, ... .
#' @param alleleLabels (Optional) A list of character vectors. The list names should be the
#' original marker names. Each vector gives the new allele labels, as `c(old =
#' new, ...)`. By default, each marker gets alleles 1, 2, ... .
#' @param seed An optional seed for the random number generator.
#' @param keys A list with entries `ids`, `markers`, `alleles`.
#' @param keys A list with entries `ids`, `markerNames`, `alleleLabels`.
#'
#' @return An object similar to `x` but with replaced ID labels, marker names
#' and allele labels.
Expand Down Expand Up @@ -47,54 +55,63 @@
#'
#' @importFrom stats setNames
#' @export
maskPed = function(x, seed = NULL) {
maskPed = function(x, ids = NULL, markerNames = NULL, alleleLabels = NULL, seed = NULL) {

if(!is.null(seed))
set.seed(seed)

# Individuals 1,2,...
ids = unlist(labels(x), use.names = FALSE)
newids = as.character(seq_along(ids))
names(newids) = ids
y = relabel(x, newids)
# Individuals (default: 1,2,...)
if(is.null(ids)) {
oldids = unlist(labels(x), use.names = FALSE)
ids = as.character(seq_along(oldids))
names(ids) = oldids
}
y = relabel(x, ids)

nm = nMarkers(y)
if(nm == 0)
return(list(maskedPed = y, keys = list(ids = newids)))
return(list(maskedPed = y, keys = list(ids = ids)))

mnames = name(y)
if(anyNA(mnames))
oldnames = name(y)
if(anyNA(oldnames))
stop2("The masking procedure requires all markers to be named")

# Create new allele labels
newAls = lapply(1:nm, function(i) {
als = alleles(y, marker = i)
alsNum = suppressWarnings(as.numeric(als))
if(!any(is.na(alsNum)))
nw = round(alsNum - min(alsNum) + 1, 1)
else
nw = sample.int(length(als))
setNames(as.character(nw), als)
})
names(newAls) = name(y)
# Allele labels
if(is.null(alleleLabels)) {
alleleLabels = lapply(1:nm, function(i) {
alsOld = alleles(y, marker = i)
alsNum = suppressWarnings(as.numeric(alsOld))
if(!any(is.na(alsNum)))
als = round(alsNum - round(min(alsNum)) + 1, 1)
else
als = sample.int(length(alsOld))
setNames(as.character(als), alsOld)
})
names(alleleLabels) = oldnames
}

# Apply new allele labels
for(i in 1:nm)
y = setAlleleLabels(y, marker = i, alleles = newAls[[i]])
for(m in oldnames)
y = setAlleleLabels(y, marker = m, alleles = alleleLabels[[m]])

# Shuffle markers
shuffledNames = sample(mnames)
y = selectMarkers(y, markers = shuffledNames)
# Shuffle and rename markers (default: M1, M2, ...)
if(is.null(markerNames)) {
shuffle = sample(oldnames)
markerNames = setNames(paste0("M", 1:nm), shuffle)
}
else {
shuffle = names(markerNames)
}

# Marker names M1, M2, ...
newnames = setNames(paste0("M", 1:nm), shuffledNames)
y = setMarkername(y, name = newnames)
y = selectMarkers(y, markers = shuffle)
y = setMarkername(y, name = markerNames)

# Sort `newnames` in original order
newnames = newnames[mnames]
# For key: sort markerNames in original order
markerNames = markerNames[oldnames]

# Return pedigree and keys for unmasking
list(maskedPed = y, keys = list(ids = newids, alleles = newAls, markernames = newnames))
list(maskedPed = y, keys = list(ids = ids, alleleLabels = alleleLabels,
markerNames = markerNames))
}

#' @rdname maskPed
Expand All @@ -105,14 +122,14 @@ unmaskPed = function(x, keys) {
y = relabel(x, .flipNames(keys$ids))

# Restore marker names
y = setMarkername(y, name = .flipNames(keys$markernames))
y = setMarkername(y, name = .flipNames(keys$markerNames))

# Restore marker order
y = selectMarkers(y, names(keys$markernames))
y = selectMarkers(y, names(keys$markerNames))

# Restore allele labels
for(m in names(keys$alleles))
y = setAlleleLabels(y, marker = m, alleles = .flipNames(keys$alleles[[m]]))
for(m in names(keys$alleleLabels))
y = setAlleleLabels(y, marker = m, alleles = .flipNames(keys$alleleLabels[[m]]))

y
}
Expand Down
27 changes: 18 additions & 9 deletions man/maskPed.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 356ba6e

Please sign in to comment.