diff --git a/.gitignore b/.gitignore index 9789cbce..6a64548f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ docs local_data *.o *.so +*.dll diff --git a/NAMESPACE b/NAMESPACE index 2e1e8a00..19e75602 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ export(estimateBaselineConvexHull) export(estimateBaselineMedian) export(estimateBaselineSnip) export(estimateBaselineTopHat) +export(force_sorted) export(formatRt) export(getImputeMargin) export(gnps) diff --git a/NEWS.md b/NEWS.md index 4614bb2e..029aba87 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # MsCoreUtils 1.15 +## MsCoreUtils 1.15.5 + +- Add function `force_sorted()` to adjust a numeric vector to ensure +increasing/sorted values. + ## MsCoreUtils 1.15.4 - Fix partial argument match (see issue #125). diff --git a/R/force_sorted.R b/R/force_sorted.R new file mode 100644 index 00000000..a3846dd6 --- /dev/null +++ b/R/force_sorted.R @@ -0,0 +1,72 @@ +#' @title Forcing a numeric vector into a monotonically increasing sequence. +#' +#' @description +#' This function performs interpolation on the decreasing parts of a +#' numeric input vector to ensure its values are monotonically increasing. +#' If the values are non-increasing at the end of the vector, these values will +#' be replaced by a sequence of numeric values, starting from the last +#' increasing value in the input vector, and increasing by a very small value, +#' which can be defined with parameter `by`. +#' +#' @param x `numeric` vector. +#' +#' @param by `numeric(1)` value that will determine the monotonic increase in +#' case the values at the end of the vector are non-increasing and +#' therefore interpolation would not be possible. Defaults +#' to `by = .Machine$double.eps` which is the smallest positive +#' floating-point number x such that 1 + x != 1. +#' +#' @return A vector with monotonically increasing values. +#' +#' @note +#' NA values will not be replaced and are returned as-is. 
#'
#' @details
#' Missing values are set aside and the remaining values are processed from
#' left to right. At the first position whose successor is smaller, the
#' function looks up the first value that is larger than the value at that
#' position and replaces everything in between by a linear interpolation
#' between the two. If no larger value exists, the rest of the vector is
#' replaced by a sequence that starts at the current value and increases by
#' `by`, and a warning is issued.
#'
#' An error is thrown if `x` is not numeric (factors are rejected as well) or
#' if `by` is not a single finite, non-negative number.
#'
#' @examples
#' x <- c(NA, NA, NA, 1.2, 1.1, 1.14, 1.2, 1.3, NA, 1.04, 1.4, 1.6, NA, NA)
#' y <- force_sorted(x)
#' is.unsorted(y, na.rm = TRUE)
#'
#' ## Vector non increasing at the end
#' x <- c(1, 2, 1.5, 2)
#' y <- force_sorted(x, by = 0.1)
#' is.unsorted(y, na.rm = TRUE)
#'
#' ## We can see the values were not interpolated but rather replaced by the
#' ## last increasing value `2` and increasing by 0.1.
#' y
#'
#' @export
#'
#' @rdname force_sorted
force_sorted <- function(x, by = .Machine$double.eps) {
    ## `is.numeric()` is TRUE for integer and double vectors but FALSE for
    ## factors, whereas `is.integer()` is TRUE for factors. Checking only
    ## `is.numeric()` therefore rejects factors with a clear message.
    if (!is.numeric(x))
        stop("'x' needs to be numeric or integer")
    ## A negative, missing or non-scalar step could not produce a sorted tail.
    if (!is.numeric(by) || length(by) != 1L || !is.finite(by) || by < 0)
        stop("'by' needs to be a single finite, non-negative number")

    ## Work on the non-NA values only; NAs are put back untouched at the end.
    nna_idx <- which(!is.na(x))
    vec_temp <- x[nna_idx]

    decreasing <- diff(vec_temp) < 0
    while (any(decreasing)) {
        ## Position of the last value before the first drop.
        idx <- which.max(decreasing)
        ## First value larger than the one at `idx`. Everything before `idx`
        ## is already non-decreasing, so a match can only lie after `idx`.
        next_idx <- which(vec_temp > vec_temp[idx])[1L]

        if (is.na(next_idx)) {
            ## No larger value left to interpolate towards: fill the tail
            ## with a sequence growing by `by` and stop.
            l <- idx:length(vec_temp)
            vec_temp[l] <- seq(vec_temp[idx], by = by,
                               length.out = length(l))
            warning("Found decreasing values at the end of the vector. ",
                    "Interpolation is not possible in this region. Instead, ",
                    "replacing these values with a sequence that starts from ",
                    "the last increasing value and increments by ", by,
                    ". See help for more details")
            break
        }
        ## Linear interpolation between the two anchoring values.
        idx_range <- idx:next_idx
        vec_temp[idx_range] <- seq(vec_temp[idx], vec_temp[next_idx],
                                   length.out = length(idx_range))
        decreasing <- diff(vec_temp) < 0
    }
    x[nna_idx] <- vec_temp
    x
}
## Unit tests for `force_sorted()`: interpolation of inner decreasing
## stretches, the `by`-spaced replacement of a decreasing tail, pass-through
## of already sorted input and rejection of non-numeric input.
test_that("force_sorted works", {
    ## Decreasing stretches inside the vector are linearly interpolated.
    vec <- c(NA, NA, NA, 1.2, 1.1, 1.14, 1.2, 1.3, 1.1, 1.04, 1.4, 1.6, NA, NA)
    sorted <- c(NA, NA, NA, 1.2, 1.225, 1.25, 1.275, 1.3, 1.333, 1.367,
                1.4, 1.6, NA, NA)
    result <- force_sorted(vec)
    expect_equal(result, sorted, tolerance = 0.001)
    expect_false(is.unsorted(result, na.rm = TRUE))

    ## Decreasing values at the end cannot be interpolated: they are replaced
    ## by a sequence incrementing by `by` and a warning is issued.
    vec <- c(NA, NA, NA, 1.2, 1.1, 1.14, 1.2, 1.3, 1.4, 1.04, 1.2, 1.04, NA)
    expect_warning(result <- force_sorted(vec, by = 0.000001), "replacing")
    sorted <- c(NA, NA, NA, 1.2, 1.225, 1.25, 1.275, 1.3, 1.4, 1.400001,
                1.400002, 1.400003, NA)
    expect_equal(result, sorted)

    ## Already sorted values are returned unchanged and without any warning.
    vec <- c(NA, NA, NA, 1.2, 1.3, 1.42, 1.46, 1.49, 1.498, 1.5, 1.6, 1.66, NA)
    expect_silent(result <- force_sorted(vec))
    expect_equal(result, vec)

    ## Integer input is accepted; interpolated values are returned as doubles.
    expect_equal(force_sorted(c(1L, 3L, 2L, 4L)), c(1, 3, 3.5, 4))

    ## Empty input is returned as-is.
    expect_identical(force_sorted(numeric()), numeric())

    ## Non-numeric input is rejected.
    expect_error(force_sorted(c("a", "b")), "numeric or integer")
    expect_error(force_sorted(c(TRUE, FALSE)), "numeric or integer")
})