Skip to content

Commit

Permalink
Ran flint and styler across whole package
Browse files Browse the repository at this point in the history
  • Loading branch information
robjhyndman committed Dec 15, 2024
1 parent 691a9f4 commit d38a5ab
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 75 deletions.
1 change: 0 additions & 1 deletion R/dist_kde.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,3 @@ kurtosis.dist_kde <- function(x, ..., na.rm = FALSE) {
(mean((x$kde$x - mean(x$kde$x))^4) + 6 * h^2 * v - 3 * h^4) / v^2 - 3
}
}

12 changes: 8 additions & 4 deletions R/gg_density.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ gg_density <- function(
# Set up data frame containing densities
df <- make_density_df(object, ngrid = ngrid)
# Repeat colors
if(length(object) > length(colors)) {
if (length(object) > length(colors)) {
warning("Insufficient colors. Some densities will be plotted in the same color.")
colors <- rep(colors, 1 + round(length(object)/length(colors)))[seq(length(object))]
colors <- rep(colors, 1 + round(length(object) / length(colors)))[seq_along(object)]
}

# HDR thresholds if needed
Expand All @@ -85,7 +85,9 @@ gg_density <- function(
# HDR color palette
hdr_colors <- lapply(
colors,
function(u) { hdr_palette(color = u, prob = c(prob, 0.995)) }
function(u) {
hdr_palette(color = u, prob = c(prob, 0.995))
}
)
names(hdr_colors) <- names_dist(object, unique = TRUE)
} else {
Expand Down Expand Up @@ -231,7 +233,9 @@ gg_density1 <- function(
}

# Color scale and legend
colors <- unlist(lapply(hdr_colors, function(u) { u[1] }))
colors <- unlist(lapply(hdr_colors, function(u) {
u[1]
}))
p <- p + ggplot2::scale_color_manual(breaks = dist_names, values = colors, labels = dist_names)

# Don't show color legend if only one density
Expand Down
34 changes: 18 additions & 16 deletions R/hdr.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ gg_hdrboxplot <- function(data, var1, var2 = NULL, prob = c(0.5, 0.99),

# Data to plot
show_x <- show_data(dist, prob, threshold, anomalies = show_anomalies)
if(NROW(show_x) != NROW(data)) {
if (NROW(show_x) != NROW(data)) {
stop("Something has gone wrong here!")
}

Expand Down Expand Up @@ -177,22 +177,24 @@ hdr_table <- function(object, prob) {
return(df)
})
} else {
output <- lapply(as.list(object),
output <- lapply(
as.list(object),
function(u) {
# If u is a kde, we can use the data
# Otherwise we need to generate a random sample
if (stats::family(u) == "kde") {
x <- lapply(vctrs::vec_data(u), function(u) u$kde$x)[[1]]
} else {
x <- distributional::generate(u, times = 1e5)[[1]]
}
fi <- density(u, at = as.matrix(x))[[1]]
tibble(
distribution = names_dist(object),
prob = prob,
density = quantile(fi, prob = 1 - prob, type = 8)
)
})
# If u is a kde, we can use the data
# Otherwise we need to generate a random sample
if (stats::family(u) == "kde") {
x <- lapply(vctrs::vec_data(u), function(u) u$kde$x)[[1]]
} else {
x <- distributional::generate(u, times = 1e5)[[1]]
}
fi <- density(u, at = as.matrix(x))[[1]]
tibble(
distribution = names_dist(object),
prob = prob,
density = quantile(fi, prob = 1 - prob, type = 8)
)
}
)
}
purrr::list_rbind(output) |>
dplyr::arrange(distribution, prob)
Expand Down
8 changes: 5 additions & 3 deletions R/mvscale.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ mvscale <- function(object, center = stats::median, scale = robustbase::s_Qn,
mat <- object
} else { # It must be a data frame. So let's find the numeric columns
numeric_col <- unlist(lapply(object, is.numeric))
if (any(!numeric_col) & warning) {
if (!all(numeric_col) & warning) {
warning(
"Ignoring non-numeric columns: ",
paste(names(object)[!numeric_col], collapse = ", ")
Expand All @@ -74,14 +74,16 @@ mvscale <- function(object, center = stats::median, scale = robustbase::s_Qn,
mat <- sweep(mat, 2L, med)
}
# Create more resilient version of scale function
if(!is.null(scale)) {
if (!is.null(scale)) {
my_scale <- function(x, ..., na.rm = TRUE) {
s <- scale(x, ..., na.rm = na.rm)
s[s == 0] <- 1 # Avoid division by zero
return(s)
}
} else {
my_scale <- function(x, ..., na.rm = TRUE) {1}
my_scale <- function(x, ..., na.rm = TRUE) {
1
}
}
# Scale
if (d == 1L) {
Expand Down
30 changes: 16 additions & 14 deletions R/show_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,29 @@ show_data <- function(object, prob, threshold, anomalies = FALSE) {
)
# Compute density values
show_x <- mapply(
function(u, dist) {
d <- NCOL(u) - 1
u$den <- unlist(density(dist, at = as.matrix(u[, seq(d)])))
return(u)
},
u = show_x, dist = as.list(object),
SIMPLIFY = FALSE
)
function(u, dist) {
d <- NCOL(u) - 1
u$den <- unlist(density(dist, at = as.matrix(u[, seq(d)])))
return(u)
},
u = show_x, dist = as.list(object),
SIMPLIFY = FALSE
)
# Compute surprisal probabilities
if(anomalies) {
if (anomalies) {
show_x <- mapply(
function(u, dist) {
d <- NCOL(u) - 2
data <- as.matrix(u[, seq(d)])
u$prob <- surprisals_from_den(data, den= log(u$den), distribution = dist, loo = TRUE, probablity = TRUE)
u$prob <- surprisals_from_den(data, den = log(u$den), distribution = dist, loo = TRUE, probablity = TRUE)
return(u)
},
u = show_x, dist = as.list(object),
SIMPLIFY = FALSE
)
}
# Divide into HDR groups
if(!is.null(threshold)) {
if (!is.null(threshold)) {
show_x <- mapply(
function(u, threshold) {
u$group <- cut(u$den, breaks = c(0, threshold$threshold, Inf), labels = FALSE)
Expand All @@ -66,13 +66,15 @@ show_data <- function(object, prob, threshold, anomalies = FALSE) {
)
}
# Mark anomalies
if(anomalies) {
if (anomalies) {
# Anomalies are points with p < 0.005
show_x <- lapply(show_x,
show_x <- lapply(
show_x,
function(u) {
u$anomaly <- u$prob < 0.005 & u$group == "Outside"
return(u)
})
}
)
}
# Combine into a single tibble
purrr::list_rbind(show_x)
Expand Down
31 changes: 17 additions & 14 deletions R/surprisal_prob.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ surprisal_prob <- function(
y = NULL) {
approximation <- match.arg(approximation)
n <- length(s)
if(all(is.na(s))) {
if (all(is.na(s))) {
return(rep(NA_real_, n))
}

if(approximation == "none") {
if(dimension_dist(distribution) > 1) {
if (approximation == "none") {
if (dimension_dist(distribution) > 1) {
warning("Using an empirical approximation for multivariate data")
approximation <- "empirical"
} else if (identical(unique(stats::family(distribution)), "normal")) {
Expand All @@ -24,21 +24,23 @@ surprisal_prob <- function(
approximation <- "symmetric"
}
}
if(approximation == "none") {
if (approximation == "none") {
# Univariate, not normal, not symmetric
if(length(unique(distribution)) == 1L) {
if (length(unique(distribution)) == 1L) {
distribution <- unique(distribution)
} else {
# Need to compute probabilities one by one
dd <- length(distribution)
if(dd != n) {
if (dd != n) {
stop("Length of distribution must be 1 or equal to length of s")
}
p <- numeric(n)
for(i in seq(n)) {
p[i] <- surprisal_prob(s[i], distribution[i], y = y[i],
for (i in seq(n)) {
p[i] <- surprisal_prob(s[i], distribution[i],
y = y[i],
approximation = approximation,
threshold_probability = threshold_probability)
threshold_probability = threshold_probability
)
}
return(p)
}
Expand All @@ -63,7 +65,7 @@ surprisal_prob <- function(
dist_x <- unique(unlist(dist_x))
dist_y <- -unlist(density(distribution, dist_x, log = TRUE))
prob <- (rank(dist_y) - 1) / length(dist_y)
if(all(is.na(dist_y)) | all(is.na(prob))) {
if (all(is.na(dist_y)) | all(is.na(prob))) {
return(rep(NA_real_, n))
}
p <- 1 - approx(dist_y, prob, xout = s, rule = 2, ties = mean)$y
Expand Down Expand Up @@ -104,10 +106,11 @@ surprisal_normal_prob <- function(s, distribution) {
is_symmetric <- function(dist) {
dist <- unique(dist)
fam <- stats::family(dist)
if(length(fam) > 1) {
for(i in seq_along(fam)) {
if(!is_symmetric(dist[i]))
if (length(fam) > 1) {
for (i in seq_along(fam)) {
if (!is_symmetric(dist[i])) {
return(FALSE)
}
}
return(TRUE)
} else if (fam %in% c("student_t", "cauchy", "logistic", "triangular", "uniform")) {
Expand All @@ -118,7 +121,7 @@ is_symmetric <- function(dist) {
q1 <- q1 - q1[1]
q2 <- q2 - q2[1]
out <- sum(abs(q1 + q2) / max(abs(c(q1, q2))))
if(is.na(out)) {
if (is.na(out)) {
return(FALSE)
} else {
return(out < 1e-8)
Expand Down
27 changes: 15 additions & 12 deletions R/surprisals.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ surprisals <- function(
#' y = c(5, rnorm(49)),
#' p_kde = surprisals(y, loo = TRUE),
#' p_normal = surprisals(y, distribution = dist_normal()),
#' p_zscore = 2*(1-pnorm(abs(y)))
#' p_zscore = 2 * (1 - pnorm(abs(y)))
#' )
#' tibble(
#' y = n01$v1,
Expand Down Expand Up @@ -101,17 +101,19 @@ surprisals.default <- function(
if (NCOL(object) == 1L) {
object <- c(object)
}
if(length(distribution) > 1 & length(object) > 1) {
if(length(distribution) != length(object))
if (length(distribution) > 1 & length(object) > 1) {
if (length(distribution) != length(object)) {
stop("Length of distribution and object must be the same or equal to 1")
}
}
if(length(distribution) == NROW(object)) {
if (length(distribution) == NROW(object)) {
den <- mapply(density, distribution, object, log = TRUE)
} else {
den <- density(distribution, at = object, log = TRUE)
if(is.list(den)) {
if(length(den) > 1)
if (is.list(den)) {
if (length(den) > 1) {
stop("What's going on?")
}
den <- den[[1]]
}
}
Expand All @@ -133,9 +135,10 @@ surprisals_from_den <- function(
if (NCOL(object) == 1L) {
object <- c(object)
}
if(length(distribution) > 1 & length(object) > 1) {
if(length(distribution) != length(object))
if (length(distribution) > 1 & length(object) > 1) {
if (length(distribution) != length(object)) {
stop("Length of distribution and object must be the same or equal to 1")
}
}
scores <- -den
if (loo & all(stats::family(distribution) == "kde")) {
Expand All @@ -155,10 +158,10 @@ surprisals_from_den <- function(
}
if (probability) {
surprisal_prob(scores,
distribution = distribution,
approximation = approximation,
threshold_probability = threshold_probability,
y = y
distribution = distribution,
approximation = approximation,
threshold_probability = threshold_probability,
y = y
) |>
suppressWarnings()
} else {
Expand Down
2 changes: 1 addition & 1 deletion man/surprisals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/surprisals.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
test_that("multiplication works", {
set.seed(2)
y <- c(rnorm(10), 100)
expect_equal(max(surprisals(y, dist_normal())), Inf)
expect_equal(max(surprisals(y, h = 1, loo = TRUE)), Inf)
expect_identical(max(surprisals(y, dist_normal())), Inf)
expect_identical(max(surprisals(y, h = 1, loo = TRUE)), Inf)
expect_equal(max(surprisals(y, h = 1, loo = FALSE)),
log(11 / dnorm(0, 0, 1)),
tolerance = 0.01
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_dist_density.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ test_that("dist_density", {
)
# CDF
expect_equal(distributional::cdf(dist, at),
list(ifelse(at < 2, 0, 1), pnorm(at), pexp(at)),
list(as.integer(!(at < 2)), pnorm(at), pexp(at)),
tolerance = 0.001
)
# Quantiles
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test_dist_kde1.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@ test_that("dist_kde1", {
y <- c(rnorm(100), rnorm(100, 5))
dist <- dist_kde(list(x, y))
# Mean
expect_equal(mean(dist), c(mean(x), mean(y)))
expect_identical(mean(dist), c(mean(x), mean(y)))
# Median
expect_equal(median(dist), quantile(dist, 0.5))
# Variance
expect_equal(distributional::variance(dist) > 0, c(TRUE, TRUE))
expect_identical(distributional::variance(dist) > 0, c(TRUE, TRUE))
# Density
at <- seq(-4, 10, by = 1)
expect_equal(lengths(density(dist, at)), c(15L, 15L))
expect_identical(lengths(density(dist, at)), c(15L, 15L))
# CDF
expect_equal(lengths(distributional::cdf(dist, at)), c(15L, 15L))
expect_identical(lengths(distributional::cdf(dist, at)), c(15L, 15L))
# Quantiles
p <- (1:19) / 20
expect_equal(lengths(quantile(dist, p = p)), c(19L, 19L))
expect_identical(lengths(quantile(dist, p = p)), c(19L, 19L))
# Generate
rand_dist <- distributional::generate(dist, times = 1e6)
expect_equal(lapply(rand_dist, mean) |> unlist(),
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_dist_kde2.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ test_that("dist_kde2", {
y <- c(rnorm(100), rnorm(100, 5))
dist <- dist_kde(cbind(x, y))
# Mean
expect_equal(mean(dist), matrix(c(x = mean(x), y = mean(y)), nrow = 1))
expect_identical(mean(dist), matrix(c(x = mean(x), y = mean(y)), nrow = 1))
# Median
expect_error(median(dist))
# Variance
expect_error(distributional::covariance(dist))
# Density
at <- expand.grid(x = seq(-3, 3, by = 0.5), y = seq(-2, 10, by = 2)) |>
as.matrix()
expect_equal(all(density(dist, at)[[1]] >= 0), TRUE)
expect_true(all(density(dist, at)[[1]] >= 0))
# CDF
expect_error(distributional::cdf(dist, at))
# Quantiles
Expand Down

0 comments on commit d38a5ab

Please sign in to comment.