Skip to content

Commit

Permalink
feat: handle both data.frame and numeric vector
Browse files Browse the repository at this point in the history
  • Loading branch information
aravindhebbali committed Mar 2, 2024
1 parent d478871 commit aa4f4cf
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 66 deletions.
135 changes: 73 additions & 62 deletions R/ds-summary-stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
#'
#' @description Range of descriptive statistics for continuous data.
#'
#' @param data A \code{data.frame} or \code{tibble}.
#' @param data An object of type \code{numeric} or \code{data.frame}.
#' @param ... Column(s) in \code{data}; not used when \code{data} is a numeric vector.
#'
#' @examples
#' # numeric data
#' ds_summary_stats(mtcarz$mpg)
#'
#' # single variable
#' ds_summary_stats(mtcarz, mpg)
#'
Expand All @@ -25,87 +28,95 @@
#'
ds_summary_stats <- function(data, ...) {

check_df(data)
if (is.numeric(data)) {
print(ds_summary(data))
} else {

var <- rlang::quos(...)
check_df(data)
var <- rlang::quos(...)

if (length(var) < 1) {
is_num <- sapply(data, is.numeric)
if (!any(is_num == TRUE)) {
stop("Data has no continuous variables.", call. = FALSE)
if (length(var) < 1) {
is_num <- sapply(data, is.numeric)
if (!any(is_num == TRUE)) {
stop("Data has no continuous variables.", call. = FALSE)
}
data <- data[is_num]
} else {
data %<>%
dplyr::select(!!! var)
is_num <- sapply(data, is.numeric)
if (!any(is_num == TRUE)) {
stop("Data has no continuous variables.", call. = FALSE)
}
}
data <- data[is_num]
} else {
data %<>%
dplyr::select(!!! var)
is_num <- sapply(data, is.numeric)
if (!any(is_num == TRUE)) {
stop("Data has no continuous variables.", call. = FALSE)
}
}

col_names <- names(data)
for (i in col_names) {
ds_rule(paste0('Variable: ', i))
cat('\n\n')
print(ds_summary(data, i))
cat('\n\n\n')
col_names <- names(data)
for (i in col_names) {
ds_rule(paste0('Variable: ', i))
cat('\n\n')
print(ds_summary(data, i))
cat('\n\n\n')
}
}

}

# S3 generic for ds_summary; the method is chosen from the class of `data`.
ds_summary <- function(data, variable) {
  UseMethod("ds_summary")
}

ds_summary.default <- function(data, variable) {

check_df(data)
vary <- rlang::enquo(variable)
var_name <- deparse(substitute(variable))
check_numeric(data, !! vary, var_name)
if (is.numeric(data)) {
odata <- data
sdata <- na.omit(data)
} else {
check_df(data)
vary <- rlang::enquo(variable)
var_name <- deparse(substitute(variable))
check_numeric(data, !! vary, var_name)

odata <- dplyr::pull(data, !! vary)
odata <- dplyr::pull(data, !! vary)

sdata <-
data %>%
dplyr::pull(!! vary) %>%
na.omit()
sdata <-
data %>%
dplyr::pull(!! vary) %>%
na.omit()
}

low <- ds_tailobs(sdata, 5, "low")
high <- ds_tailobs(sdata, 5, "high")
low_val <- ds_rindex(sdata, low)
high_val <- ds_rindex(sdata, high)

result <-
list(obs = length(odata),
missing = sum(is.na(odata)),
avg = mean(sdata),
tavg = mean(sdata, trim = 0.05),
stdev = sd(sdata),
variance = var(sdata),
skew = ds_skewness(sdata),
kurtosis = ds_kurtosis(sdata),
uss = stat_uss(sdata),
css = ds_css(sdata),
cvar = ds_cvar(sdata),
sem = ds_std_error(sdata),
median = median(sdata),
mode = ds_mode(sdata),
range = ds_range(sdata),
min = min(sdata),
Max = max(sdata),
iqrange = IQR(sdata),
per99 = quantile(sdata, 0.99),
per90 = quantile(sdata, 0.90),
per95 = quantile(sdata, 0.95),
per75 = quantile(sdata, 0.75),
per25 = quantile(sdata, 0.25),
per10 = quantile(sdata, 0.10),
per5 = quantile(sdata, 0.05),
per1 = quantile(sdata, 0.01),
lowobs = low,
highobs = high,
lowobsi = low_val,
highobsi = high_val)
list(obs = length(odata),
missing = sum(is.na(odata)),
avg = mean(sdata),
tavg = mean(sdata, trim = 0.05),
stdev = sd(sdata),
variance = var(sdata),
skew = ds_skewness(sdata),
kurtosis = ds_kurtosis(sdata),
uss = stat_uss(sdata),
css = ds_css(sdata),
cvar = ds_cvar(sdata),
sem = ds_std_error(sdata),
median = median(sdata),
mode = ds_mode(sdata),
range = ds_range(sdata),
min = min(sdata),
Max = max(sdata),
iqrange = IQR(sdata),
per99 = quantile(sdata, 0.99),
per90 = quantile(sdata, 0.90),
per95 = quantile(sdata, 0.95),
per75 = quantile(sdata, 0.75),
per25 = quantile(sdata, 0.25),
per10 = quantile(sdata, 0.10),
per5 = quantile(sdata, 0.05),
per1 = quantile(sdata, 0.01),
lowobs = low,
highobs = high,
lowobsi = low_val,
highobsi = high_val)

class(result) <- "ds_summary"
return(result)
Expand Down
5 changes: 4 additions & 1 deletion man/ds_summary_stats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 11 additions & 3 deletions tests/testthat/test-summary-stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@ context("test-summary-stats")

ndata <- dplyr::select(mtcarz, mpg)

test_that("output from ds_summary is as expected", {
test_that("output from ds_summary is as expected when data is a data.frame", {

actual <- round(ds_summary(mtcarz, mpg)$variance, 2)
expected <- 36.32
expect_equal(actual, expected)
expected <- 36.32
expect_equal(actual, expected)

})

test_that("output from ds_summary is as expected when data is numeric", {

actual <- round(ds_summary(mtcarz$mpg)$variance, 2)
expected <- 36.32
expect_equal(actual, expected)

})

Expand Down

0 comments on commit aa4f4cf

Please sign in to comment.