Skip to content

Commit

Permalink
handle input df to get_dupes() that already has a variable called n
Browse files Browse the repository at this point in the history
fixes #162
  • Loading branch information
sfirke committed Jan 9, 2018
1 parent e3626f1 commit 28b402b
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 14 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The legacy functions `crosstab` and `adorn_crosstab` have been deprecated, but r

## Bug fixes
* `adorn_totals("row")` handles quirky variable names in 1st column [(#118)](https://github.com/sfirke/janitor/issues/118)
* `get_dupes()` returns the correct result when a variable in the input data.frame is already called `"n"` [(#162)](https://github.com/sfirke/janitor/issues/162)


# janitor 0.3.1 (Release date: 2018-01-04)
Expand Down
27 changes: 13 additions & 14 deletions R/get_dupes.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,38 +14,37 @@
#'

get_dupes <- function(dat, ...) {
  # Return the rows of `dat` whose combination of the variables given in `...`
  # occurs more than once, together with a `dupe_count` column holding the
  # number of occurrences. With no variables given, all columns are used.
  #
  # dat: a data.frame to search for duplicated combinations.
  # ...: unquoted variable names identifying the combination to check.
  # Returns the duplicated rows, ungrouped and sorted by the checked variables;
  # emits a message (and returns zero rows) when nothing is duplicated.

  # Unquoted names drive the NSE dplyr calls; quoted (string) versions are kept
  # separately for the variable check and for messages.
  uq_names <- as.list(substitute(list(...)))[-1L]
  df_name <- deparse(substitute(dat))

  # check that each variable name provided is present in names(dat); if not,
  # throw error (check_vars_in_df() is defined elsewhere in the package)
  q_names <- uq_names
  if (is.list(q_names)) {
    q_names <- lapply(uq_names, deparse)
  }
  check_vars_in_df(dat, df_name, unlist(q_names))
  dupe_count <- NULL # to appease NOTE for CRAN; does nothing.

  # if called on an entire data.frame with no specified variable names
  if (length(uq_names) == 0) {
    q_names <- names(dat)
    # backtick-wrap to handle illegal variable names
    uq_names <- paste0("`", as.list(names(dat)), "`")
    message("No variable names specified - using all columns.\n")
  }

  # calculate counts to join back to main df
  counts <- dat %>%
    dplyr::count_(vars = uq_names)

  # Rename the count column by position (it is the last column) instead of
  # assuming it is called "n": `dat` may already contain a variable named "n",
  # in which case the count column is presumably given a different name.
  names(counts)[ncol(counts)] <- "dupe_count"

  # join new count vector to main data.frame; the join relies on the columns
  # the two tables have in common, and its "Joining, by" message is suppressed
  dupes <- suppressMessages(dplyr::inner_join(counts, dat))

  dupes <- dupes %>%
    dplyr::filter(dupe_count > 1) %>%
    dplyr::ungroup() %>%
    dplyr::arrange_(.dots = uq_names)

  # shorten error message for large data.frames
  if (length(q_names) > 10) {
    q_names <- c(
      q_names[1:9],
      paste("... and", length(q_names) - 9, "other variables")
    )
  }
  if (nrow(dupes) == 0) {
    message(paste0(
      "No duplicate combinations found of: ",
      paste(q_names, collapse = ", ")
    ))
  }
  dupes
}

Expand Down

0 comments on commit 28b402b

Please sign in to comment.