Skip to content

Commit

Permalink
Merge pull request #109 from Crunch-io/issue_069
Browse files Browse the repository at this point in the history
WIP: New summarization methods for crunchtabs toplines
  • Loading branch information
1beb authored May 8, 2020
2 parents 6723f1e + 5d4bdcf commit 4de3a14
Show file tree
Hide file tree
Showing 30 changed files with 1,071 additions and 70 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: In order to generate custom survey reports, this package provides
'banners' (cross-tabulations) of datasets in the Crunch
(<https://crunch.io/>) web service. Reports can be written in 'PDF' format
using 'LaTeX' or in Microsoft Excel '.xlsx' files.
Version: 1.2.3
Version: 1.2.5
Authors@R: c(
person("Persephone", "Tsebelis", role="aut"),
person("Kamil", "Sedrowicz", role="aut"),
Expand Down
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,42 @@ S3method(calculateIfLongtable,CrossTabVar)
S3method(calculateIfLongtable,ToplineCategoricalArray)
S3method(calculateIfLongtable,ToplineVar)
S3method(calculateIfLongtable,default)
S3method(codebookItem,CategoricalArrayVariable)
S3method(codebookItem,CategoricalVariable)
S3method(codebookItem,DatetimeVariable)
S3method(codebookItem,MultipleResponseVariable)
S3method(codebookItem,NumericVariable)
S3method(codebookItem,TextVariable)
S3method(getName,BannerVar)
S3method(getName,CrossTabVar)
S3method(getName,CrunchCube)
S3method(getName,CrunchTabs)
S3method(getName,ToplineBase)
S3method(getName,default)
S3method(prepareExtraSummary,DatetimeVariable)
S3method(prepareExtraSummary,NumericVariable)
S3method(prepareExtraSummary,TextVariable)
S3method(prepareExtraSummary,default)
S3method(print,Crosstabs)
S3method(print,Toplines)
S3method(tableHeader,CrossTabVar)
S3method(tableHeader,ToplineCategoricalArray)
S3method(tableHeader,ToplineVar)
S3method(tableHeader,default)
export(banner)
export(codebookItem)
export(codebookItemTxt)
export(crosstabs)
export(getName)
export(prepareExtraSummary)
export(surveyDuration)
export(themeDefaultExcel)
export(themeDefaultLatex)
export(themeHuffPoCrosstabs)
export(themeHuffPoToplines)
export(themeNew)
export(themeUKPolitical)
export(writeCodebook)
export(writeExcel)
export(writeLatex)
import(crunch)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
## crunchtabs 1.2.5

* Add summarization for continuous variables (NumericVariable) in toplines (#80)
* Add summarization for date time variable (DatetimeVariable) in toplines (#120)
* Add summarization for text data (TextVariable) in toplines (#121)
* Add amsmath package to latex preamble (#123)
* Bugfix for open=TRUE, PDF doc now opens appropriately (#125)
* Update FAQ: question alias numbering example (#128)
* Update FAQ: customizing stub widths (#118)
* Update FAQ: specifying logos in excel (#115)
* Internal proposal for new summarizations and codebook (#117)
* Fix codecov checks (#111)

## crunchtabs 1.2.3

* New introductory and FAQ vignettes (#96)
* tableHeader.CrossTabVar respects global stub width setting (#93)
* Updated vignette (#65)
* Adding more tests (#84)
Expand Down
158 changes: 158 additions & 0 deletions R/codebook.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#' Generate a codebook item
#'
#' A passthrough function that creates a table header appropriate to the class
#' of the data object being passed. Expected classes are:
#'
#' * CategoricalVariable
#' * CategoricalArrayVariable
#' * MultipleResponseVariable
#' * NumericVariable
#' * TextVariable
#' * DatetimeVariable
#'
#' Importantly, this also controls the relative widths of the columns.
#'
#' @md
#' @param x An object of one of the types listed
#' @export
codebookItem <- function(x, ...) {
UseMethod("codebookItem", x)
}

#' @rdname codebookItem
codebookItem.default <- function(x) {
wrong_class_error(x, c("CategoricalVariable", "CategoricalArrayVariable", "MultipleResponseVariable", "TextVariable", "NumericVariable", "DatetimeVariable"), "codebookItem")
}


#' Extract basic question information
#'
#' Extracts the following:
#'
#' * body
#' * alias
#' * description or question text
#' * notes or filter text
#' * id
#'
#' @md
#' @export
codebookItemTxt <- function(x) {
txt <- x@tuple@body
l <- list()
l$title <- txt$alias
l$alias <- txt$alias
l$name <- txt$name
l$id <- txt$id
l$filter_text <- txt$notes
l$question <- txt$description
l
}

#'
codebookItemSubVars <- function (x) {
sv <- subvariables(x)
als <- unname(unlist(lapply(sv@index, getElement, "alias")))
resp <- unname(unlist(lapply(sv@index, getElement, "name")))
sv <- data.frame(`Sub Alias` = als, Name = resp)
sv_responses <- categories(x)

list(
key = sv,
key2 = setNames(sv_responses, c("Response", "Value"))
)

}


#' @describeIn codebookItem Prepares a codebookItem for a CategoricalVariable
#' @export
codebookItem.CategoricalVariable <- function(x) {
txt <- codebookItemTxt(x)
cats <- categories(x)
responses <- do.call(rbind, cats@.Data)
responses <- as.data.frame(responses[!unlist(responses[, "missing"]), ])
responses <- lapply(responses, unlist)
if (all(is.null(responses$numeric_value)))
responses$numeric_value = responses$id
responses <- as.data.frame(lapply(responses, unlist)[c("name", "numeric_value")])
names(responses) <- c("Response", "Value")

latexTop <- sprintf("
\\thispagestyle{fancy}
\\lhead{%s}
\\setlength{\\extrarowheight}{20pt}
\\begin{tabular*}{7in}{p{2.5in}p{4.5in}}
Question & %s \\\\
Name & %s \\\\
Alias & %s \\\\
ID & %s \\\\
Filtering Notes & %s \\\\
\\end{tabular*}
",
txt$meta$name,
txt$meta$question,
txt$meta$name,
txt$meta$alias,
txt$meta$id,
ifelse(txt$meta$filter_text == "", "None", txt$meta$filter_text)
)
latexTop <- gsub(" ", "", latexTop)

latexResponse

latexTop
}

#' @describeIn codebookItem Prepares a codebookitem for a CategoricalArrayVariable
#' @export
codebookItem.CategoricalArrayVariable <- function(x) {
txt = codebookItemTxt(x)
subvars = codebookItemSubVars(x)


}

#' @describeIn codebookItem Prepares a codebookitem for a MultipleResponseVariable
#' @export
codebookItem.MultipleResponseVariable <- function(x) {

}

#' @describeIn codebookItem Prepares a codebookitem for a NumericVariable
#' @export
codebookItem.NumericVariable <- function(x) {
tmp <- c(summary(c_var), SD = sd(c_var, na.rm = TRUE))
tmp <- tibble::rownames_to_column(as.data.frame(tmp))
colnames(tmp) <- c("Summary", "Value")
tmp$Value <- round(tmp$Value, 2)
}

#' @describeIn codebookItem Prepares a codebookitem for a TextVariable
#' @export
codebookItem.TextVariable <- function(x) {

}

#' @describeIn codebookItem Prepares a codebookitem for a DatetimeVariable
#' @export
codebookItem.DatetimeVariable <- function(x) {

}

#' Generate LaTeX CodeBooks
#'
#' \code{writeCodebook} produces publication-quality LaTeX reports
#'
#' @param ds A crunch dataset
#' @param ds
#'
#' @param ... Additional arguments passed to writeLatx
#'
#' @importFrom utils installed.packages
#' @export
writeCodebook <- function(...) {

writeLatex(...)

}
104 changes: 97 additions & 7 deletions R/crosstabs.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#' @param banner An optional object of class \code{Banner} that should be used to generate
#' a Crosstabs summary. Defaults to \code{NULL} - a Toplines summary is produced and returned.
#' @param codebook If \code{TRUE}, codebook data summaries are prepared. Defaults to \code{FALSE}.
#' @param include_numeric Logical. Should we include numeric questions? Defaults to FALSE. Implemented for Toplines only.
#' @param include_datetime Logical. Should we include date time questions? Defaults to FALSE. Implemented for Toplines only.
#' @param include_verbatims Logical. Should we include a sample text varaibles? Defaults to FALSE. Implemented for Toplines only.
#' @param num_verbatims An integer identifying the number of examples to extract from a text variable. Defaults to 10. Implemented for Toplines only.
#' @return A Toplines (when no banner is provided) or Crosstabs (when a banner is provided)
#' summary of the input dataset.
#' @examples
Expand All @@ -22,7 +26,7 @@
#' @importFrom crunch name aliases allVariables is.Numeric is.dataset weight alias weightVariables is.variable
#' @importFrom methods is
#' @export
crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE) {
crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE, include_numeric = FALSE, include_datetime = FALSE, include_verbatims = FALSE, num_verbatims = 10) {

# TODO: open ends
wrong_class_error(dataset, "CrunchDataset", "dataset")
Expand Down Expand Up @@ -61,10 +65,11 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
vars_out <- if (codebook) { vars } else {
intersect(vars, all_aliases[all_types %in% c("categorical", "multiple_response", "categorical_array", "numeric")]) }

error_if_items(
unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
"`vars` of type(s) {items} are not supported and have been skipped.",
and = TRUE, error = FALSE)

# error_if_items(
# unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
# "`vars` of type(s) {items} are not supported and have been skipped.",
# and = TRUE, error = FALSE)

if (length(vars_out) == 0) {
stop("No variables provided.")
Expand All @@ -79,7 +84,13 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
banner
}

results <- tabBooks(dataset = dataset, vars = vars_out, banner = banner_use, weight = weight_var, topline = is.null(banner))
results <- tabBooks(
dataset = dataset,
vars = vars_out,
banner = banner_use,
weight = weight_var,
topline = is.null(banner)
)

if (codebook) {
res_class <- "Codebook"
Expand All @@ -106,12 +117,91 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
class(banner) <- 'Banner'
}

# Here we create logic for including summaries
# for variable types that did not previously have
# summaries (Numeric, Datetime, Text)

var_types <- unlist(lapply(dataset[vars], class))
names(var_types) <- vars
numerics <- vars[var_types == "NumericVariable"]
datetimes <- vars[var_types == "DatetimeVariable"]
verbatims <- vars[var_types == "TextVariable"]

if (include_numeric & length(numerics) > 0) {
# drop weighting vars
weightVars <- unlist(
lapply(numerics, function(x) is.weightVariable(dataset[[x]]))
)

numerics <- numerics[!weightVars]

numRes <- lapply(numerics, function(x) {
prepareExtraSummary(
dataset[[x]],
weighted = !is.null(weight)
)
})
names(numRes) <- numerics

results = c(
results, numRes
)
}

if (include_datetime & length(datetimes) > 0) {

datetimeRes <- lapply(datetimes, function(x) {
prepareExtraSummary(
dataset[[x]],
weighted = !is.null(weight)
)
})

names(datetimeRes) <- datetimes
results = c(
results,
datetimeRes
)
}

if (include_verbatims & length(verbatims) > 0) {
verbatimRes <- lapply(verbatims, function(x) {
prepareExtraSummary(
dataset[[x]],
weighted = !is.null(weight)
)
})

names(verbatimRes) <- verbatims

results = c(
results,
verbatimRes
)
}

if (include_verbatims | include_datetime | include_numeric) {
# If we include new question types we must reflow question
# numbers because otherwise they will be missing from the
# faked objects

# First re-flow in dataset order
tmpResults <- list()
for (i in vars) { tmpResults[[i]] <- results[[i]] }
# Then re-flow question numbers
results = reflowQuestionNumbers(tmpResults)
}

summary_data <- list(
metadata = c(
list(
title = name(dataset), weight = weight,
start_date = crunch::startDate(dataset), end_date = crunch::endDate(dataset),
description = crunch::description(dataset))), results = results, banner = banner
description = crunch::description(dataset)
)
),
results = results,
banner = banner
)

class(summary_data) <- res_class
Expand Down
Loading

0 comments on commit 4de3a14

Please sign in to comment.