diff --git a/DESCRIPTION b/DESCRIPTION index 2672674c..030b044a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,7 @@ Description: In order to generate custom survey reports, this package provides 'banners' (cross-tabulations) of datasets in the Crunch () web service. Reports can be written in 'PDF' format using 'LaTeX' or in Microsoft Excel '.xlsx' files. -Version: 1.2.3 +Version: 1.2.5 Authors@R: c( person("Persephone", "Tsebelis", role="aut"), person("Kamil", "Sedrowicz", role="aut"), diff --git a/NAMESPACE b/NAMESPACE index 161f622f..c7645eaf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,12 +4,22 @@ S3method(calculateIfLongtable,CrossTabVar) S3method(calculateIfLongtable,ToplineCategoricalArray) S3method(calculateIfLongtable,ToplineVar) S3method(calculateIfLongtable,default) +S3method(codebookItem,CategoricalArrayVariable) +S3method(codebookItem,CategoricalVariable) +S3method(codebookItem,DatetimeVariable) +S3method(codebookItem,MultipleResponseVariable) +S3method(codebookItem,NumericVariable) +S3method(codebookItem,TextVariable) S3method(getName,BannerVar) S3method(getName,CrossTabVar) S3method(getName,CrunchCube) S3method(getName,CrunchTabs) S3method(getName,ToplineBase) S3method(getName,default) +S3method(prepareExtraSummary,DatetimeVariable) +S3method(prepareExtraSummary,NumericVariable) +S3method(prepareExtraSummary,TextVariable) +S3method(prepareExtraSummary,default) S3method(print,Crosstabs) S3method(print,Toplines) S3method(tableHeader,CrossTabVar) @@ -17,14 +27,19 @@ S3method(tableHeader,ToplineCategoricalArray) S3method(tableHeader,ToplineVar) S3method(tableHeader,default) export(banner) +export(codebookItem) +export(codebookItemTxt) export(crosstabs) export(getName) +export(prepareExtraSummary) +export(surveyDuration) export(themeDefaultExcel) export(themeDefaultLatex) export(themeHuffPoCrosstabs) export(themeHuffPoToplines) export(themeNew) export(themeUKPolitical) +export(writeCodebook) export(writeExcel) export(writeLatex) import(crunch) diff --git a/NEWS.md b/NEWS.md index ad0e04fd..ada4b3fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,19 @@ +## crunchtabs 1.2.5 + +* Add summarization for continuous variables (NumericVariable) in toplines (#80) +* Add summarization for date time variable (DatetimeVariable) in toplines (#120) +* Add summarization for text data (TextVariable) in toplines (#121) +* Add amsmath package to latex preamble (#123) +* Bugfix for open=TRUE, PDF doc now opens appropriately (#125) +* Update FAQ: question alias numbering example (#128) +* Update FAQ: customizing stub widths (#118) +* Update FAQ: specifying logos in excel (#115) +* Internal proposal for new summarizations and codebook (#117) +* Fix codecov checks (#111) + ## crunchtabs 1.2.3 +* New introductory and FAQ vignettes (#96) * tableHeader.CrossTabVar respects global stub width setting (#93) * Updated vignette (#65) * Adding more tests (#84) diff --git a/R/codebook.R b/R/codebook.R new file mode 100644 index 00000000..bef7a660 --- /dev/null +++ b/R/codebook.R @@ -0,0 +1,158 @@ +#' Generate a codebook item +#' +#' A passthrough function that creates a table header appropriate to the class +#' of the data object being passed. Expected classes are: +#' +#' * CategoricalVariable +#' * CategoricalArrayVariable +#' * MultipleResponseVariable +#' * NumericVariable +#' * TextVariable +#' * DatetimeVariable +#' +#' Importantly, this also controls the relative widths of the columns. +#' +#' @md +#' @param x An object of one of the types listed +#' @export +codebookItem <- function(x, ...) { + UseMethod("codebookItem", x) +} + +#' @rdname codebookItem +codebookItem.default <- function(x) { + wrong_class_error(x, c("CategoricalVariable", "CategoricalArrayVariable", "MultipleResponseVariable", "TextVariable", "NumericVariable", "DatetimeVariable"), "codebookItem") +} + + +#' Extract basic question information +#' +#' Extracts the following: +#' +#' * body +#' * alias +#' * description or question text +#' * notes or filter text +#' * id +#' +#' @md +#' @export +codebookItemTxt <- function(x) { + txt <- x@tuple@body + l <- list() + l$title <- txt$alias + l$alias <- txt$alias + l$name <- txt$name + l$id <- txt$id + l$filter_text <- txt$notes + l$question <- txt$description + l +} + +#' +codebookItemSubVars <- function (x) { + sv <- subvariables(x) + als <- unname(unlist(lapply(sv@index, getElement, "alias"))) + resp <- unname(unlist(lapply(sv@index, getElement, "name"))) + sv <- data.frame(`Sub Alias` = als, Name = resp) + sv_responses <- categories(x) + + list( + key = sv, + key2 = setNames(sv_responses, c("Response", "Value")) + ) + +} + + +#' @describeIn codebookItem Prepares a codebookItem for a CategoricalVariable +#' @export +codebookItem.CategoricalVariable <- function(x) { + txt <- codebookItemTxt(x) + cats <- categories(x) + responses <- do.call(rbind, cats@.Data) + responses <- as.data.frame(responses[!unlist(responses[, "missing"]), ]) + responses <- lapply(responses, unlist) + if (all(is.null(responses$numeric_value))) + responses$numeric_value = responses$id + responses <- as.data.frame(lapply(responses, unlist)[c("name", "numeric_value")]) + names(responses) <- c("Response", "Value") + + latexTop <- sprintf(" + \\thispagestyle{fancy} + \\lhead{%s} + \\setlength{\\extrarowheight}{20pt} + \\begin{tabular*}{7in}{p{2.5in}p{4.5in}} + Question & %s \\\\ + Name & %s \\\\ + Alias & %s \\\\ + ID & %s \\\\ + Filtering Notes & %s \\\\ + \\end{tabular*} + ", + txt$meta$name, + txt$meta$question, + txt$meta$name, + txt$meta$alias, + txt$meta$id, + ifelse(txt$meta$filter_text == "", "None", txt$meta$filter_text) + ) + latexTop <- gsub(" ", "", latexTop) + + latexResponse + + latexTop +} + +#' @describeIn codebookItem Prepares a codebookitem for a CategoricalArrayVariable +#' @export +codebookItem.CategoricalArrayVariable <- function(x) { + txt = codebookItemTxt(x) + subvars = codebookItemSubVars(x) + + +} + +#' @describeIn codebookItem Prepares a codebookitem for a MultipleResponseVariable +#' @export +codebookItem.MultipleResponseVariable <- function(x) { + +} + +#' @describeIn codebookItem Prepares a codebookitem for a NumericVariable +#' @export +codebookItem.NumericVariable <- function(x) { + tmp <- c(summary(c_var), SD = sd(c_var, na.rm = TRUE)) + tmp <- tibble::rownames_to_column(as.data.frame(tmp)) + colnames(tmp) <- c("Summary", "Value") + tmp$Value <- round(tmp$Value, 2) +} + +#' @describeIn codebookItem Prepares a codebookitem for a TextVariable +#' @export +codebookItem.TextVariable <- function(x) { + +} + +#' @describeIn codebookItem Prepares a codebookitem for a DatetimeVariable +#' @export +codebookItem.DatetimeVariable <- function(x) { + +} + +#' Generate LaTeX CodeBooks +#' +#' \code{writeCodebook} produces publication-quality LaTeX reports +#' +#' @param ds A crunch dataset +#' @param ds +#' +#' @param ... Additional arguments passed to writeLatx +#' +#' @importFrom utils installed.packages +#' @export +writeCodebook <- function(...) { + + writeLatex(...) + +} diff --git a/R/crosstabs.R b/R/crosstabs.R index a18d3971..92466d79 100644 --- a/R/crosstabs.R +++ b/R/crosstabs.R @@ -11,6 +11,10 @@ #' @param banner An optional object of class \code{Banner} that should be used to generate #' a Crosstabs summary. Defaults to \code{NULL} - a Toplines summary is produced and returned. #' @param codebook If \code{TRUE}, codebook data summaries are prepared. Defaults to \code{FALSE}. +#' @param include_numeric Logical. Should we include numeric questions? Defaults to FALSE. Implemented for Toplines only. +#' @param include_datetime Logical. Should we include date time questions? Defaults to FALSE. Implemented for Toplines only. +#' @param include_verbatims Logical. Should we include a sample text varaibles? Defaults to FALSE. Implemented for Toplines only. +#' @param num_verbatims An integer identifying the number of examples to extract from a text variable. Defaults to 10. Implemented for Toplines only. #' @return A Toplines (when no banner is provided) or Crosstabs (when a banner is provided) #' summary of the input dataset. #' @examples @@ -22,7 +26,7 @@ #' @importFrom crunch name aliases allVariables is.Numeric is.dataset weight alias weightVariables is.variable #' @importFrom methods is #' @export -crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE) { +crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE, include_numeric = FALSE, include_datetime = FALSE, include_verbatims = FALSE, num_verbatims = 10) { # TODO: open ends wrong_class_error(dataset, "CrunchDataset", "dataset") @@ -61,10 +65,11 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da vars_out <- if (codebook) { vars } else { intersect(vars, all_aliases[all_types %in% c("categorical", "multiple_response", "categorical_array", "numeric")]) } - error_if_items( - unique(types(allVariables(dataset[setdiff(vars, vars_out)]))), - "`vars` of type(s) {items} are not supported and have been skipped.", - and = TRUE, error = FALSE) + + # error_if_items( + # unique(types(allVariables(dataset[setdiff(vars, vars_out)]))), + # "`vars` of type(s) {items} are not supported and have been skipped.", + # and = TRUE, error = FALSE) if (length(vars_out) == 0) { stop("No variables provided.") @@ -79,7 +84,13 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da banner } - results <- tabBooks(dataset = dataset, vars = vars_out, banner = banner_use, weight = weight_var, topline = is.null(banner)) + results <- tabBooks( + dataset = dataset, + vars = vars_out, + banner = banner_use, + weight = weight_var, + topline = is.null(banner) + ) if (codebook) { res_class <- "Codebook" @@ -106,12 +117,91 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da class(banner) <- 'Banner' } + # Here we create logic for including summaries + # for variable types that did not previously have + # summaries (Numeric, Datetime, Text) + + var_types <- unlist(lapply(dataset[vars], class)) + names(var_types) <- vars + numerics <- vars[var_types == "NumericVariable"] + datetimes <- vars[var_types == "DatetimeVariable"] + verbatims <- vars[var_types == "TextVariable"] + + if (include_numeric & length(numerics) > 0) { + # drop weighting vars + weightVars <- unlist( + lapply(numerics, function(x) is.weightVariable(dataset[[x]])) + ) + + numerics <- numerics[!weightVars] + + numRes <- lapply(numerics, function(x) { + prepareExtraSummary( + dataset[[x]], + weighted = !is.null(weight) + ) + }) + names(numRes) <- numerics + + results = c( + results, numRes + ) + } + + if (include_datetime & length(datetimes) > 0) { + + datetimeRes <- lapply(datetimes, function(x) { + prepareExtraSummary( + dataset[[x]], + weighted = !is.null(weight) + ) + }) + + names(datetimeRes) <- datetimes + results = c( + results, + datetimeRes + ) + } + + if (include_verbatims & length(verbatims) > 0) { + verbatimRes <- lapply(verbatims, function(x) { + prepareExtraSummary( + dataset[[x]], + weighted = !is.null(weight) + ) + }) + + names(verbatimRes) <- verbatims + + results = c( + results, + verbatimRes + ) + } + + if (include_verbatims | include_datetime | include_numeric) { + # If we include new question types we must reflow question + # numbers because otherwise they will be missing from the + # faked objects + + # First re-flow in dataset order + tmpResults <- list() + for (i in vars) { tmpResults[[i]] <- results[[i]] } + # Then re-flow question numbers + results = reflowQuestionNumbers(tmpResults) + } + summary_data <- list( metadata = c( list( title = name(dataset), weight = weight, start_date = crunch::startDate(dataset), end_date = crunch::endDate(dataset), - description = crunch::description(dataset))), results = results, banner = banner + description = crunch::description(dataset) + ) + ), + results = results, + banner = banner ) class(summary_data) <- res_class diff --git a/R/duration.R b/R/duration.R new file mode 100644 index 00000000..f721483e --- /dev/null +++ b/R/duration.R @@ -0,0 +1,26 @@ +#' Survey Duration +#' +#' Calculate survey duration +#' +#' @param ds A crunch dataset +#' @param starttime A crunch variable containing the starttime +#' @param endtime A crunch variable containing the endtime +#' @param outliers At what factor of overall mean should outliers be dropped? Defaults to 2.5. +#' @export +surveyDuration <- function(ds, starttime=NULL, endtime=NULL, outliers = 2.5) { + if (is.null(starttime)) starttime = ds$starttime + if (is.null(endtime)) endtime = ds$endtime + + res = as.numeric( + as.POSIXct(as.vector(ds$endtime)) - as.POSIXct(as.vector(ds$starttime)) + ) + + # Incompletes are negative + res[res < 0] = NA + # 2.5x mean as outlier + if (!is.na(outliers)) { + res[res > mean(res, na.rm = TRUE) * outliers] = mean(res, na.rm = TRUE) + } + res +} + diff --git a/R/reformatResults.R b/R/reformatResults.R index 68422e39..8c5196fd 100644 --- a/R/reformatResults.R +++ b/R/reformatResults.R @@ -15,16 +15,20 @@ roundPropCategorical <- function(data, digits = 0) { return(rounded) } -# TODO: Convert these random global vars to functions instead. -default_banner <- list( - Results = list( - empty_col = FALSE, - multicols = NA, - multicols_csum = NA, - format_cols = 2, - border_columns = NULL +#' Create default banner +#' +#' Creates a banner for use with \link{reformatLatexResults} +default_banner <- function() { + list( + Results = list( + empty_col = FALSE, + multicols = NA, + multicols_csum = NA, + format_cols = 2, + border_columns = NULL + ) ) -) +} #' Banner Meta Data #' @@ -361,7 +365,7 @@ removeInserts <- function(var, theme) { #' @param theme A crunchtabs theme object from \link{themeNew} reformatLatexResults <- function(result, banner, theme) { if (is.null(banner)) { - banner_info <- default_banner + banner_info <- default_banner() } else { banner_info <- lapply(banner, getBannerInfo, theme = theme) } diff --git a/R/tabbook-additions.R b/R/tabbook-additions.R new file mode 100644 index 00000000..24f49a97 --- /dev/null +++ b/R/tabbook-additions.R @@ -0,0 +1,221 @@ +#' Reflow Question Numbers +#' +#' When we manually add summaries of +#' numeric, datetime or text variables +#' we must "reflow" the question numbers +#' so that they match dataset order. +#' +#' @param x A results object from within the \link{crosstabs} function. +reflowQuestionNumbers <- function(x) { + for (i in 1:length(x)) { + x[[i]]$number = i + } + x +} + +#' Prepare Summary Content +#' +#' Prepare summary content for toplines for classes that +#' are not covered by tabBook such as NumericVariable, DatetimeVariables +#' and TextVariable +#' +#' @param x A variable of class NumericVariable, DatetimeVariable or TextVariable +#' @param ... Additional arguments passed to methods +#' @export +prepareExtraSummary <- function(x, ...) { + UseMethod("prepareExtraSummary", x) +} + +#' @rdname prepareExtraSummary +#' @export +prepareExtraSummary.default <- function(x) { + wrong_class_error(x, c("CategoricalVariable", "CategoricalArrayVariable", "MultipleResponseVariable", "TextVariable", "NumericVariable", "DatetimeVariable"), "codebookItem") +} + +#' Prepare Numeric Content +#' +#' \link[crunch]{tabBook} does not report an appropriate numeric summary +#' without being provided with a multitable. So we "fake" a numeric summary +#' by overwriting the structure of a categorical object. +#' +#' If data are weighted we display Weighted N instead of Unweighted +#' N +#' +#' @param x A variable of class \link[crunch]{NumericVariable} +#' @param weighted Logical. Are these data weighted? +#' @export +prepareExtraSummary.NumericVariable <- function(x, weighted = TRUE) { + y = as.vector(x) + qt = quantile(y, na.rm = TRUE) + minima = min(y, na.rm = TRUE) + maxima = max(y, na.rm = TRUE) + half = median(y, na.rm = TRUE) + mu = mean(y, na.rm = TRUE) + firstq = qt[2] + thirdq = qt[4] + stdev = sd(y, na.rm = TRUE) + + # Mock the content object + + obj = resultsObject( + x, + top = NULL, + weighted = weighted, + body_values = c(minima, firstq, half, mu, thirdq, maxima, stdev), + body_labels = c( + "Minimum", + "1st Quartile", + "Median", + "Mean", + "3rd Quartile", + "Maximum", + "Standard Deviation" + ) + ) + + obj +} + +#' Prepare Datetime Content +#' +#' tabBook does not report an appropriate date time summary without +#' being provided with a multitable. So we "fake" a date time summary +#' by overwriting the structure of a categorical object. +#' +#' If data are weighted we display Weighted N instead of Unweighted +#' N +#' +#' @param x A variable of class \link[crunch]{DatetimeVariable} +#' @param weighted Logical. Are these data weighted? +#' @param tz A timezone. Defaults to UTC. +#' @export +prepareExtraSummary.DatetimeVariable <- function(x, weighted = TRUE, tz = "UTC") { + y = as.POSIXct(as.vector(x), tx = "UTC") + qt = quantile(y, na.rm = TRUE) + minima = min(y, na.rm = TRUE) + maxima = max(y, na.rm = TRUE) + half = median(y, na.rm = TRUE) + firstq = qt[2] + thirdq = qt[4] + + # Mock the content object + obj = resultsObject( + x, + top = NULL, + weighted = weighted, + body_values = c(minima, firstq, half, thirdq, maxima), + body_labels = c( + "Minimum", + "1st Quartile", + "Median", + "3rd Quartile", + "Maximum" + ) + ) + + obj +} + +#' Prepare Text Content +#' +#' tabBook does not report an appropriate date time summary without +#' being provided with a multitable. So we "fake" a date time summary +#' by overwriting the structure of a categorical object. +#' +#' If data are weighted we display Weighted N instead of Unweighted +#' N +#' +#' @param x A variable of class \link[crunch]{TextVariable} +#' @param weighted Logical. Are these data weighted? +#' @param count The number of verbatim responses to present as a sample. Defaults to 10. +#' @export +prepareExtraSummary.TextVariable <- function(x, weighted = TRUE, num = 10L) { + set.seed(42) + y = as.vector(x) + y = sort(sample(unique(y[!is.na(y)]), num, replace = FALSE)) + n = sum(!is.na(y)) # This could be wrong for un-enforced responses such as "" + + # Mock the content object + obj = resultsObject( + x, + weighted = weighted, + body_values = rep("", length(y)), + body_labels = y + ) + + obj +} + + +#' Generic Results Object +#' +#' As \link[crunch]{tabBook} does not provide us with a way to create summaries +#' for some variable types we are forced to create an object that bypasses +#' the reformatVar function. Our goal is to use as much of the +#' code infrastructure for theming purposes as possible while +#' allowing the creation of new topline summary objects +#' +#' @param x A dataset variable +#' @param top The top of the results object. NULL by default +#' @param weighted Logical. Are these data weighted? +#' @param body_values The values to present +#' @param body_labels The labels to present +resultsObject <- function(x, top = NULL, weighted, body_values, body_labels) { + + stopifnot(length(body_values) == length(body_labels)) + + top = top + data_list = list() + data_list$body = data.frame( + x = body_values, + row.names = body_labels + ) + names(data_list$body) = NA_character_ + + # Presentation differences if data are + # weighted or unweighted + + if (weighted) { + data_list$weighted_n = data.frame( + x = sum(!is.na(as.vector(x))), + row.names = "Weighted N" + ) + names(data_list$weighted_n) = NA_character_ + + bottom = c(weighted_n = "weighted_n") + data_order = c("body", weighted_n = "weighted_n") + + } else { + + data_list$unweighted_n = data.frame( + x = sum(!is.na(as.vector(x))), + row.names = "Unweighted N" + ) + names(data_list$unweighted_n) = NA_character_ + + bottom = c(unweighted_n = "unweighted_n") + data_order = c("body",unweighted_n = "unweighted_n") + } + + structure( + list( + alias = alias(x), + name = name(x), + description = ifelse(description(x) == "", name(x), description(x)), + notes = notes(x), + type = class(x)[1], + top = NULL, + bottom = bottom, + data_order = data_order, + inserts = rep("Category", length(body_values)), + data_list = data_list, + min_cell_top = NULL, + no_totals = TRUE, + mean_median = FALSE, + min_cell_body = matrix(rep(NA, length(body_values))), + min_cell_bottom = matrix(FALSE), + min_cell = FALSE, + rownames = c(body_labels, ifelse(weighted, "Weighted N", "Unweighted N"))), + class = c("ToplineVar", "CrossTabVar")) +} + diff --git a/R/tex-table.R b/R/tex-table.R index d55d5749..ef508611 100644 --- a/R/tex-table.R +++ b/R/tex-table.R @@ -53,25 +53,41 @@ latexTableBody <- function(df, theme) { for (nm in intersect(c("body", "totals_row"), names(data))) { # For each column in these data.frames, round and treat as percentages - data[[nm]] <- dfapply(data[[nm]], formatNum, digits = theme$digits) + if (!is.null(df$type)) { + if (df$type == "NumericVariable") { + data[[nm]] <- dfapply(data[[nm]], formatNum, digits = theme$digits_numeric) + } + } else { + data[[nm]] <- dfapply(data[[nm]], formatNum, digits = theme$digits) + } + if (theme$proportions) { # Add a percent sign - data[[nm]] <- dfapply(data[[nm]], paste0, "%") + if (!is.null(df$type)) { + # No action becasue it is one of: Numeric, Datetime, or Text + } else { + data[[nm]] <- dfapply(data[[nm]], paste0, "%") + } + } } # NPR: this one is doing some wacky things currently - for (nm in intersect(c("unweighted_n", "weighted_n"), names(data))) { - this_theme <- theme[[paste0("format_", nm)]] - data[[nm]] <- dfapply(data[[nm]], formatNum) - if (this_theme$latex_add_parenthesis) { - data[[nm]] <- dfapply(data[[nm]], paste_around, "(", ")") - } - alignment <- this_theme$latex_adjust - if (!is.null(alignment) && !topline) { - data[[nm]] <- dfapply(data[[nm]], function(x) { - # Align these cells - multicolumn(1, x, align = alignment) - }) + if (is.null(df$type)) { + for (nm in intersect(c("unweighted_n", "weighted_n"), names(data))) { + this_theme <- theme[[paste0("format_", nm)]] + + data[[nm]] <- dfapply(data[[nm]], formatNum) + + if (this_theme$latex_add_parenthesis) { + data[[nm]] <- dfapply(data[[nm]], paste_around, "(", ")") + } + alignment <- this_theme$latex_adjust + if (!is.null(alignment) && !topline) { + data[[nm]] <- dfapply(data[[nm]], function(x) { + # Align these cells + multicolumn(1, x, align = alignment) + }) + } } } @@ -220,16 +236,16 @@ formatNum <- function(x, digits=0, ...) { #' Importantly, this also controls the relative widths of the columns. #' #' @md -#' @param x An object of one of the types listed +#' @param var An object of one of the types listed #' @param theme A theme object from \link{themeNew} -tableHeader <- function(x, theme) { - UseMethod("tableHeader", x) +tableHeader <- function(var, theme) { + UseMethod("tableHeader", var) } #' @rdname tableHeader #' @export -tableHeader.default <- function(x) { - wrong_class_error(x, c("CrossTabVar", "ToplineVar", "ToplineCategoricalArray"), "tableHeader") +tableHeader.default <- function(var, theme) { + wrong_class_error(var, c("CrossTabVar", "ToplineVar", "ToplineCategoricalArray"), "tableHeader") } #' Header for LongTable with Banner. diff --git a/R/theme.R b/R/theme.R index 08d8f131..1a53ef74 100644 --- a/R/theme.R +++ b/R/theme.R @@ -5,6 +5,7 @@ #' @section Theme Arguments: #' \describe{ #' \item{digits}{A numeric. How many digits should the data be rounded to? (In Excel, this is excel styling.) Defaults to 0.} +#' \item{digits_numeric}{A numeric. How many digits should continuous variable data be rounded to? (In Latex, , this is Latex styling.) Defaults to 2.} #' \item{digits_final}{In Excel, an optional numeric. How many digits should the data be rounded to before being added to Excel?} #' \item{excel_footer}{In Excel, an optional character vector of length 3. The footer text of the file.} #' \item{excel_freeze_column}{In Excel, a numeric. What column should be the last frozen column? Defaults to 1.} @@ -204,6 +205,7 @@ themeDefaultExcel <- function( excel_freeze_column = 1, excel_percent_sign = TRUE, digits = 0, + digits_numeric = 2, one_per_sheet = FALSE, latex_round_percentages = TRUE, latex_headtext = "", @@ -251,6 +253,7 @@ themeDefaultLatex <- function(font = getOption("font", default = "helvet"), format_label_column = c(norm, col_width = NA_real_ , extend_borders = FALSE), format_totals_column = norm, digits = 0, + digits_numeric = 2, one_per_sheet = TRUE, excel_percent_sign = TRUE, excel_show_grid_lines = FALSE, @@ -358,6 +361,7 @@ validators_to_use <- list( decoration = list(mult = TRUE, missing = TRUE, valid = list("bold","strikeout","italic","underline","underline2")), digits = c(class = "numeric", len = 1, missing = FALSE, default = 0), + digits_numeric = c(class = "numeric", len = 1, missing = FALSE, default = 2), digits_final = c(class = "numeric", len = 1, missing = TRUE), dpi = c(class = "numeric", len = 1, missing = FALSE, default = 300), empty_col = c(class = "logical", len = 1, missing = FALSE, default = FALSE), @@ -442,7 +446,7 @@ validators_to_use <- list( #' @param theme An object from \link{themeNew} theme_validator <- function(theme) { theme_required <- c( - "digits", "digits_final", "excel_footer", + "digits","digits_numeric", "digits_final", "excel_footer", "excel_freeze_column", "excel_header", "excel_orientation", "excel_percent_sign", "excel_show_grid_lines", "excel_table_border", "font", "font_color", "font_size", "format_banner_categories", diff --git a/R/writeLatex.R b/R/writeLatex.R index a1554317..02573ccf 100644 --- a/R/writeLatex.R +++ b/R/writeLatex.R @@ -133,37 +133,69 @@ latexReportTables <- function (results, banner, theme) { table_bodies <- list() for (i in 1:length(results)) { # convert to loop for debug + x = results[[i]] - # Do some munging and generate the table bodies to match those header(s) - x <- removeInserts(x, theme) - # Lots of dragons in this "reformat" code :shrug: - content <- reformatLatexResults(x, banner, theme) + if(!x$type %in% c("NumericVariable", "DatetimeVariable", "TextVariable")) { + # Do some munging and generate the table bodies to match those header(s) + x <- removeInserts(x, theme) + # Lots of dragons in this "reformat" code :shrug: + content <- reformatLatexResults(x, banner, theme) - # PT: decide if the final table should be longtable or tabular based on the - # number of responses and latex_max_lines_for_tabular - x$longtable <- calculateIfLongtable(content[[1]], theme) + # PT: decide if the final table should be longtable or tabular based on the + # number of responses and latex_max_lines_for_tabular + x$longtable <- calculateIfLongtable(content[[1]], theme) - # PT: because this is a loop, header is singular (i.e. it's only one table at a time). - header <- tableHeader(x, theme) - body <- sapply(content, latexTableBody, theme = theme) - footer <- ifelse(x$longtable | !theme$topline, "\n\\end{longtable}", "\n\\end{tabular}") + # PT: because this is a loop, header is singular (i.e. it's only one table at a time). + header <- tableHeader(x, theme) + body <- sapply(content, latexTableBody, theme = theme) + footer <- ifelse( + x$longtable | !theme$topline, "\n\\end{longtable}", "\n\\end{tabular}") + + # This paste will collapse the perhaps multiple banner tables into a + # single string of LaTeX. + table <- paste( + header, + body, + footer, + sep = "\n", + collapse = "\n\n\n" + ) + if (x$longtable) { + # centers longtables because otherwise the head/foot text are not centered + # but doesn't center tabular because it actually ends up being uncentered + # if centered is used on tabulars. + table <- center(table) + } + } else { + # Customized path for variables that are manually added + # to tabBook results. tabBook does not provide summaries for + # numeric, datetime nor text variables which we need for + # creating a basic codebook + + x$longtable <- calculateIfLongtable(x, theme) + header <- tableHeader(x, theme) + body <- latexTableBody(x, theme) + footer <- ifelse(x$longtable | !theme$topline, "\n\\end{longtable}", "\n\\end{tabular}") + + table <- paste( + header, + body, + footer, + sep = "\n", + collapse = "\n\n\n" + ) + + if (x$longtable) { + # centers longtables because otherwise the head/foot text are not centered + # but doesn't center tabular because it actually ends up being uncentered + # if centered is used on tabulars. + table <- center(table) + } - # This paste will collapse the perhaps multiple banner tables into a - # single string of LaTeX. - table <- paste( - header, - body, - footer, - sep = "\n", - collapse = "\n\n\n" - ) - if (x$longtable) { - # centers longtables because otherwise the head/foot text are not centered - # but doesn't center tabular because it actually ends up being uncentered - # if centered is used on tabulars. - table <- center(table) } + + table_bodies[[i]] = table } diff --git a/inst/httptest/redact.R b/inst/httptest/redact.R new file mode 100644 index 00000000..8b90521f --- /dev/null +++ b/inst/httptest/redact.R @@ -0,0 +1,4 @@ +# Use rcrunch's redactor file exactly +# https://github.com/Crunch-io/rcrunch/blob/master/inst/httptest/redact.R +# You could also write your own +source(system.file("httptest/redact.R", package = "crunch", mustWork = TRUE)) diff --git a/man/codebookItem.Rd b/man/codebookItem.Rd new file mode 100644 index 00000000..8d420bca --- /dev/null +++ b/man/codebookItem.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codebook.R +\name{codebookItem} +\alias{codebookItem} +\alias{codebookItem.default} +\alias{codebookItem.CategoricalVariable} +\alias{codebookItem.CategoricalArrayVariable} +\alias{codebookItem.MultipleResponseVariable} +\alias{codebookItem.NumericVariable} +\alias{codebookItem.TextVariable} +\alias{codebookItem.DatetimeVariable} +\title{Generate a codebook item} +\usage{ +codebookItem(x, ...) + +\method{codebookItem}{default}(x) + +\method{codebookItem}{CategoricalVariable}(x) + +\method{codebookItem}{CategoricalArrayVariable}(x) + +\method{codebookItem}{MultipleResponseVariable}(x) + +\method{codebookItem}{NumericVariable}(x) + +\method{codebookItem}{TextVariable}(x) + +\method{codebookItem}{DatetimeVariable}(x) +} +\arguments{ +\item{x}{An object of one of the types listed} +} +\description{ +A passthrough function that creates a table header appropriate to the class +of the data object being passed. Expected classes are: +} +\details{ +\itemize{ +\item CategoricalVariable +\item CategoricalArrayVariable +\item MultipleResponseVariable +\item NumericVariable +\item TextVariable +\item DatetimeVariable +} + +Importantly, this also controls the relative widths of the columns. +} +\section{Methods (by class)}{ +\itemize{ +\item \code{CategoricalVariable}: Prepares a codebookItem for a CategoricalVariable + +\item \code{CategoricalArrayVariable}: Prepares a codebookitem for a CategoricalArrayVariable + +\item \code{MultipleResponseVariable}: Prepares a codebookitem for a MultipleResponseVariable + +\item \code{NumericVariable}: Prepares a codebookitem for a NumericVariable + +\item \code{TextVariable}: Prepares a codebookitem for a TextVariable + +\item \code{DatetimeVariable}: Prepares a codebookitem for a DatetimeVariable +}} + diff --git a/man/codebookItemTxt.Rd b/man/codebookItemTxt.Rd new file mode 100644 index 00000000..1b3eb0f2 --- /dev/null +++ b/man/codebookItemTxt.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codebook.R +\name{codebookItemTxt} +\alias{codebookItemTxt} +\title{Extract basic question information} +\usage{ +codebookItemTxt(x) +} +\description{ +Extracts the following: +} +\details{ +\itemize{ +\item body +\item alias +\item description or question text +\item notes or filter text +\item id +} +} diff --git a/man/fixExtraSummaryWeights.Rd b/man/fixExtraSummaryWeights.Rd new file mode 100644 index 00000000..ec00bea9 --- /dev/null +++ b/man/fixExtraSummaryWeights.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabbook-additions.R +\name{fixExtraSummaryWeights} +\alias{fixExtraSummaryWeights} +\title{Weighting Object Titles} +\usage{ +fixExtraSummaryWeights(obj, weighted = FALSE) +} +\arguments{ +\item{obj}{The obj created by \link{prepareExtraSummary}} + +\item{weighted}{Is the set weighted?} +} +\description{ +If the object is weighted +there is a slight difference in +the naming required to align +with existing patterns +} diff --git a/man/prepareExtraSummary.DatetimeVariable.Rd b/man/prepareExtraSummary.DatetimeVariable.Rd new file mode 100644 index 00000000..2e74f85f --- /dev/null +++ b/man/prepareExtraSummary.DatetimeVariable.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabbook-additions.R +\name{prepareExtraSummary.DatetimeVariable} +\alias{prepareExtraSummary.DatetimeVariable} +\title{Prepare Datetime Content} +\usage{ +\method{prepareExtraSummary}{DatetimeVariable}(x, weighted = TRUE, tz = "UTC") +} +\arguments{ +\item{x}{A variable of class \link[crunch]{DatetimeVariable}} + +\item{weighted}{Logical. Are these data weighted?} + +\item{tz}{A timezone. Defaults to UTC.} +} +\description{ +tabBook does not report an appropriate date time summary without +being provided with a multitable. So we "fake" a date time summary +by overwriting the structure of a categorical object. +} +\details{ +If data are weighted we display Weighted N instead of Unweighted +N +} diff --git a/man/prepareExtraSummary.NumericVariable.Rd b/man/prepareExtraSummary.NumericVariable.Rd new file mode 100644 index 00000000..960edbd0 --- /dev/null +++ b/man/prepareExtraSummary.NumericVariable.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabbook-additions.R +\name{prepareExtraSummary.NumericVariable} +\alias{prepareExtraSummary.NumericVariable} +\title{Prepare Numeric Content} +\usage{ +\method{prepareExtraSummary}{NumericVariable}(x, weighted = TRUE) +} +\arguments{ +\item{x}{A variable of class \link[crunch]{NumericVariable}} + +\item{weighted}{Logical. Are these data weighted?} +} +\description{ +tabBook does not report an appopriate numeric summary without +being provided with a multitable. So we "fake" a numeric summary +as by overwriting the structure of a categorical object. +} +\details{ +If data are weighted we display Weighted N instead of Unweighted +N +} diff --git a/man/prepareExtraSummary.Rd b/man/prepareExtraSummary.Rd new file mode 100644 index 00000000..36a4c7dc --- /dev/null +++ b/man/prepareExtraSummary.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabbook-additions.R +\name{prepareExtraSummary} +\alias{prepareExtraSummary} +\alias{prepareExtraSummary.default} +\title{Prepare Summary Content} +\usage{ +prepareExtraSummary(x, ...) + +\method{prepareExtraSummary}{default}(x) +} +\arguments{ +\item{x}{A variable of class NumericVariable, DatetimeVariable or TextVariable} + +\item{...}{Additional arguments passed to methods} +} +\description{ +Prepare summary content for codebooks. +} diff --git a/man/prepareExtraSummary.TextVariable.Rd b/man/prepareExtraSummary.TextVariable.Rd new file mode 100644 index 00000000..6378f4ab --- /dev/null +++ b/man/prepareExtraSummary.TextVariable.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabbook-additions.R +\name{prepareExtraSummary.TextVariable} +\alias{prepareExtraSummary.TextVariable} +\title{Prepare Text Content} +\usage{ +\method{prepareExtraSummary}{TextVariable}(x, weighted = TRUE, n = 10L) +} +\arguments{ +\item{x}{A variable of class \link[crunch]{TextVariable}} + +\item{weighted}{Logical. Are these data weighted?} + +\item{n}{The number of verbatim responses to present as a sample. Defaults to 10.} +} +\description{ +tabBook does not report an appropriate date time summary without +being provided with a multitable. So we "fake" a date time summary +by overwriting the structure of a categorical object. +} +\details{ +If data are weighted we display Weighted N instead of Unweighted +N +} diff --git a/man/reflowQuestionNumbers.Rd b/man/reflowQuestionNumbers.Rd new file mode 100644 index 00000000..5dd9e7aa --- /dev/null +++ b/man/reflowQuestionNumbers.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabbook-additions.R +\name{reflowQuestionNumbers} +\alias{reflowQuestionNumbers} +\title{Reflow Question Numbers} +\usage{ +reflowQuestionNumbers(x) +} +\arguments{ +\item{x}{A crosstabs object from the \link{crosstabs} function.} +} +\description{ +When we manually add summaries of +numeric, datetime or text variables +we must "reflow" the question numbers +so that they match dataset order. +} diff --git a/man/surveyDuration.Rd b/man/surveyDuration.Rd new file mode 100644 index 00000000..39ae2b29 --- /dev/null +++ b/man/surveyDuration.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/duration.R +\name{surveyDuration} +\alias{surveyDuration} +\title{Survey Duration} +\usage{ +surveyDuration(ds, starttime = NULL, endtime = NULL, outliers = 2.5) +} +\arguments{ +\item{ds}{A crunch dataset} + +\item{starttime}{A crunch variable containing the starttime} + +\item{endtime}{A crunch variable containing the endtime} + +\item{outliers}{At what factor of overall mean should outliers be dropped? Defaults to 2.5.} +} +\description{ +Calculate survey duration +} diff --git a/man/tableHeader.Rd b/man/tableHeader.Rd index d04358e7..498fc276 100644 --- a/man/tableHeader.Rd +++ b/man/tableHeader.Rd @@ -8,9 +8,9 @@ \alias{tableHeader.ToplineCategoricalArray} \title{Generate a tableHeader} \usage{ -tableHeader(x, theme) +tableHeader(var, theme) -\method{tableHeader}{default}(x) +\method{tableHeader}{default}(var, theme) \method{tableHeader}{CrossTabVar}(var, theme) @@ -19,11 +19,9 @@ tableHeader(x, theme) \method{tableHeader}{ToplineCategoricalArray}(var, theme) } \arguments{ -\item{x}{An object of one of the types listed} +\item{var}{An object of class ToplineCategoricalArray} \item{theme}{A theme object from \link{themeNew}} - -\item{var}{An object of class ToplineCategoricalArray} } \description{ A passthrough function that creates a table header appropriate to the class diff --git a/man/themeNew.Rd b/man/themeNew.Rd index 47a46a57..af6f2173 100644 --- a/man/themeNew.Rd +++ b/man/themeNew.Rd @@ -18,6 +18,7 @@ themeNew(..., default_theme = themeDefaultExcel()) \describe{ \item{digits}{A numeric. How many digits should the data be rounded to? (In Excel, this is excel styling.) Defaults to 0.} +\item{digits_numeric}{A numeric. How many digits should continuous variable data be rounded to? (In Latex, , this is Latex styling.) Defaults to 2.} \item{digits_final}{In Excel, an optional numeric. How many digits should the data be rounded to before being added to Excel?} \item{excel_footer}{In Excel, an optional character vector of length 3. The footer text of the file.} \item{excel_freeze_column}{In Excel, a numeric. What column should be the last frozen column? Defaults to 1.} diff --git a/man/writeCodebook.Rd b/man/writeCodebook.Rd new file mode 100644 index 00000000..28a3c7df --- /dev/null +++ b/man/writeCodebook.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codebook.R +\name{writeCodebook} +\alias{writeCodebook} +\title{Generate LaTeX CodeBooks} +\usage{ +writeCodebook(...) +} +\arguments{ +\item{...}{Additional arguments passed to writeLatx} + +\item{ds}{} +} +\description{ +\code{writeCodebook} produces publication-quality LaTeX reports +} diff --git a/tests/testthat/fixtures-1-2-5-weighted/app.crunch.io/api/progress.json b/tests/testthat/fixtures-1-2-5-weighted/app.crunch.io/api/progress.json new file mode 100644 index 00000000..18392773 --- /dev/null +++ b/tests/testthat/fixtures-1-2-5-weighted/app.crunch.io/api/progress.json @@ -0,0 +1,11 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/progress/", + "views": { + "result": "https://app.crunch.io/api/progress/result/" + }, + "value": { + "progress": 100, + "message": "complete" + } +} diff --git a/tests/testthat/fixtures-1-2-5/app.crunch.io/api/progress.json b/tests/testthat/fixtures-1-2-5/app.crunch.io/api/progress.json new file mode 100644 index 00000000..18392773 --- /dev/null +++ b/tests/testthat/fixtures-1-2-5/app.crunch.io/api/progress.json @@ -0,0 +1,11 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/progress/", + "views": { + "result": "https://app.crunch.io/api/progress/result/" + }, + "value": { + "progress": 100, + "message": "complete" + } +} diff --git a/tests/testthat/fixtures/buildFixtures-1-2-5.R b/tests/testthat/fixtures/buildFixtures-1-2-5.R new file mode 100644 index 00000000..5316ea93 --- /dev/null +++ b/tests/testthat/fixtures/buildFixtures-1-2-5.R @@ -0,0 +1,77 @@ +library(crunchtabs) +library(httptest) + +# We build two sets of exmaples. Unweighted and weighted + +login() + +# Assumes your wd is project dir +rm(list = ls()) +ds <- newExampleDataset() +start_capturing("tests/testthat/fixtures-1-2-5") + +ds <- loadDataset("Example dataset") + +# Unweighted ---- + +ct_banner <- banner( + ds, + vars = list(`banner 1` = 'allpets') +) + +topline_unweighted <- crosstabs(ds) + +ct_unweighted <- crosstabs( + dataset = ds, + banner = ct_banner +) + +stop_capturing() + +# Weighted ---- + +start_capturing("tests/testthat/fixtures-1-2-5-weighted") + +ds <- loadDataset("Example dataset") + +ct_banner <- banner( + ds, + vars = list(`banner 1` = 'allpets') +) + +ds$weight = makeWeight(ds$q1 ~ c(0.75, 0.15, 0.10), name = "weight") +modifyWeightVariables(ds, "weight") + +topline_weighted <- crosstabs(ds, weight = "weight") + +missings = setdiff(names(ds), names(topline_weighted$results)) + +topline_withExtraSummary <- topline_weighted + +topline_withExtraSummary$results <- c( + topline_withExtraSummary$results, + setNames(lapply(missings, function(x) prepareExtraSummary(ds[[x]])), missings) +) + +results_list <- list() +for (x in names(ds)) { + results_list[[x]] <- topline_withExtraSummary$results[[x]] +} + +topline_withExtraSummary$results = results_list + +topline_withExtraSummary <- crunchtabs:::reflowQuestionNumbers(ct) + +ct_banner <- banner( + ds, + vars = list(`banner 1` = 'allpets') +) + +ct_weighted <- crosstabs( + dataset = ds, + weight = 'weight', + banner = ct_banner +) + +stop_capturing() +with_consent(deleteDataset("Example dataset")) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index fb7879a1..bbb6bbf2 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,4 +1,5 @@ - # Setup is executed when tests are run, but not when loadall is run -source(system.file("crunch-test.R", package="crunch")) \ No newline at end of file +source(system.file("crunch-test.R", package="crunch")) + + diff --git a/tests/testthat/test-prepareExtraSummary.R b/tests/testthat/test-prepareExtraSummary.R new file mode 100644 index 00000000..28b003a1 --- /dev/null +++ b/tests/testthat/test-prepareExtraSummary.R @@ -0,0 +1,31 @@ + +with_api_fixture <- function(fixture_path, expr) { + with( + crunch::temp.options( + crunch.api = "https://app.crunch.io/api/", + httptest.mock.paths = fixture_path + ), + httptest::with_mock_api(expr) + ) +} + +context("prepareExtraSummary NumericVariable") + +with_api_fixture("fixtures-1-2-5", { + + ds <- loadDataset("Example dataset") + + test_that("dataset is not weighted", { + expect_null(weight(ds)) + }) + + test_that("dataset is accessible", { + expect_true(exists("ds")) + }) + + ct <- crosstabs(ds) + +}) + +context("prepareExtraSummary DatetimeVariable") +context("prepareExtraSummary TextVariable") diff --git a/vignettes/FAQ.Rmd b/vignettes/FAQ.Rmd index 30ff213e..046aab94 100644 --- a/vignettes/FAQ.Rmd +++ b/vignettes/FAQ.Rmd @@ -66,6 +66,49 @@ myAwesomeTheme = themeNew( ) ``` +### How do I display questions with different weights in the same report? + +We can chain crosstabs objects easily to create the desired effect. Below, we create two crosstabs objects with the variables and the weighting we want applied and then we concatenate them together. + +```{r, eval = FALSE} +# ... +topline_summary = crosstabs(ds, vars = c("a", "b"), weight = "weightvar") +chain_me = crosstabs(ds, vars = c("c"), weight = "other_weightvar") + +topline_summary$results = c( + topline_summary$results, + chain_me$results +) + +topline_summary$results = crunchtabs:::reflowQuestionNumbers( + topline_summary$results +) + +writeLatex(topline_summary) +``` + +Re-weighting the same variable with another weight in the same report is possible but requires you to also adjust the names so that you can edit the question text appropriately to demonstrate the different weighting: + +```{r, eval = FALSE} +# ... +topline_summary = crosstabs(ds, vars = c("a", "b"), weight = "weightvar") +chain_me = crosstabs(ds, vars = c("a"), weight = "other_weightvar") + +names(chain_me$results) = "a_other_weight_var" # rename the alias +chain_me$results$a_other_weight_var$description = "This is question text (weighted by other_weight_var)" + +topline_summary$results = c( + topline_summary$results, + chain_me$results +) + +topline_summary$results = crunchtabs:::reflowQuestionNumbers( + topline_summary$results +) + +writeLatex(topline_summary) +``` + ### How do I install a different release of crunchtabs? In the code example below we show you how to install Release 1.2.1: