Merge pull request #109 from Crunch-io/issue_069

WIP: New summarization methods for crunchtabs toplines
Crunch-io · May 8, 2020 · 4de3a14 · 4de3a14
2 parents 6723f1e + 5d4bdcf
commit 4de3a14
Show file tree

Hide file tree

Showing 30 changed files with 1,071 additions and 70 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -6,7 +6,7 @@ Description: In order to generate custom survey reports, this package provides
     'banners' (cross-tabulations) of datasets in the Crunch
     (<https://crunch.io/>) web service. Reports can be written in 'PDF' format
     using 'LaTeX' or in Microsoft Excel '.xlsx' files.
-Version: 1.2.3
+Version: 1.2.5
 Authors@R: c(
     person("Persephone", "Tsebelis", role="aut"),
     person("Kamil", "Sedrowicz", role="aut"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,27 +4,42 @@ S3method(calculateIfLongtable,CrossTabVar)
 S3method(calculateIfLongtable,ToplineCategoricalArray)
 S3method(calculateIfLongtable,ToplineVar)
 S3method(calculateIfLongtable,default)
+S3method(codebookItem,CategoricalArrayVariable)
+S3method(codebookItem,CategoricalVariable)
+S3method(codebookItem,DatetimeVariable)
+S3method(codebookItem,MultipleResponseVariable)
+S3method(codebookItem,NumericVariable)
+S3method(codebookItem,TextVariable)
 S3method(getName,BannerVar)
 S3method(getName,CrossTabVar)
 S3method(getName,CrunchCube)
 S3method(getName,CrunchTabs)
 S3method(getName,ToplineBase)
 S3method(getName,default)
+S3method(prepareExtraSummary,DatetimeVariable)
+S3method(prepareExtraSummary,NumericVariable)
+S3method(prepareExtraSummary,TextVariable)
+S3method(prepareExtraSummary,default)
 S3method(print,Crosstabs)
 S3method(print,Toplines)
 S3method(tableHeader,CrossTabVar)
 S3method(tableHeader,ToplineCategoricalArray)
 S3method(tableHeader,ToplineVar)
 S3method(tableHeader,default)
 export(banner)
+export(codebookItem)
+export(codebookItemTxt)
 export(crosstabs)
 export(getName)
+export(prepareExtraSummary)
+export(surveyDuration)
 export(themeDefaultExcel)
 export(themeDefaultLatex)
 export(themeHuffPoCrosstabs)
 export(themeHuffPoToplines)
 export(themeNew)
 export(themeUKPolitical)
+export(writeCodebook)
 export(writeExcel)
 export(writeLatex)
 import(crunch)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,19 @@
+## crunchtabs 1.2.5
+
+* Add summarization for continuous variables (NumericVariable) in toplines (#80)
+* Add summarization for date time variable (DatetimeVariable) in toplines (#120)
+* Add summarization for text data (TextVariable) in toplines (#121)
+* Add amsmath package to latex preamble (#123)
+* Bugfix for open=TRUE, PDF doc now opens appropriately (#125)
+* Update FAQ: question alias numbering example (#128)
+* Update FAQ: customizing stub widths (#118)
+* Update FAQ: specifying logos in excel (#115)
+* Internal proposal for new summarizations and codebook (#117)
+* Fix codecov checks (#111)
+
 ## crunchtabs 1.2.3
 
+* New introductory and FAQ vignettes (#96)
 * tableHeader.CrossTabVar respects global stub width setting (#93)
 * Updated vignette (#65)
 * Adding more tests (#84)

diff --git a/R/codebook.R b/R/codebook.R
@@ -0,0 +1,158 @@
+#' Generate a codebook item
+#'
+#' A passthrough function that creates a table header appropriate to the class
+#' of the data object being passed. Expected classes are:
+#'
+#' * CategoricalVariable
+#' * CategoricalArrayVariable
+#' * MultipleResponseVariable
+#' * NumericVariable
+#' * TextVariable
+#' * DatetimeVariable
+#'
+#' Importantly, this also controls the relative widths of the columns.
+#'
+#' @md
+#' @param x An object of one of the types listed
+#' @export
+codebookItem <- function(x, ...) {
+  UseMethod("codebookItem", x)
+}
+
+#' @rdname codebookItem
+codebookItem.default <- function(x) {
+  wrong_class_error(x, c("CategoricalVariable", "CategoricalArrayVariable", "MultipleResponseVariable", "TextVariable", "NumericVariable", "DatetimeVariable"), "codebookItem")
+}
+
+
+#' Extract basic question information
+#'
+#' Extracts the following:
+#'
+#' * body
+#' * alias
+#' * description or question text
+#' * notes or filter text
+#' * id
+#'
+#' @md
+#' @export
+codebookItemTxt <- function(x) {
+  txt <- x@tuple@body
+  l <- list()
+  l$title <- txt$alias
+  l$alias <- txt$alias
+  l$name <- txt$name
+  l$id <- txt$id
+  l$filter_text <- txt$notes
+  l$question <- txt$description
+  l
+}
+
+#'
+codebookItemSubVars <- function (x) {
+  sv <- subvariables(x)
+  als <- unname(unlist(lapply(sv@index, getElement, "alias")))
+  resp <- unname(unlist(lapply(sv@index, getElement, "name")))
+  sv <- data.frame(`Sub Alias` = als, Name = resp)
+  sv_responses <- categories(x)
+
+  list(
+    key = sv,
+    key2 = setNames(sv_responses, c("Response", "Value"))
+  )
+
+}
+
+
+#' @describeIn codebookItem Prepares a codebookItem for a CategoricalVariable
+#' @export
+codebookItem.CategoricalVariable <- function(x) {
+  txt <- codebookItemTxt(x)
+  cats <- categories(x)
+  responses <- do.call(rbind, cats@.Data)
+  responses <- as.data.frame(responses[!unlist(responses[, "missing"]), ])
+  responses <- lapply(responses, unlist)
+  if (all(is.null(responses$numeric_value)))
+    responses$numeric_value = responses$id
+  responses <- as.data.frame(lapply(responses, unlist)[c("name", "numeric_value")])
+  names(responses) <- c("Response", "Value")
+
+  latexTop <- sprintf("
+    \\thispagestyle{fancy}
+    \\lhead{%s}
+    \\setlength{\\extrarowheight}{20pt}
+    \\begin{tabular*}{7in}{p{2.5in}p{4.5in}}
+    Question  & %s \\\\
+    Name  & %s \\\\
+    Alias  & %s \\\\
+    ID  & %s \\\\
+    Filtering Notes  & %s \\\\
+    \\end{tabular*}
+    ",
+    txt$meta$name,
+    txt$meta$question,
+    txt$meta$name,
+    txt$meta$alias,
+    txt$meta$id,
+    ifelse(txt$meta$filter_text == "", "None", txt$meta$filter_text)
+    )
+  latexTop <- gsub("    ", "", latexTop)
+
+  latexResponse
+
+  latexTop
+}
+
+#' @describeIn codebookItem Prepares a codebookitem for a CategoricalArrayVariable
+#' @export
+codebookItem.CategoricalArrayVariable <- function(x) {
+  txt = codebookItemTxt(x)
+  subvars = codebookItemSubVars(x)
+
+
+}
+
+#' @describeIn codebookItem Prepares a codebookitem for a MultipleResponseVariable
+#' @export
+codebookItem.MultipleResponseVariable <- function(x) {
+
+}
+
+#' @describeIn codebookItem Prepares a codebookitem for a NumericVariable
+#' @export
+codebookItem.NumericVariable <- function(x) {
+  tmp <- c(summary(c_var), SD = sd(c_var, na.rm = TRUE))
+  tmp <- tibble::rownames_to_column(as.data.frame(tmp))
+  colnames(tmp) <- c("Summary", "Value")
+  tmp$Value <- round(tmp$Value, 2)
+}
+
+#' @describeIn codebookItem Prepares a codebookitem for a TextVariable
+#' @export
+codebookItem.TextVariable <- function(x) {
+
+}
+
+#' @describeIn codebookItem Prepares a codebookitem for a DatetimeVariable
+#' @export
+codebookItem.DatetimeVariable <- function(x) {
+
+}
+
+#' Generate LaTeX CodeBooks
+#'
+#' \code{writeCodebook} produces publication-quality LaTeX reports
+#'
+#' @param ds A crunch dataset
+#' @param ds
+#'
+#' @param ... Additional arguments passed to writeLatx
+#'
+#' @importFrom utils installed.packages
+#' @export
+writeCodebook <- function(...) {
+
+  writeLatex(...)
+
+}
diff --git a/R/crosstabs.R b/R/crosstabs.R
@@ -11,6 +11,10 @@
 #' @param banner An optional object of class \code{Banner} that should be used to generate
 #' a Crosstabs summary. Defaults to \code{NULL} - a Toplines summary is produced and returned.
 #' @param codebook If \code{TRUE}, codebook data summaries are prepared. Defaults to \code{FALSE}.
+#' @param include_numeric Logical. Should we include numeric questions? Defaults to FALSE. Implemented for Toplines only.
+#' @param include_datetime Logical. Should we include date time questions? Defaults to FALSE. Implemented for Toplines only.
+#' @param include_verbatims Logical. Should we include a sample text varaibles? Defaults to FALSE. Implemented for Toplines only.
+#' @param num_verbatims An integer identifying the number of examples to extract from a text variable. Defaults to 10. Implemented for Toplines only.
 #' @return A Toplines (when no banner is provided) or Crosstabs (when a banner is provided)
 #' summary of the input dataset.
 #' @examples
@@ -22,7 +26,7 @@
 #' @importFrom crunch name aliases allVariables is.Numeric is.dataset weight alias weightVariables is.variable
 #' @importFrom methods is
 #' @export
-crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE) {
+crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE, include_numeric = FALSE, include_datetime = FALSE, include_verbatims = FALSE, num_verbatims = 10) {
 
   # TODO: open ends
   wrong_class_error(dataset, "CrunchDataset", "dataset")
@@ -61,10 +65,11 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
   vars_out <- if (codebook) { vars } else {
     intersect(vars, all_aliases[all_types %in% c("categorical", "multiple_response", "categorical_array", "numeric")]) }
 
-  error_if_items(
-    unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
-    "`vars` of type(s) {items} are not supported and have been skipped.",
-    and = TRUE, error = FALSE)
+
+  # error_if_items(
+  #   unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
+  #   "`vars` of type(s) {items} are not supported and have been skipped.",
+  #   and = TRUE, error = FALSE)
 
   if (length(vars_out) == 0) {
     stop("No variables provided.")
@@ -79,7 +84,13 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
     banner
   }
 
-  results <- tabBooks(dataset = dataset, vars = vars_out, banner = banner_use, weight = weight_var, topline = is.null(banner))
+  results <- tabBooks(
+    dataset = dataset,
+    vars = vars_out,
+    banner = banner_use,
+    weight = weight_var,
+    topline = is.null(banner)
+  )
 
   if (codebook) {
     res_class <- "Codebook"
@@ -106,12 +117,91 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
     class(banner) <- 'Banner'
   }
 
+  # Here we create logic for including summaries
+  # for variable types that did not previously have
+  # summaries (Numeric, Datetime, Text)
+
+  var_types <- unlist(lapply(dataset[vars], class))
+  names(var_types) <- vars
+  numerics <- vars[var_types == "NumericVariable"]
+  datetimes <- vars[var_types == "DatetimeVariable"]
+  verbatims <- vars[var_types == "TextVariable"]
+
+  if (include_numeric & length(numerics) > 0) {
+    # drop weighting vars
+    weightVars <- unlist(
+      lapply(numerics, function(x) is.weightVariable(dataset[[x]]))
+    )
+
+    numerics <- numerics[!weightVars]
+
+    numRes <- lapply(numerics, function(x) {
+        prepareExtraSummary(
+          dataset[[x]],
+          weighted = !is.null(weight)
+        )
+    })
+    names(numRes) <- numerics
+
+    results = c(
+      results, numRes
+    )
+  }
+
+  if (include_datetime & length(datetimes) > 0) {
+
+    datetimeRes <- lapply(datetimes, function(x) {
+      prepareExtraSummary(
+        dataset[[x]],
+        weighted = !is.null(weight)
+      )
+    })
+
+    names(datetimeRes) <- datetimes
+    results = c(
+      results,
+      datetimeRes
+    )
+  }
+
+  if (include_verbatims & length(verbatims) > 0) {
+    verbatimRes <- lapply(verbatims, function(x) {
+      prepareExtraSummary(
+        dataset[[x]],
+        weighted = !is.null(weight)
+      )
+    })
+
+    names(verbatimRes) <- verbatims
+
+    results = c(
+      results,
+      verbatimRes
+    )
+  }
+
+  if (include_verbatims | include_datetime | include_numeric) {
+    # If we include new question types we must reflow question
+    # numbers because otherwise they will be missing from the
+    # faked objects
+
+    # First re-flow in dataset order
+    tmpResults <- list()
+    for (i in vars) { tmpResults[[i]] <- results[[i]] }
+    # Then re-flow question numbers
+    results = reflowQuestionNumbers(tmpResults)
+  }
+
   summary_data <- list(
     metadata = c(
       list(
         title = name(dataset), weight = weight,
         start_date = crunch::startDate(dataset), end_date = crunch::endDate(dataset),
-        description = crunch::description(dataset))), results = results, banner = banner
+        description = crunch::description(dataset)
+        )
+    ),
+    results = results,
+    banner = banner
   )
 
   class(summary_data) <- res_class