Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: New summarization methods for crunchtabs toplines #109

Merged
merged 11 commits into from
May 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: In order to generate custom survey reports, this package provides
'banners' (cross-tabulations) of datasets in the Crunch
(<https://crunch.io/>) web service. Reports can be written in 'PDF' format
using 'LaTeX' or in Microsoft Excel '.xlsx' files.
Version: 1.2.3
Version: 1.2.5
Authors@R: c(
person("Persephone", "Tsebelis", role="aut"),
person("Kamil", "Sedrowicz", role="aut"),
Expand Down
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,42 @@ S3method(calculateIfLongtable,CrossTabVar)
S3method(calculateIfLongtable,ToplineCategoricalArray)
S3method(calculateIfLongtable,ToplineVar)
S3method(calculateIfLongtable,default)
S3method(codebookItem,CategoricalArrayVariable)
S3method(codebookItem,CategoricalVariable)
S3method(codebookItem,DatetimeVariable)
S3method(codebookItem,MultipleResponseVariable)
S3method(codebookItem,NumericVariable)
S3method(codebookItem,TextVariable)
S3method(getName,BannerVar)
S3method(getName,CrossTabVar)
S3method(getName,CrunchCube)
S3method(getName,CrunchTabs)
S3method(getName,ToplineBase)
S3method(getName,default)
S3method(prepareExtraSummary,DatetimeVariable)
S3method(prepareExtraSummary,NumericVariable)
S3method(prepareExtraSummary,TextVariable)
S3method(prepareExtraSummary,default)
S3method(print,Crosstabs)
S3method(print,Toplines)
S3method(tableHeader,CrossTabVar)
S3method(tableHeader,ToplineCategoricalArray)
S3method(tableHeader,ToplineVar)
S3method(tableHeader,default)
export(banner)
export(codebookItem)
export(codebookItemTxt)
export(crosstabs)
export(getName)
export(prepareExtraSummary)
export(surveyDuration)
export(themeDefaultExcel)
export(themeDefaultLatex)
export(themeHuffPoCrosstabs)
export(themeHuffPoToplines)
export(themeNew)
export(themeUKPolitical)
export(writeCodebook)
export(writeExcel)
export(writeLatex)
import(crunch)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
## crunchtabs 1.2.5

* Add summarization for continuous variables (NumericVariable) in toplines (#80)
* Add summarization for date time variable (DatetimeVariable) in toplines (#120)
* Add summarization for text data (TextVariable) in toplines (#121)
* Add amsmath package to latex preamble (#123)
* Bugfix for open=TRUE, PDF doc now opens appropriately (#125)
* Update FAQ: question alias numbering example (#128)
* Update FAQ: customizing stub widths (#118)
* Update FAQ: specifying logos in excel (#115)
* Internal proposal for new summarizations and codebook (#117)
* Fix codecov checks (#111)

## crunchtabs 1.2.3

* New introductory and FAQ vignettes (#96)
* tableHeader.CrossTabVar respects global stub width setting (#93)
* Updated vignette (#65)
* Adding more tests (#84)
Expand Down
158 changes: 158 additions & 0 deletions R/codebook.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#' Generate a codebook item
#'
#' A passthrough function that creates a table header appropriate to the class
#' of the data object being passed. Expected classes are:
#'
#' * CategoricalVariable
#' * CategoricalArrayVariable
#' * MultipleResponseVariable
#' * NumericVariable
#' * TextVariable
#' * DatetimeVariable
#'
#' Importantly, this also controls the relative widths of the columns.
#'
#' @md
#' @param x An object of one of the types listed
#' @export
codebookItem <- function(x, ...) {
UseMethod("codebookItem", x)
}

#' @rdname codebookItem
codebookItem.default <- function(x) {
wrong_class_error(x, c("CategoricalVariable", "CategoricalArrayVariable", "MultipleResponseVariable", "TextVariable", "NumericVariable", "DatetimeVariable"), "codebookItem")
}


#' Extract basic question information
#'
#' Extracts the following:
#'
#' * body
#' * alias
#' * description or question text
#' * notes or filter text
#' * id
#'
#' @md
#' @export
codebookItemTxt <- function(x) {
txt <- x@tuple@body
l <- list()
l$title <- txt$alias
l$alias <- txt$alias
l$name <- txt$name
l$id <- txt$id
l$filter_text <- txt$notes
l$question <- txt$description
l
}

#'
codebookItemSubVars <- function (x) {
sv <- subvariables(x)
als <- unname(unlist(lapply(sv@index, getElement, "alias")))
resp <- unname(unlist(lapply(sv@index, getElement, "name")))
sv <- data.frame(`Sub Alias` = als, Name = resp)
sv_responses <- categories(x)

list(
key = sv,
key2 = setNames(sv_responses, c("Response", "Value"))
)

}


#' @describeIn codebookItem Prepares a codebookItem for a CategoricalVariable
#' @export
codebookItem.CategoricalVariable <- function(x) {
txt <- codebookItemTxt(x)
cats <- categories(x)
responses <- do.call(rbind, cats@.Data)
responses <- as.data.frame(responses[!unlist(responses[, "missing"]), ])
responses <- lapply(responses, unlist)
if (all(is.null(responses$numeric_value)))
responses$numeric_value = responses$id
responses <- as.data.frame(lapply(responses, unlist)[c("name", "numeric_value")])
names(responses) <- c("Response", "Value")

latexTop <- sprintf("
\\thispagestyle{fancy}
\\lhead{%s}
\\setlength{\\extrarowheight}{20pt}
\\begin{tabular*}{7in}{p{2.5in}p{4.5in}}
Question & %s \\\\
Name & %s \\\\
Alias & %s \\\\
ID & %s \\\\
Filtering Notes & %s \\\\
\\end{tabular*}
",
txt$meta$name,
txt$meta$question,
txt$meta$name,
txt$meta$alias,
txt$meta$id,
ifelse(txt$meta$filter_text == "", "None", txt$meta$filter_text)
)
latexTop <- gsub(" ", "", latexTop)

latexResponse

latexTop
}

#' @describeIn codebookItem Prepares a codebookitem for a CategoricalArrayVariable
#' @export
codebookItem.CategoricalArrayVariable <- function(x) {
txt = codebookItemTxt(x)
subvars = codebookItemSubVars(x)


}

#' @describeIn codebookItem Prepares a codebookitem for a MultipleResponseVariable
#' @export
codebookItem.MultipleResponseVariable <- function(x) {

}

#' @describeIn codebookItem Prepares a codebookitem for a NumericVariable
#' @export
codebookItem.NumericVariable <- function(x) {
tmp <- c(summary(c_var), SD = sd(c_var, na.rm = TRUE))
tmp <- tibble::rownames_to_column(as.data.frame(tmp))
colnames(tmp) <- c("Summary", "Value")
tmp$Value <- round(tmp$Value, 2)
}

#' @describeIn codebookItem Prepares a codebookitem for a TextVariable
#' @export
codebookItem.TextVariable <- function(x) {

}

#' @describeIn codebookItem Prepares a codebookitem for a DatetimeVariable
#' @export
codebookItem.DatetimeVariable <- function(x) {

}

#' Generate LaTeX CodeBooks
#'
#' \code{writeCodebook} produces publication-quality LaTeX reports
#'
#' @param ds A crunch dataset
#' @param ds
#'
#' @param ... Additional arguments passed to writeLatx
#'
#' @importFrom utils installed.packages
#' @export
writeCodebook <- function(...) {

writeLatex(...)

}
104 changes: 97 additions & 7 deletions R/crosstabs.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#' @param banner An optional object of class \code{Banner} that should be used to generate
#' a Crosstabs summary. Defaults to \code{NULL} - a Toplines summary is produced and returned.
#' @param codebook If \code{TRUE}, codebook data summaries are prepared. Defaults to \code{FALSE}.
#' @param include_numeric Logical. Should we include numeric questions? Defaults to FALSE. Implemented for Toplines only.
#' @param include_datetime Logical. Should we include date time questions? Defaults to FALSE. Implemented for Toplines only.
#' @param include_verbatims Logical. Should we include a sample text varaibles? Defaults to FALSE. Implemented for Toplines only.
#' @param num_verbatims An integer identifying the number of examples to extract from a text variable. Defaults to 10. Implemented for Toplines only.
#' @return A Toplines (when no banner is provided) or Crosstabs (when a banner is provided)
#' summary of the input dataset.
#' @examples
Expand All @@ -22,7 +26,7 @@
#' @importFrom crunch name aliases allVariables is.Numeric is.dataset weight alias weightVariables is.variable
#' @importFrom methods is
#' @export
crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE) {
crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE, include_numeric = FALSE, include_datetime = FALSE, include_verbatims = FALSE, num_verbatims = 10) {

# TODO: open ends
wrong_class_error(dataset, "CrunchDataset", "dataset")
Expand Down Expand Up @@ -61,10 +65,11 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
vars_out <- if (codebook) { vars } else {
intersect(vars, all_aliases[all_types %in% c("categorical", "multiple_response", "categorical_array", "numeric")]) }

error_if_items(
unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
"`vars` of type(s) {items} are not supported and have been skipped.",
and = TRUE, error = FALSE)

# error_if_items(
# unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
# "`vars` of type(s) {items} are not supported and have been skipped.",
# and = TRUE, error = FALSE)

if (length(vars_out) == 0) {
stop("No variables provided.")
Expand All @@ -79,7 +84,13 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
banner
}

results <- tabBooks(dataset = dataset, vars = vars_out, banner = banner_use, weight = weight_var, topline = is.null(banner))
results <- tabBooks(
dataset = dataset,
vars = vars_out,
banner = banner_use,
weight = weight_var,
topline = is.null(banner)
)

if (codebook) {
res_class <- "Codebook"
Expand All @@ -106,12 +117,91 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
class(banner) <- 'Banner'
}

# Here we create logic for including summaries
# for variable types that did not previously have
# summaries (Numeric, Datetime, Text)

var_types <- unlist(lapply(dataset[vars], class))
names(var_types) <- vars
numerics <- vars[var_types == "NumericVariable"]
datetimes <- vars[var_types == "DatetimeVariable"]
verbatims <- vars[var_types == "TextVariable"]

if (include_numeric & length(numerics) > 0) {
# drop weighting vars
weightVars <- unlist(
lapply(numerics, function(x) is.weightVariable(dataset[[x]]))
)

numerics <- numerics[!weightVars]

numRes <- lapply(numerics, function(x) {
prepareExtraSummary(
dataset[[x]],
weighted = !is.null(weight)
)
})
names(numRes) <- numerics

results = c(
results, numRes
)
}

if (include_datetime & length(datetimes) > 0) {

datetimeRes <- lapply(datetimes, function(x) {
prepareExtraSummary(
dataset[[x]],
weighted = !is.null(weight)
)
})

names(datetimeRes) <- datetimes
results = c(
results,
datetimeRes
)
}

if (include_verbatims & length(verbatims) > 0) {
verbatimRes <- lapply(verbatims, function(x) {
prepareExtraSummary(
dataset[[x]],
weighted = !is.null(weight)
)
})

names(verbatimRes) <- verbatims

results = c(
results,
verbatimRes
)
}

if (include_verbatims | include_datetime | include_numeric) {
# If we include new question types we must reflow question
# numbers because otherwise they will be missing from the
# faked objects

# First re-flow in dataset order
tmpResults <- list()
for (i in vars) { tmpResults[[i]] <- results[[i]] }
# Then re-flow question numbers
results = reflowQuestionNumbers(tmpResults)
}

summary_data <- list(
metadata = c(
list(
title = name(dataset), weight = weight,
start_date = crunch::startDate(dataset), end_date = crunch::endDate(dataset),
description = crunch::description(dataset))), results = results, banner = banner
description = crunch::description(dataset)
)
),
results = results,
banner = banner
)

class(summary_data) <- res_class
Expand Down
Loading