From f415b4938fa1a13fd68dd6cebab0f6ea79a0700d Mon Sep 17 00:00:00 2001 From: "Steven M. Mortimer" Date: Sat, 3 Jul 2021 16:30:01 -0500 Subject: [PATCH] Allow guess_types in sf_run_report and fix date and datetime column bug Salesforce returns dates and datetimes in UTC but sometimes as YYYY-MM-DD or MM/DD/YYYY in the case of reports, so we will convert using the anytime package rather than trusting type_convert()'s behavior. Allow users to pass guess_types directly to sf_run_report() in case they still want to do that and parse the character strings on their own Closes #93 --- DESCRIPTION | 1 + NAMESPACE | 3 +++ R/analytics-report.R | 13 ++++++++++++- R/utils-query.R | 28 ++++++++++++++++++++------- man/sf_guess_cols.Rd | 2 +- man/sf_run_report.Rd | 46 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 84 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 202971f5..cefa6138 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Imports: tibble (>= 3.0.3), readr (>= 1.3.1), lubridate (>= 1.7.8), + anytime (>= 0.3.9), rlang (>= 0.4.7), httr (>= 1.4.1), curl (>= 4.3), diff --git a/NAMESPACE b/NAMESPACE index e48f0f02..af1e28a1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -282,6 +282,8 @@ importFrom(XML,xmlSApply) importFrom(XML,xmlSize) importFrom(XML,xmlToList) importFrom(XML,xmlValue) +importFrom(anytime,anydate) +importFrom(anytime,anytime) importFrom(base64enc,base64encode) importFrom(curl,form_data) importFrom(curl,form_file) @@ -359,6 +361,7 @@ importFrom(purrr,transpose) importFrom(readr,col_character) importFrom(readr,col_guess) importFrom(readr,cols) +importFrom(readr,locale) importFrom(readr,parse_datetime) importFrom(readr,read_csv) importFrom(readr,type_convert) diff --git a/R/analytics-report.R b/R/analytics-report.R index 408c8d8e..ddcdde73 100644 --- a/R/analytics-report.R +++ b/R/analytics-report.R @@ -962,6 +962,9 @@ sf_get_report_instance_results <- function(report_id, #' report finish running so that data can be 
obtained. Otherwise, return the #' report instance details which can be used to retrieve the results when the #' async report has finished. +#' @template guess_types +#' @template bind_using_character_cols +#' @template fact_map_key #' @template verbose #' @return \code{tbl_df} #' @family Report functions @@ -1017,6 +1020,9 @@ sf_run_report <- function(report_id, interval_seconds = 3, max_attempts = 200, wait_for_results = TRUE, + guess_types = TRUE, + bind_using_character_cols = FALSE, + fact_map_key = "T!T", verbose = FALSE){ # build out the body of the request based on the inputted arguments by starting @@ -1090,7 +1096,9 @@ sf_run_report <- function(report_id, results <- sf_execute_report(report_id, async = async, - report_metadata = request_body, + report_metadata = request_body, + guess_types = guess_types, + bind_using_character_cols = bind_using_character_cols, verbose = verbose) # request the report results (still wait if async is specified) @@ -1123,6 +1131,9 @@ sf_run_report <- function(report_id, } results <- sf_get_report_instance_results(report_id, results$id, + guess_types = guess_types, + bind_using_character_cols = bind_using_character_cols, + fact_map_key = "T!T", verbose = verbose) } } diff --git a/R/utils-query.R b/R/utils-query.R index 4acdb5ab..28e46ee0 100644 --- a/R/utils-query.R +++ b/R/utils-query.R @@ -733,12 +733,13 @@ sf_reorder_cols <- function(df){ contains(".")) } -#' Reorder resultset columns to prioritize \code{sObject} and \code{Id} +#' Parse resultset columns to a known datatype in R #' #' This function accepts a \code{tbl_df} with columns rearranged. #' #' @importFrom dplyr mutate across -#' @importFrom readr type_convert cols col_guess +#' @importFrom anytime anytime anydate +#' @importFrom readr type_convert cols col_guess locale #' @param df \code{tbl_df}; the data frame to rearrange columns in #' @return \code{tbl_df} the formatted data frame #' @note This function is meant to be used internally. Only use when debugging. 
@@ -746,19 +747,32 @@ sf_reorder_cols <- function(df){ #' @export sf_guess_cols <- function(df, guess_types=TRUE, dataType=NULL){ if(guess_types){ - if(is.null(dataType) || any(is.na(dataType)) || (length(dataType)== 0)){ + if(is.null(dataType) || any(is.na(dataType)) || (length(dataType) == 0)){ df <- df %>% - type_convert(col_types = cols(.default = col_guess())) + type_convert(col_types = cols(.default = col_guess()), locale=locale(tz="UTC")) } else { col_spec <- sf_build_cols_spec(dataType) - # if numeric but contains Salesforce "-" then preemptively change to NA + # if numeric Salesforce will flag N/A as "-" so we need to preemptively change to NA + # TODO: Does it use "-" for NA or zero? Or both? if(grepl('i|n', col_spec)){ numeric_col_idx <- which(strsplit(col_spec, split=character(0))[[1]] %in% c("i", "n")) df <- df %>% mutate(across(all_of(numeric_col_idx), ~ifelse(.x == "-", NA_character_, .x))) } - df <- df %>% - type_convert(col_types = col_spec) + # Salesforce returns dates and datetimes in UTC but sometimes as YYYY-MM-DD + # or MM/DD/YYYY in the case of reports, so we will convert using the + # anytime package rather than trusting type_convert's behavior + if(grepl('D', col_spec)){ + date_col_idx <- which(strsplit(col_spec, split=character(0))[[1]] == "D") + df <- df %>% + mutate(across(all_of(date_col_idx), ~as.character(anydate(.x, tz="UTC", asUTC=TRUE)))) + } + if(grepl('T', col_spec)){ + datetime_col_idx <- which(strsplit(col_spec, split=character(0))[[1]] == "T") + df <- df %>% + mutate(across(all_of(datetime_col_idx), ~as.character(anytime(.x, tz="UTC", asUTC=TRUE)))) + } + df <- df %>% type_convert(col_types = col_spec, locale=locale(tz="UTC")) } } return(df) diff --git a/man/sf_guess_cols.Rd b/man/sf_guess_cols.Rd index 61589fcd..b60d1ab6 100644 --- a/man/sf_guess_cols.Rd +++ b/man/sf_guess_cols.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils-query.R \name{sf_guess_cols} \alias{sf_guess_cols} -\title{Reorder resultset columns to 
prioritize \code{sObject} and \code{Id}} +\title{Parse resultset columns to a known datatype in R} \usage{ sf_guess_cols(df, guess_types = TRUE, dataType = NULL) } diff --git a/man/sf_run_report.Rd b/man/sf_run_report.Rd index f2aa2e1d..edfbfc14 100644 --- a/man/sf_run_report.Rd +++ b/man/sf_run_report.Rd @@ -15,6 +15,9 @@ sf_run_report( interval_seconds = 3, max_attempts = 200, wait_for_results = TRUE, + guess_types = TRUE, + bind_using_character_cols = FALSE, + fact_map_key = "T!T", verbose = FALSE ) } @@ -63,6 +66,26 @@ report finish running so that data can be obtained. Otherwise, return the report instance details which can be used to retrieve the results when the async report has finished.} +\item{guess_types}{\code{logical}; indicating whether or not to use \code{col_guess()} +to try and cast the data returned in the recordset. If \code{TRUE} then +\code{col_guess()} is used, if \code{FALSE} then all fields will be returned +as character. This is helpful when \code{col_guess()} will mangle field values +in Salesforce that you'd like to preserve during translation into a \code{tbl_df}, +like numeric looking values that must be preserved as strings ("48.0").} + +\item{bind_using_character_cols}{\code{logical}; an indicator of whether to +cast the data to all character columns to ensure that \code{\link[dplyr:bind]{bind_rows}} +does not fail because two paginated recordsets have differing datatypes for the +same column. Set this to \code{TRUE} rarely, typically only when having this +set to \code{FALSE} returns an error or you want all columns in the data to be +character.} + +\item{fact_map_key}{\code{character}; string providing an index into each +section of a fact map, from which you can access summary and detailed data. +The pattern for the fact map keys varies by report format so it is important +to know what the \code{reportFormat} property of the target report is. 
See the +note below for more details.} + \item{verbose}{\code{logical}; an indicator of whether to print additional detail for each API call, which is useful for debugging. More specifically, when set to \code{TRUE} the URL, header, and body will be printed for each request, @@ -88,6 +111,29 @@ without total rows and given options to filter, and select the Top N as function arguments rather than forcing the user to create an entire list of \code{reportMetadata}. } +\note{ +Below are the fact map key patterns for three report types: +\describe{ +\item{TABULAR}{\code{T!T}: The grand total of a report. Both record data +values and the grand total are represented by this key.} +\item{SUMMARY}{\code{<First level row grouping_second level row grouping_third level row grouping>!T}: T refers to the row grand total.} +\item{MATRIX}{\code{<First level row grouping_second level row grouping>!<First level column grouping_second level column grouping>.}} +} + +Each item in a row or column grouping is numbered starting with 0. Here are +some examples of fact map keys: + +\describe{ +\item{0!T}{The first item in the first-level grouping.} +\item{1!T}{The second item in the first-level grouping.} +\item{0_0!T}{The first item in the first-level grouping and the first item +in the second-level grouping.} +\item{0_1!T}{The first item in the first-level grouping and the second item +in the second-level grouping.} +} +} \section{Salesforce Documentation}{ \itemize{