From f415b4938fa1a13fd68dd6cebab0f6ea79a0700d Mon Sep 17 00:00:00 2001
From: "Steven M. Mortimer" <mortimer.steven.m@gmail.com>
Date: Sat, 3 Jul 2021 16:30:01 -0500
Subject: [PATCH] Allow guess_types in sf_run_report and fix date and datetime
 column bug

Salesforce returns dates and datetimes in UTC, but reports
sometimes format them as YYYY-MM-DD or MM/DD/YYYY, so we
convert them using the anytime package rather than trusting
type_convert()'s behavior.

Allow users to pass guess_types directly to sf_run_report()
so that they can set guess_types = FALSE and parse the
returned character strings on their own.

Closes #93
---
 DESCRIPTION          |  1 +
 NAMESPACE            |  3 +++
 R/analytics-report.R | 13 ++++++++++++-
 R/utils-query.R      | 28 ++++++++++++++++++++-------
 man/sf_guess_cols.Rd |  2 +-
 man/sf_run_report.Rd | 46 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 84 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 202971f5..cefa6138 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,6 +32,7 @@ Imports:
     tibble (>= 3.0.3),
     readr (>= 1.3.1),
     lubridate (>= 1.7.8),
+    anytime (>= 0.3.9),
     rlang (>= 0.4.7),
     httr (>= 1.4.1),
     curl (>= 4.3),
diff --git a/NAMESPACE b/NAMESPACE
index e48f0f02..af1e28a1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -282,6 +282,8 @@ importFrom(XML,xmlSApply)
 importFrom(XML,xmlSize)
 importFrom(XML,xmlToList)
 importFrom(XML,xmlValue)
+importFrom(anytime,anydate)
+importFrom(anytime,anytime)
 importFrom(base64enc,base64encode)
 importFrom(curl,form_data)
 importFrom(curl,form_file)
@@ -359,6 +361,7 @@ importFrom(purrr,transpose)
 importFrom(readr,col_character)
 importFrom(readr,col_guess)
 importFrom(readr,cols)
+importFrom(readr,locale)
 importFrom(readr,parse_datetime)
 importFrom(readr,read_csv)
 importFrom(readr,type_convert)
diff --git a/R/analytics-report.R b/R/analytics-report.R
index 408c8d8e..ddcdde73 100644
--- a/R/analytics-report.R
+++ b/R/analytics-report.R
@@ -962,6 +962,9 @@ sf_get_report_instance_results <- function(report_id,
 #' report finish running so that data can be obtained. Otherwise, return the
 #' report instance details which can be used to retrieve the results when the
 #' async report has finished.
+#' @template guess_types
+#' @template bind_using_character_cols
+#' @template fact_map_key
 #' @template verbose
 #' @return \code{tbl_df}
 #' @family Report functions
@@ -1017,6 +1020,9 @@ sf_run_report <- function(report_id,
                           interval_seconds = 3,
                           max_attempts = 200,
                           wait_for_results = TRUE,
+                          guess_types = TRUE,
+                          bind_using_character_cols = FALSE,
+                          fact_map_key = "T!T",
                           verbose = FALSE){
   
   # build out the body of the request based on the inputted arguments by starting 
@@ -1090,7 +1096,9 @@ sf_run_report <- function(report_id,
   
   results <- sf_execute_report(report_id, 
                                async = async, 
-                               report_metadata = request_body, 
+                               report_metadata = request_body,
+                               guess_types = guess_types,
+                               bind_using_character_cols = bind_using_character_cols,
                                verbose = verbose)
   
   # request the report results (still wait if async is specified)
@@ -1123,6 +1131,9 @@ sf_run_report <- function(report_id,
       }
       results <- sf_get_report_instance_results(report_id, 
                                                 results$id, 
+                                                guess_types = guess_types,
+                                                bind_using_character_cols = bind_using_character_cols,
+                                                fact_map_key = fact_map_key,
                                                 verbose = verbose)
     }
   }
diff --git a/R/utils-query.R b/R/utils-query.R
index 4acdb5ab..28e46ee0 100644
--- a/R/utils-query.R
+++ b/R/utils-query.R
@@ -733,12 +733,13 @@ sf_reorder_cols <- function(df){
              contains("."))
 }
 
-#' Reorder resultset columns to prioritize \code{sObject} and \code{Id}
+#' Parse resultset columns to a known datatype in R
 #' 
 #' This function accepts a \code{tbl_df} with columns rearranged.
 #' 
 #' @importFrom dplyr mutate across
-#' @importFrom readr type_convert cols col_guess
+#' @importFrom anytime anytime anydate
+#' @importFrom readr type_convert cols col_guess locale
 #' @param df \code{tbl_df}; the data frame to rearrange columns in
 #' @return \code{tbl_df} the formatted data frame
 #' @note This function is meant to be used internally. Only use when debugging.
@@ -746,19 +747,32 @@ sf_reorder_cols <- function(df){
 #' @export
 sf_guess_cols <- function(df, guess_types=TRUE, dataType=NULL){
   if(guess_types){
-    if(is.null(dataType) || any(is.na(dataType)) || (length(dataType)== 0)){
+    if(is.null(dataType) || any(is.na(dataType)) || (length(dataType) == 0)){
       df <- df %>% 
-        type_convert(col_types = cols(.default = col_guess()))      
+        type_convert(col_types = cols(.default = col_guess()), locale=locale(tz="UTC"))      
     } else {
       col_spec <- sf_build_cols_spec(dataType)
-      # if numeric but contains Salesforce "-" then preemptively change to NA
+      # if numeric Salesforce will flag N/A as "-" so we need to preemptively change to NA
+      # TODO: Does it use "-" for NA or zero? Or both?
       if(grepl('i|n', col_spec)){
         numeric_col_idx <- which(strsplit(col_spec, split=character(0))[[1]] %in% c("i", "n"))
         df <- df %>% 
           mutate(across(all_of(numeric_col_idx), ~ifelse(.x == "-", NA_character_, .x)))
       }
-      df <- df %>% 
-        type_convert(col_types = col_spec)      
+      # Salesforce returns dates and datetimes in UTC but sometimes as YYYY-MM-DD 
+      # or MM/DD/YYYY in the case of reports, so we will convert using the 
+      # anytime package rather than trusting type_convert's behavior
+      if(grepl('D', col_spec)){
+        date_col_idx <- which(strsplit(col_spec, split=character(0))[[1]] == "D")
+        df <- df %>% 
+          mutate(across(all_of(date_col_idx), ~as.character(anydate(.x, tz="UTC", asUTC=TRUE))))
+      }
+      if(grepl('T', col_spec)){
+        datetime_col_idx <- which(strsplit(col_spec, split=character(0))[[1]] == "T")
+        df <- df %>% 
+          mutate(across(all_of(datetime_col_idx), ~as.character(anytime(.x, tz="UTC", asUTC=TRUE))))
+      }
+      df <- df %>% type_convert(col_types = col_spec, locale=locale(tz="UTC"))
     }
   }
   return(df)
diff --git a/man/sf_guess_cols.Rd b/man/sf_guess_cols.Rd
index 61589fcd..b60d1ab6 100644
--- a/man/sf_guess_cols.Rd
+++ b/man/sf_guess_cols.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/utils-query.R
 \name{sf_guess_cols}
 \alias{sf_guess_cols}
-\title{Reorder resultset columns to prioritize \code{sObject} and \code{Id}}
+\title{Parse resultset columns to a known datatype in R}
 \usage{
 sf_guess_cols(df, guess_types = TRUE, dataType = NULL)
 }
diff --git a/man/sf_run_report.Rd b/man/sf_run_report.Rd
index f2aa2e1d..edfbfc14 100644
--- a/man/sf_run_report.Rd
+++ b/man/sf_run_report.Rd
@@ -15,6 +15,9 @@ sf_run_report(
   interval_seconds = 3,
   max_attempts = 200,
   wait_for_results = TRUE,
+  guess_types = TRUE,
+  bind_using_character_cols = FALSE,
+  fact_map_key = "T!T",
   verbose = FALSE
 )
 }
@@ -63,6 +66,26 @@ report finish running so that data can be obtained. Otherwise, return the
 report instance details which can be used to retrieve the results when the
 async report has finished.}
 
+\item{guess_types}{\code{logical}; indicating whether or not to use \code{col_guess()}
+to try and cast the data returned in the recordset. If \code{TRUE} then
+\code{col_guess()} is used, if \code{FALSE} then all fields will be returned
+as character. This is helpful when \code{col_guess()} will mangle field values
+in Salesforce that you'd like to preserve during translation into a \code{tbl_df},
+like numeric looking values that must be preserved as strings ("48.0").}
+
+\item{bind_using_character_cols}{\code{logical}; an indicator of whether to
+cast the data to all character columns to ensure that \code{\link[dplyr:bind]{bind_rows}}
+does not fail because two paginated recordsets have differing datatypes for the
+same column. Set this to \code{TRUE} rarely, typically only when having this
+set to \code{FALSE} returns an error or you want all columns in the data to be
+character.}
+
+\item{fact_map_key}{\code{character}; string providing an index into each
+section of a fact map, from which you can access summary and detailed data.
+The pattern for the fact map keys varies by report format so it is important
+to know what the \code{reportFormat} property of the target report is. See the
+note below for more details.}
+
 \item{verbose}{\code{logical}; an indicator of whether to print additional
 detail for each API call, which is useful for debugging. More specifically, when
 set to \code{TRUE} the URL, header, and body will be printed for each request,
@@ -88,6 +111,29 @@ without total rows and given options to filter, and select the Top N as
 function arguments rather than forcing the user to create an entire list of
 \code{reportMetadata}.
 }
+\note{
+Below are the fact map key patterns for three report types:
+\describe{
+\item{TABULAR}{\code{T!T}: The grand total of a report. Both record data
+values and the grand total are represented by this key.}
+\item{SUMMARY}{\code{<First level row grouping_second level row grouping_third 
+  level row grouping>!T}: T refers to the row grand total.}
+\item{MATRIX}{\code{<First level row grouping_second level row grouping>!<First 
+  level column grouping_second level column grouping>.}}
+}
+
+Each item in a row or column grouping is numbered starting with 0. Here are
+some examples of fact map keys:
+
+\describe{
+\item{0!T}{The first item in the first-level grouping.}
+\item{1!T}{The second item in the first-level grouping.}
+\item{0_0!T}{The first item in the first-level grouping and the first item
+in the second-level grouping.}
+\item{0_1!T}{The first item in the first-level grouping and the second item
+in the second-level grouping.}
+}
+}
 \section{Salesforce Documentation}{
 
 \itemize{