Add Duplicate Matching and Start of More Global Header Options Control
StevenMMortimer committed May 15, 2019
1 parent 9c10b48 commit dd894d8
Showing 29 changed files with 1,840 additions and 616 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
@@ -3,6 +3,7 @@
export(VERB_n)
export(build_soap_xml_from_list)
export(catch_errors)
export(collapse_list_with_dupe_names)
export(get_os)
export(make_base_metadata_url)
export(make_base_rest_url)
@@ -68,6 +69,8 @@ export(sf_describe_metadata)
export(sf_describe_object_fields)
export(sf_describe_objects)
export(sf_end_job_bulk)
export(sf_find_duplicates)
export(sf_find_duplicates_by_id)
export(sf_get_all_jobs_bulk)
export(sf_get_job_bulk)
export(sf_get_job_records_bulk)
@@ -157,6 +160,7 @@ importFrom(methods,as)
importFrom(purrr,map)
importFrom(purrr,map_df)
importFrom(purrr,map_dfc)
importFrom(purrr,modify_if)
importFrom(readr,col_character)
importFrom(readr,col_guess)
importFrom(readr,cols)
11 changes: 8 additions & 3 deletions NEWS.md
@@ -4,17 +4,22 @@

* Add **RForcecom** backward compatible version of `rforcecom.getObjectDescription()`
* Add `sf_describe_object_fields()` which is a tidier version of `rforcecom.getObjectDescription()`
* Allow users to control whether query results are kept as all character or the
types are guessed (#12)
* Add `sf_get_all_jobs_bulk()` so that users can retrieve details for all
bulk jobs (#13)
* Add new utility functions `sf_set_password()` and `sf_reset_password()` (#11)
* Add two new functions to check for duplicates (`sf_find_duplicates()`, `sf_find_duplicates_by_id()`) (#4)

### Bug Fixes

* Fix bug where Username/Password authenticated sessions were not working with
api_type = "Bulk 1.0"
* Fix bug where Bulk 1.0 queries that time out hit an error while trying to abort,
since aborting was only supported for Bulk 2.0 jobs (#13)
* Fix bug where hard-coded URLs meant that only production environment logins
were possible (@weckstm, #18)
* Make `sf_describe_object_fields()` more robust against nested list elements (#16)

---

3 changes: 2 additions & 1 deletion R/read-metadata.R
@@ -73,7 +73,8 @@ sf_read_metadata <- function(metadata_type, object_names, verbose=FALSE){
#' of the fields on that object by returning a tibble with one row per field.
#'
#' @importFrom readr type_convert cols
#' @importFrom dplyr as_tibble
#' @importFrom purrr modify_if
#' @template object_name
#' @note The tibble only contains the fields that the user can view, as defined by
#' the user's field-level security settings.
176 changes: 176 additions & 0 deletions R/utils-org.R
@@ -270,6 +270,182 @@ sf_list_objects <- function(){
return(response_parsed)
}

#' Find Duplicate Records
#'
#' Performs rule-based searches for duplicate records.
#'
#' @importFrom dplyr select rename_at everything matches contains as_tibble vars
#' @importFrom readr cols type_convert
#' @importFrom purrr map map_df
#' @importFrom xml2 xml_ns_strip xml_find_all xml_text
#' @importFrom httr content
#' @param search_criteria \code{list}; a list of fields and their values that would
#' constitute a match. For example, \code{list(FirstName="Marc", Company="Salesforce")}
#' @template object_name
#' @template include_record_details
#' @template verbose
#' @return \code{tbl_df} of records found to be duplicates by the match rules
#' @note You must have activated duplicate rules for the supplied object before running
#' this function. The \code{object_name} argument determines which object's duplicate
#' rules are applied to the search criteria when deciding which records, possibly in
#' other objects, are duplicates.
#' @examples
#' \dontrun{
#' # if we insert a lead with this email address, what duplicate records exist
#' # elsewhere according to the Lead object's duplicate rules?
#' found_dupes <- sf_find_duplicates(search_criteria = list(Email = "bond_john@@grandhotels.com"),
#' object_name = "Lead")
#'
#' # now look across all other objects using the Contact object rules
#' found_dupes <- sf_find_duplicates(search_criteria = list(Email = "bond_john@@grandhotels.com"),
#' object_name = "Contact")
#' }
#' @export
sf_find_duplicates <- function(search_criteria,
object_name,
include_record_details = FALSE,
verbose = FALSE){

base_soap_url <- make_base_soap_url()
if(verbose) {
message(base_soap_url)
}

# build the body
r <- make_soap_xml_skeleton(
  soap_headers = list(
    DuplicateRuleHeader = list(includeRecordDetails = tolower(include_record_details))
  )
)
xml_dat <- build_soap_xml_from_list(input_data = search_criteria,
operation = "findDuplicates",
object_name = object_name,
root = r)

httr_response <- rPOST(url = base_soap_url,
headers = c("SOAPAction"="create",
"Content-Type"="text/xml"),
body = as(xml_dat, "character"))
catch_errors(httr_response)
response_parsed <- content(httr_response, encoding='UTF-8')

duplicate_entitytype <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRuleEntityType') %>%
xml_text() %>%
head(1)

which_rules <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRule') %>%
map(xml_text) %>%
unlist() %>%
paste(collapse = ", ")

message(sprintf("Using %s rules: %s", duplicate_entitytype, which_rules))

this_res <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//matchResults//matchRecords//record') %>%
map_df(xml_nodeset_to_df) %>%
rename_at(.vars = vars(contains("sf:")),
.funs = list(~gsub("sf:", "", .))) %>%
rename_at(.vars = vars(contains("Id1")),
.funs = list(~gsub("Id1", "Id", .))) %>%
select(-matches("^V[0-9]+$")) %>%
# move columns without a dot up front; columns containing a dot hold related-entity fields
select(-matches("\\."), everything()) %>%
type_convert(col_types = cols()) %>%
as_tibble()

# drop columns which are completely missing. This happens whenever a linked object is
# null for a record: a bare "sf:EntityName" column is created that is NA for that
# record and NA for the other records as well, since the entity is non-null for them
this_res <- Filter(function(x) !all(is.na(x)), this_res)

return(this_res)
}
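
As a usage sketch (editorial illustration, not part of the commit): assuming an
authenticated salesforcer session via the package's `sf_auth()` and an org with an
active Lead duplicate rule, the call below runs that rule against a single email
address. The email value is made up.

library(salesforcer)
sf_auth()  # assumes cached credentials or an interactive OAuth flow

found_dupes <- sf_find_duplicates(search_criteria = list(Email = "bond_john@grandhotels.com"),
                                  object_name = "Lead",
                                  include_record_details = TRUE)
found_dupes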

#' Find Duplicate Records By Id
#'
#' Performs rule-based searches for duplicate records.
#'
#' @template sf_id
#' @template include_record_details
#' @template verbose
#' @return \code{tbl_df} of records found to be duplicates by the match rules
#' @note You must have activated duplicate rules for the supplied object before running
#' this function. This function uses the duplicate rules for the object that has
#' the same type as the input record IDs. For example, if the record Id represents
#' an Account, this function uses the duplicate rules associated with the
#' Account object.
#' @examples
#' \dontrun{
#' found_dupes <- sf_find_duplicates_by_id(sf_id = "00Q6A00000aABCnZZZ")
#' }
#' @export
sf_find_duplicates_by_id <- function(sf_id,
include_record_details = FALSE,
verbose = FALSE){

stopifnot(length(sf_id) == 1)

base_soap_url <- make_base_soap_url()
if(verbose) {
message(base_soap_url)
}

# build the body
r <- make_soap_xml_skeleton(
  soap_headers = list(
    DuplicateRuleHeader = list(includeRecordDetails = tolower(include_record_details))
  )
)
xml_dat <- build_soap_xml_from_list(input_data = sf_id,
operation = "findDuplicatesByIds",
root = r)

httr_response <- rPOST(url = base_soap_url,
headers = c("SOAPAction"="create",
"Content-Type"="text/xml"),
body = as(xml_dat, "character"))
catch_errors(httr_response)
response_parsed <- content(httr_response, encoding='UTF-8')

duplicate_entitytype <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRuleEntityType') %>%
xml_text() %>%
head(1)

which_rules <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRule') %>%
map(xml_text) %>%
unlist() %>%
paste(collapse = ", ")

message(sprintf("Using %s rules: %s", duplicate_entitytype, which_rules))

this_res <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//matchResults//matchRecords//record') %>%
map_df(xml_nodeset_to_df) %>%
rename_at(.vars = vars(contains("sf:")),
.funs = list(~gsub("sf:", "", .))) %>%
rename_at(.vars = vars(contains("Id1")),
.funs = list(~gsub("Id1", "Id", .))) %>%
select(-matches("^V[0-9]+$")) %>%
# move columns without a dot up front; columns containing a dot hold related-entity fields
select(-matches("\\."), everything()) %>%
type_convert(col_types = cols()) %>%
as_tibble()

# drop columns which are completely missing. This happens whenever a linked object is
# null for a record: a bare "sf:EntityName" column is created that is NA for that
# record and NA for the other records as well, since the entity is non-null for them
this_res <- Filter(function(x) !all(is.na(x)), this_res)

return(this_res)
}
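
Because of the `stopifnot(length(sf_id) == 1)` guard, checking several records means
one call per Id. A sketch of that pattern (the Ids are hypothetical; `map_df()` comes
from purrr, which the package already imports):

library(purrr)

lead_ids <- c("00Q6A00000aABCnZZZ", "00Q6A00000aABCoZZZ")  # made-up record Ids
all_dupes <- map_df(lead_ids, function(id){
  dupes <- sf_find_duplicates_by_id(sf_id = id)
  if(nrow(dupes) > 0) dupes$searched_id <- id  # tag rows with the Id that was searched
  dupes
})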

#' #' Delete from Recycle Bin
#' #'
#' #' Delete records from the recycle bin immediately.
11 changes: 8 additions & 3 deletions R/utils-xml.R
@@ -132,8 +132,12 @@ make_soap_xml_skeleton <- function(soap_headers=list(), metadata_ns=FALSE){
if(length(soap_headers)>0){
for(i in 1:length(soap_headers)){
opt_node <- newXMLNode(paste0(ns_prefix, ":", names(soap_headers)[i]),
parent=header_node)
for(j in 1:length(soap_headers[[i]])){
this_node <- newXMLNode(paste0(ns_prefix, ":", names(soap_headers[[i]])[j]),
as.character(soap_headers[[i]][[j]]),
parent=opt_node)
}
}
}
return(root)
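
To illustrate the new nesting (a sketch, not part of the diff): each element of a
header's option list now becomes its own child node, so the call below produces an
includeRecordDetails node and a runAsCurrentUser node inside one DuplicateRuleHeader
node, instead of the whole list being collapsed into a single text value as before.

r <- make_soap_xml_skeleton(soap_headers = list(
  DuplicateRuleHeader = list(includeRecordDetails = "true",
                             runAsCurrentUser = "true")))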
@@ -164,7 +168,8 @@ build_soap_xml_from_list <- function(input_data,
"delete", "search",
"query", "queryMore",
"describeSObjects",
"setPassword", "resetPassword"),
"setPassword", "resetPassword",
"findDuplicates", "findDuplicatesByIds"),
object_name=NULL,
fields=NULL,
external_id_fieldname=NULL,
@@ -216,7 +221,7 @@
input_data[1,1],
parent=operation_node)

} else if(which_operation %in% c("delete","retrieve")){
} else if(which_operation %in% c("delete","retrieve","findDuplicatesByIds")){

for(i in 1:nrow(input_data)){
this_node <- newXMLNode("urn:ids",
91 changes: 87 additions & 4 deletions R/utils.R
@@ -60,7 +60,6 @@ sf_input_data_validation <- function(input_data, operation=''){
# TODO: Automatic date validation
# https://developer.salesforce.com/docs/atlas.en-us.api_bulk_v2.meta/api_bulk_v2/datafiles_date_format.htm

if(!is.data.frame(input_data)){
if(is.null(names(input_data))){
if(!is.list(input_data)){
@@ -77,11 +76,11 @@ sf_input_data_validation <- function(input_data, operation=''){
names(input_data) <- "sObjectType"
}

if(operation %in% c("delete", "retrieve") & ncol(input_data) == 1){
if(operation %in% c("delete", "retrieve", "findDuplicatesByIds") & ncol(input_data) == 1){
names(input_data) <- "Id"
}

if(operation %in% c("delete", "update")){
if(operation %in% c("delete", "update", "findDuplicatesByIds")){
if(any(grepl("^ID$|^IDS$", names(input_data), ignore.case=TRUE))){
idx <- grep("^ID$|^IDS$", names(input_data), ignore.case=TRUE)
names(input_data)[idx] <- "Id"
@@ -90,4 +89,88 @@ sf_input_data_validation <- function(input_data, operation=''){
}

return(input_data)
}
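
A quick sketch of what the added findDuplicatesByIds branches accomplish (made-up
Ids; sf_input_data_validation() may be internal, so the direct call here is purely
for illustration): a single-column input, or any case-insensitive "id"/"ids"
column, ends up named "Id".

dat <- data.frame(id = c("00Q6A00000aABCnZZZ", "00Q6A00000aABCoZZZ"))
dat <- sf_input_data_validation(dat, operation = "findDuplicatesByIds")
names(dat)
# [1] "Id"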

api_headers <- function(api_type=NULL,
AllorNoneHeader=list(allOrNone=FALSE),
AllowFieldTruncationHeader=list(allowFieldTruncation=FALSE),
AssignmentRuleHeader=list(useDefaultRule=TRUE),
CallOptions=list(client=NA, defaultNamespace=NA),
DisableFeedTrackingHeader=list(disableFeedTracking=FALSE),
DuplicateRuleHeader=list(allowSave=FALSE,
includeRecordDetails=FALSE,
runAsCurrentUser=TRUE),
EmailHeader=list(triggerAutoResponseEmail=FALSE,
triggerOtherEmail=FALSE,
triggerUserEmail=TRUE),
LimitInfoHeader=list(current="20",
limit="250",
type="API REQUESTS"),
LocaleOptions=list(language=NA),
LoginScopeHeader=list(organizationId=NA,
portalId=NA),
MruHeader=list(updateMru=FALSE),
OwnerChangeOptions=list(options=list(list(execute=FALSE,
type="EnforceNewOwnerHasReadAccess"),
list(execute=TRUE,
type="KeepSalesTeam"),
list(execute=FALSE,
type="KeepSalesTeamGrantCurrentOwnerReadWriteAccess"),
list(execute=TRUE,
type="TransferOpenActivities"),
list(execute=FALSE,
type="TransferNotesAndAttachments"),
list(execute=TRUE,
type="TransferOtherOpenOpportunities"),
list(execute=TRUE,
type="TransferOwnedOpenOpportunities"),
list(execute=TRUE,
type="TransferContracts"),
list(execute=TRUE,
type="TransferOrders"),
list(execute=TRUE,
type="TransferContacts"))),
PackageVersionHeader=list(packageVersions=NA),
QueryOptions=list(batchSize=500),
SessionHeader=list(sessionId=NA),
UserTerritoryDeleteHeader=list(transferToUserId=NA),
ContentTypeHeader=list(`Content-Type`="application/xml"),
BatchRetryHeader=list(`Sforce-Disable-Batch-Retry`=FALSE),
LineEndingHeader=list(`Sforce-Line-Ending`=NA),
PKChunkingHeader=list(`Sforce-Enable-PKChunking`=FALSE)){

# check if it's in the supplied and known list
# tailor the search to the API

if(!is.null(api_type)){
api_type <- match.arg(api_type, choices = c("SOAP", "REST", "Bulk 1.0", "Bulk 2.0"))
if(api_type == "SOAP"){
# TODO: filter to the headers recognized by the SOAP API
} else if(api_type == "REST"){
# TODO: filter to the headers recognized by the REST API
} else if(api_type == "Bulk 1.0"){
# TODO: filter to the headers recognized by the Bulk 1.0 API
} else {
# do nothing
}
}

# TODO: tailor locale-sensitive defaults, e.g. via sf_user_info()$userLocale

# WIP: return an empty list until the header filtering above is implemented
list()
}
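
Though api_headers() is still a stub, its defaults already mirror the nested list
format that make_soap_xml_skeleton() now consumes. An assumption about where this
work-in-progress is headed (not something the commit implements): selected defaults
would be overridden and passed straight through as SOAP headers.

hdrs <- list(DuplicateRuleHeader = list(allowSave = FALSE,
                                        includeRecordDetails = TRUE,
                                        runAsCurrentUser = TRUE))
r <- make_soap_xml_skeleton(soap_headers = hdrs)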



# TESTING
# # if x is used, then it must be supplied or given a default
# # Error in zz() : argument "x" is missing, with no default
# zz <- function(x,y){
# if(missing(x)){
# x <- 2
# }
# xx <- x
# return(5)
# }
