Add Duplicate Matching and Start of More Global Header Options Control
StevenMMortimer committed May 15, 2019
1 parent 9c10b48 commit dd894d8
Showing 29 changed files with 1,840 additions and 616 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
@@ -3,6 +3,7 @@
export(VERB_n)
export(build_soap_xml_from_list)
export(catch_errors)
export(collapse_list_with_dupe_names)
export(get_os)
export(make_base_metadata_url)
export(make_base_rest_url)
@@ -68,6 +69,8 @@ export(sf_describe_metadata)
export(sf_describe_object_fields)
export(sf_describe_objects)
export(sf_end_job_bulk)
export(sf_find_duplicates)
export(sf_find_duplicates_by_id)
export(sf_get_all_jobs_bulk)
export(sf_get_job_bulk)
export(sf_get_job_records_bulk)
@@ -157,6 +160,7 @@ importFrom(methods,as)
importFrom(purrr,map)
importFrom(purrr,map_df)
importFrom(purrr,map_dfc)
importFrom(purrr,modify_if)
importFrom(readr,col_character)
importFrom(readr,col_guess)
importFrom(readr,cols)
11 changes: 8 additions & 3 deletions NEWS.md
@@ -4,17 +4,22 @@

* Add **RForcecom** backward compatible version of `rforcecom.getObjectDescription()`
* Add `sf_describe_object_fields()` which is a tidier version of `rforcecom.getObjectDescription()`
* Allow users to control whether query results are kept as all character or the
types are guessed (#12)
* Add `sf_get_all_jobs_bulk()` so that users can retrieve details for all
bulk jobs (#13)
* Add new utility functions `sf_set_password()` and `sf_reset_password()` (#11)
* Add two new functions to check for duplicates (`sf_find_duplicates()`, `sf_find_duplicates_by_id()`) (#4)

### Bug Fixes

* Fix bug where Username/Password authenticated sessions were not working with
api_type = "Bulk 1.0"
* Fix bug where Bulk 1.0 queries that time out hit an error while trying to abort,
since aborting was only supported for Bulk 2.0 jobs (#13)
* Fix bug where hard-coded URLs meant that only production environment logins
were possible (@weckstm, #18)
* Make `sf_describe_object_fields()` more robust against nested list elements (#16)

---

3 changes: 2 additions & 1 deletion R/read-metadata.R
@@ -73,7 +73,8 @@ sf_read_metadata <- function(metadata_type, object_names, verbose=FALSE){
#' of the fields on that object by returning a tibble with one row per field.
#'
#' @importFrom readr type_convert cols
#' @importFrom dplyr as_tibble
#' @importFrom purrr modify_if
#' @template object_name
#' @note The tibble only contains the fields that the user can view, as defined by
#' the user's field-level security settings.
176 changes: 176 additions & 0 deletions R/utils-org.R
@@ -270,6 +270,182 @@ sf_list_objects <- function(){
return(response_parsed)
}

#' Find Duplicate Records
#'
#' Performs rule-based searches for duplicate records.
#'
#' @importFrom dplyr select rename_at everything matches contains as_tibble vars
#' @importFrom readr cols type_convert
#' @importFrom purrr map map_df
#' @importFrom xml2 xml_ns_strip xml_find_all xml_text
#' @importFrom httr content
#' @param search_criteria \code{list}; a list of fields and their values that would
#' constitute a match. For example, \code{list(FirstName="Marc", Company="Salesforce")}
#' @template object_name
#' @template include_record_details
#' @template verbose
#' @return \code{tbl_df} of records found to be duplicates by the match rules
#' @note You must have activated duplicate rules for the supplied object before running
#' this function. The \code{object_name} argument determines which object's duplicate
#' rules are applied to the search criteria when deciding which records, possibly in
#' other objects, are duplicates.
#' @examples
#' \dontrun{
#' # if we insert a lead with this email address, what duplicate records exist
#' # elsewhere according to the Lead object's duplicate rules?
#' found_dupes <- sf_find_duplicates(search_criteria = list(Email = "bond_john@@grandhotels.com"),
#' object_name = "Lead")
#'
#' # now look across all other objects using the Contact object rules
#' found_dupes <- sf_find_duplicates(search_criteria = list(Email = "bond_john@@grandhotels.com"),
#' object_name = "Contact")
#' }
#' @export
sf_find_duplicates <- function(search_criteria,
object_name,
include_record_details = FALSE,
verbose = FALSE){

base_soap_url <- make_base_soap_url()
if(verbose) {
message(base_soap_url)
}

# build the body
r <- make_soap_xml_skeleton(
  soap_headers = list(
    DuplicateRuleHeader = list(includeRecordDetails = tolower(include_record_details))
  )
)
xml_dat <- build_soap_xml_from_list(input_data = search_criteria,
operation = "findDuplicates",
object_name = object_name,
root = r)

httr_response <- rPOST(url = base_soap_url,
headers = c("SOAPAction"="create",
"Content-Type"="text/xml"),
body = as(xml_dat, "character"))
catch_errors(httr_response)
response_parsed <- content(httr_response, encoding='UTF-8')

duplicate_entitytype <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRuleEntityType') %>%
xml_text() %>%
head(1)

which_rules <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRule') %>%
map(xml_text) %>%
unlist() %>%
paste(collapse = ", ")

message(sprintf("Using %s rules: %s", duplicate_entitytype, which_rules))

this_res <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//matchResults//matchRecords//record') %>%
map_df(xml_nodeset_to_df) %>%
rename_at(.vars = vars(contains("sf:")),
.funs = list(~gsub("sf:", "", .))) %>%
rename_at(.vars = vars(contains("Id1")),
.funs = list(~gsub("Id1", "Id", .))) %>%
select(-matches("^V[0-9]+$")) %>%
# move columns without a dot up front; columns containing a dot hold related-entity fields
select(-matches("\\."), everything()) %>%
type_convert(col_types = cols()) %>%
as_tibble()

# drop columns which are completely missing. This happens whenever a linked object is
# null for a record: a bare "sf:EntityName" column is created that is NA for that
# record and NA for the other records as well, since the entity is non-null for them
this_res <- Filter(function(x) !all(is.na(x)), this_res)

return(this_res)
}
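
As a usage sketch (editorial illustration, not part of the commit): assuming an
authenticated salesforcer session via the package's `sf_auth()` and an org with an
active Lead duplicate rule, the call below runs that rule against a single email
address. The email value is made up.

library(salesforcer)
sf_auth()  # assumes cached credentials or an interactive OAuth flow

found_dupes <- sf_find_duplicates(search_criteria = list(Email = "bond_john@grandhotels.com"),
                                  object_name = "Lead",
                                  include_record_details = TRUE)
found_dupes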

#' Find Duplicate Records By Id
#'
#' Performs rule-based searches for duplicate records.
#'
#' @template sf_id
#' @template include_record_details
#' @template verbose
#' @return \code{tbl_df} of records found to be duplicates by the match rules
#' @note You must have activated duplicate rules for the supplied object before running
#' this function. This function uses the duplicate rules for the object that has
#' the same type as the input record IDs. For example, if the record Id represents
#' an Account, this function uses the duplicate rules associated with the
#' Account object.
#' @examples
#' \dontrun{
#' found_dupes <- sf_find_duplicates_by_id(sf_id = "00Q6A00000aABCnZZZ")
#' }
#' @export
sf_find_duplicates_by_id <- function(sf_id,
include_record_details = FALSE,
verbose = FALSE){

stopifnot(length(sf_id) == 1)

base_soap_url <- make_base_soap_url()
if(verbose) {
message(base_soap_url)
}

# build the body
r <- make_soap_xml_skeleton(
  soap_headers = list(
    DuplicateRuleHeader = list(includeRecordDetails = tolower(include_record_details))
  )
)
xml_dat <- build_soap_xml_from_list(input_data = sf_id,
operation = "findDuplicatesByIds",
root = r)

httr_response <- rPOST(url = base_soap_url,
headers = c("SOAPAction"="create",
"Content-Type"="text/xml"),
body = as(xml_dat, "character"))
catch_errors(httr_response)
response_parsed <- content(httr_response, encoding='UTF-8')

duplicate_entitytype <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRuleEntityType') %>%
xml_text() %>%
head(1)

which_rules <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//duplicateRule') %>%
map(xml_text) %>%
unlist() %>%
paste(collapse = ", ")

message(sprintf("Using %s rules: %s", duplicate_entitytype, which_rules))

this_res <- response_parsed %>%
xml_ns_strip() %>%
xml_find_all('.//result') %>%
xml_find_all('.//duplicateResults//matchResults//matchRecords//record') %>%
map_df(xml_nodeset_to_df) %>%
rename_at(.vars = vars(contains("sf:")),
.funs = list(~gsub("sf:", "", .))) %>%
rename_at(.vars = vars(contains("Id1")),
.funs = list(~gsub("Id1", "Id", .))) %>%
select(-matches("^V[0-9]+$")) %>%
# move columns without a dot up front; columns containing a dot hold related-entity fields
select(-matches("\\."), everything()) %>%
type_convert(col_types = cols()) %>%
as_tibble()

# drop columns which are completely missing. This happens whenever a linked object is
# null for a record: a bare "sf:EntityName" column is created that is NA for that
# record and NA for the other records as well, since the entity is non-null for them
this_res <- Filter(function(x) !all(is.na(x)), this_res)

return(this_res)
}
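
Because of the `stopifnot(length(sf_id) == 1)` guard, checking several records means
one call per Id. A sketch of that pattern (the Ids are hypothetical; `map_df()` comes
from purrr, which the package already imports):

library(purrr)

lead_ids <- c("00Q6A00000aABCnZZZ", "00Q6A00000aABCoZZZ")  # made-up record Ids
all_dupes <- map_df(lead_ids, function(id){
  dupes <- sf_find_duplicates_by_id(sf_id = id)
  if(nrow(dupes) > 0) dupes$searched_id <- id  # tag rows with the Id that was searched
  dupes
})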

#' #' Delete from Recycle Bin
#' #'
#' #' Delete records from the recycle bin immediately.
11 changes: 8 additions & 3 deletions R/utils-xml.R
@@ -132,8 +132,12 @@ make_soap_xml_skeleton <- function(soap_headers=list(), metadata_ns=FALSE){
if(length(soap_headers)>0){
for(i in 1:length(soap_headers)){
opt_node <- newXMLNode(paste0(ns_prefix, ":", names(soap_headers)[i]),
parent=header_node)
for(j in 1:length(soap_headers[[i]])){
this_node <- newXMLNode(paste0(ns_prefix, ":", names(soap_headers[[i]])[j]),
as.character(soap_headers[[i]][[j]]),
parent=opt_node)
}
}
}
return(root)
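
To illustrate the new nesting (a sketch, not part of the diff): each element of a
header's option list now becomes its own child node, so the call below produces an
includeRecordDetails node and a runAsCurrentUser node inside one DuplicateRuleHeader
node, instead of the whole list being collapsed into a single text value as before.

r <- make_soap_xml_skeleton(soap_headers = list(
  DuplicateRuleHeader = list(includeRecordDetails = "true",
                             runAsCurrentUser = "true")))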
@@ -164,7 +168,8 @@ build_soap_xml_from_list <- function(input_data,
"delete", "search",
"query", "queryMore",
"describeSObjects",
"setPassword", "resetPassword"),
"setPassword", "resetPassword",
"findDuplicates", "findDuplicatesByIds"),
object_name=NULL,
fields=NULL,
external_id_fieldname=NULL,
@@ -216,7 +221,7 @@
input_data[1,1],
parent=operation_node)

} else if(which_operation %in% c("delete","retrieve")){
} else if(which_operation %in% c("delete","retrieve","findDuplicatesByIds")){

for(i in 1:nrow(input_data)){
this_node <- newXMLNode("urn:ids",
91 changes: 87 additions & 4 deletions R/utils.R
@@ -60,7 +60,6 @@ sf_input_data_validation <- function(input_data, operation=''){
# TODO: Automatic date validation
# https://developer.salesforce.com/docs/atlas.en-us.api_bulk_v2.meta/api_bulk_v2/datafiles_date_format.htm

if(!is.data.frame(input_data)){
if(is.null(names(input_data))){
if(!is.list(input_data)){
@@ -77,11 +76,11 @@ sf_input_data_validation <- function(input_data, operation=''){
names(input_data) <- "sObjectType"
}

if(operation %in% c("delete", "retrieve") & ncol(input_data) == 1){
if(operation %in% c("delete", "retrieve", "findDuplicatesByIds") & ncol(input_data) == 1){
names(input_data) <- "Id"
}

if(operation %in% c("delete", "update")){
if(operation %in% c("delete", "update", "findDuplicatesByIds")){
if(any(grepl("^ID$|^IDS$", names(input_data), ignore.case=TRUE))){
idx <- grep("^ID$|^IDS$", names(input_data), ignore.case=TRUE)
names(input_data)[idx] <- "Id"
@@ -90,4 +89,88 @@ sf_input_data_validation <- function(input_data, operation=''){
}

return(input_data)
}
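
A quick sketch of what the added findDuplicatesByIds branches accomplish (made-up
Ids; sf_input_data_validation() may be internal, so the direct call here is purely
for illustration): a single-column input, or any case-insensitive "id"/"ids"
column, ends up named "Id".

dat <- data.frame(id = c("00Q6A00000aABCnZZZ", "00Q6A00000aABCoZZZ"))
dat <- sf_input_data_validation(dat, operation = "findDuplicatesByIds")
names(dat)
# [1] "Id"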

api_headers <- function(api_type=NULL,
AllorNoneHeader=list(allOrNone=FALSE),
AllowFieldTruncationHeader=list(allowFieldTruncation=FALSE),
AssignmentRuleHeader=list(useDefaultRule=TRUE),
CallOptions=list(client=NA, defaultNamespace=NA),
DisableFeedTrackingHeader=list(disableFeedTracking=FALSE),
DuplicateRuleHeader=list(allowSave=FALSE,
includeRecordDetails=FALSE,
runAsCurrentUser=TRUE),
EmailHeader=list(triggerAutoResponseEmail=FALSE,
triggerOtherEmail=FALSE,
triggerUserEmail=TRUE),
LimitInfoHeader=list(current="20",
limit="250",
type="API REQUESTS"),
LocaleOptions=list(language=NA),
LoginScopeHeader=list(organizationId=NA,
portalId=NA),
MruHeader=list(updateMru=FALSE),
OwnerChangeOptions=list(options=list(list(execute=FALSE,
type="EnforceNewOwnerHasReadAccess"),
list(execute=TRUE,
type="KeepSalesTeam"),
list(execute=FALSE,
type="KeepSalesTeamGrantCurrentOwnerReadWriteAccess"),
list(execute=TRUE,
type="TransferOpenActivities"),
list(execute=FALSE,
type="TransferNotesAndAttachments"),
list(execute=TRUE,
type="TransferOtherOpenOpportunities"),
list(execute=TRUE,
type="TransferOwnedOpenOpportunities"),
list(execute=TRUE,
type="TransferContracts"),
list(execute=TRUE,
type="TransferOrders"),
list(execute=TRUE,
type="TransferContacts"))),
PackageVersionHeader=list(packageVersions=NA),
QueryOptions=list(batchSize=500),
SessionHeader=list(sessionId=NA),
UserTerritoryDeleteHeader=list(transferToUserId=NA),
ContentTypeHeader=list(`Content-Type`="application/xml"),
BatchRetryHeader=list(`Sforce-Disable-Batch-Retry`=FALSE),
LineEndingHeader=list(`Sforce-Line-Ending`=NA),
PKChunkingHeader=list(`Sforce-Enable-PKChunking`=FALSE)){

# check if it's in the supplied and known list
# tailor the search to the API

if(!is.null(api_type)){
api_type <- match.arg(api_type, choices = c("SOAP", "REST", "Bulk 1.0", "Bulk 2.0"))
if(api_type == "SOAP"){
# TODO: filter to the headers recognized by the SOAP API
} else if(api_type == "REST"){
# TODO: filter to the headers recognized by the REST API
} else if(api_type == "Bulk 1.0"){
# TODO: filter to the headers recognized by the Bulk 1.0 API
} else {
# do nothing
}
}

# TODO: tailor locale-sensitive defaults, e.g. via sf_user_info()$userLocale

# WIP: return an empty list until the header filtering above is implemented
list()
}
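
Though api_headers() is still a stub, its defaults already mirror the nested list
format that make_soap_xml_skeleton() now consumes. An assumption about where this
work-in-progress is headed (not something the commit implements): selected defaults
would be overridden and passed straight through as SOAP headers.

hdrs <- list(DuplicateRuleHeader = list(allowSave = FALSE,
                                        includeRecordDetails = TRUE,
                                        runAsCurrentUser = TRUE))
r <- make_soap_xml_skeleton(soap_headers = hdrs)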



# TESTING
# # if x is used, then it must be supplied or given a default
# # Error in zz() : argument "x" is missing, with no default
# zz <- function(x,y){
# if(missing(x)){
# x <- 2
# }
# xx <- x
# return(5)
# }
