From d75523988781e03e3507c2a4dcb2d9cf0a08bec1 Mon Sep 17 00:00:00 2001 From: eblondel Date: Fri, 23 Aug 2019 10:26:39 +0200 Subject: [PATCH] #21 line separator --- NAMESPACE | 3 +++ R/closeWorkflow.R | 1 + R/executeWorkflowJob.R | 2 +- R/geoflow_data.R | 2 +- R/geoflow_handler_contact.R | 2 +- R/geoflow_handler_entity.R | 16 ++++++++-------- R/geoflow_provenance.R | 2 +- R/geoflow_utils.R | 27 ++++++++++++++++++++++++++- R/initWorkflow.R | 9 +++++++++ R/profile.R | 3 +++ man/extract_cell_components.Rd | 11 +++++++++++ man/get_line_separator.Rd | 11 +++++++++++ man/set_line_separator.Rd | 11 +++++++++++ 13 files changed, 87 insertions(+), 13 deletions(-) create mode 100644 man/extract_cell_components.Rd create mode 100644 man/get_line_separator.Rd create mode 100644 man/set_line_separator.Rd diff --git a/NAMESPACE b/NAMESPACE index 566626c1..e52d1bab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(closeWorkflow) export(executeWorkflow) export(executeWorkflowJob) +export(extract_cell_components) export(extract_kvp) export(filter_sf_by_cqlfilter) export(geoflow_action) @@ -19,6 +20,7 @@ export(geoflow_relation) export(geoflow_right) export(geoflow_software) export(geoflow_subject) +export(get_line_separator) export(handle_contacts_csv) export(handle_contacts_df) export(handle_contacts_excel) @@ -41,6 +43,7 @@ export(register_entity_handlers) export(register_software) export(sanitize_date) export(sanitize_str) +export(set_line_separator) export(str_to_posix) import(DBI) import(R6) diff --git a/R/closeWorkflow.R b/R/closeWorkflow.R index 99764445..288a4272 100644 --- a/R/closeWorkflow.R +++ b/R/closeWorkflow.R @@ -43,4 +43,5 @@ closeWorkflow <- function(config){ config$software$csw_config <- NULL } setwd(config$wd) + set_line_separator() #default line separator } \ No newline at end of file diff --git a/R/executeWorkflowJob.R b/R/executeWorkflowJob.R index b43ab1e8..d5156f72 100644 --- a/R/executeWorkflowJob.R +++ b/R/executeWorkflowJob.R @@ -115,7 +115,7 @@ executeWorkflowJob <- function(config, jobdir){ src_entities <- config$src_entities src_entities$Identifier <- sapply(1:nrow(src_entities), function(i){ identifier <- src_entities[i, "Identifier"] - if(!endsWith(identifier,";")) identifier <- paste0(identifier, ";\n") + if(!endsWith(identifier, .geoflow$LINE_SEPARATOR)) identifier <- paste0(identifier, .geoflow$LINE_SEPARATOR) if(regexpr("doi", identifier)>0) return(identifier) if(out_zenodo_dois[i,"Status"] == "published") return(identifier) identifier <- paste0(identifier, "doi:", out_zenodo_dois[i,"DOI_for_allversions"]) diff --git a/R/geoflow_data.R b/R/geoflow_data.R index 7dc193f6..986f66c1 100644 --- a/R/geoflow_data.R +++ b/R/geoflow_data.R @@ -21,7 +21,7 @@ geoflow_data <- R6Class("geoflow_data", features = NULL, initialize = function(str = NULL){ if(!is.null(str)){ - data_props <- unlist(strsplit(sanitize_str(str), ";")) + data_props <- extract_cell_components(sanitize_str(str)) data_props <- lapply(data_props, function(data_prop){ return(extract_kvp(data_prop)) }) diff --git a/R/geoflow_handler_contact.R b/R/geoflow_handler_contact.R index ba6b7f44..ced77a95 100644 --- a/R/geoflow_handler_contact.R +++ b/R/geoflow_handler_contact.R @@ -31,7 +31,7 @@ handle_contacts_df <- function(config, source){ srcId <- sanitize_str(source_contact[,"Identifier"]) if(!is.na(srcId)){ - identifiers <- unlist(strsplit(srcId, ";")) + identifiers <- extract_cell_components(srcId) if(length(identifiers)>0){ invisible(lapply(identifiers, function(identifier){ id_obj <- geoflow_kvp$new(str = identifier) diff --git a/R/geoflow_handler_entity.R b/R/geoflow_handler_entity.R index 6a4d4d46..e829d7f5 100644 --- a/R/geoflow_handler_entity.R +++ b/R/geoflow_handler_entity.R @@ -21,7 +21,7 @@ handle_entities_df <- function(config, source){ #types src_type <- sanitize_str(source_entity[,"Type"]) - types <- if(!is.na(src_type)) unlist(strsplit(src_type, ";")) else list() + types <- if(!is.na(src_type)) extract_cell_components(src_type) else list() if(length(types)>0){ if(length(types)==1){ entity$setType("generic", types) @@ -38,7 +38,7 @@ handle_entities_df <- function(config, source){ } #identifier - identifiers <- unlist(strsplit(sanitize_str(source_entity[,"Identifier"]), ";")) + identifiers <-extract_cell_components(sanitize_str(source_entity[,"Identifier"])) for(identifier in identifiers){ if(regexpr(":",identifier) == -1){ entity$setIdentifier("id", identifier) @@ -53,7 +53,7 @@ handle_entities_df <- function(config, source){ #description src_description <- sanitize_str(source_entity[,"Description"]) - descriptions <- if(!is.na(src_description)) unlist(strsplit(src_description, ";")) else list() + descriptions <- if(!is.na(src_description)) extract_cell_components(src_description) else list() if(length(descriptions)>0){ if(length(descriptions)==1){ if(regexpr(":",descriptions) == -1){ @@ -76,7 +76,7 @@ handle_entities_df <- function(config, source){ #subjects src_subject <- sanitize_str(source_entity[,"Subject"]) - subjects <- if(!is.na(src_subject)) unlist(strsplit(src_subject, ";")) else list() + subjects <- if(!is.na(src_subject)) extract_cell_components(src_subject) else list() if(length(subjects)>0){ invisible(lapply(subjects, function(subject){ subject_obj <- geoflow_subject$new(str = subject) @@ -86,7 +86,7 @@ handle_entities_df <- function(config, source){ #contacts src_contact <- sanitize_str(source_entity[,"Creator"]) - contacts <- if(!is.na(src_contact)) unlist(strsplit(src_contact, ";")) else list() + contacts <- if(!is.na(src_contact)) extract_cell_components(src_contact) else list() if(length(contacts)>0){ invisible(lapply(contacts, function(contact){ contact_splits <- unlist(strsplit(contact, ":")) @@ -108,7 +108,7 @@ handle_entities_df <- function(config, source){ #relations src_relation <- sanitize_str(source_entity[,"Relation"]) - relations <- if(!is.na(src_relation)) unlist(strsplit(src_relation, ";")) else list() + relations <- if(!is.na(src_relation)) extract_cell_components(src_relation) else list() if(length(relations)>0){ invisible(lapply(relations, function(relation){ relation_obj <- geoflow_relation$new(str = relation) @@ -121,7 +121,7 @@ handle_entities_df <- function(config, source){ if(!is.na(spatial_cov)){ if(!startsWith(spatial_cov,"SRID=")) stop("The spatial coverage should be a valid EWKT string, starting with the SRID definition (e.g. SRID=4326), followed by a semicolon and the WKT geometry") - spatial_cov <- unlist(strsplit(spatial_cov,";")) + spatial_cov <- unlist(strsplit(spatial_cov, ";")) if(length(spatial_cov)!=2) stop("The spatial coverage should be a valid EWKT string, starting with the SRID definition (e.g. SRID=4326), followed by a semicolon and the WKT geometry") spatial_srid <- as.integer(unlist(strsplit(spatial_cov[1],"SRID="))[2]) @@ -138,7 +138,7 @@ handle_entities_df <- function(config, source){ #Rights src_rights <- sanitize_str(source_entity[,"Rights"]) - rights <- if(!is.na(src_rights)) unlist(strsplit(src_rights, ";")) else list() + rights <- if(!is.na(src_rights)) extract_cell_components(src_rights) else list() if(length(rights)>0){ invisible(lapply(rights, function(right){ right_obj <- geoflow_right$new(str = right) diff --git a/R/geoflow_provenance.R b/R/geoflow_provenance.R index 1e6f9d51..01702045 100644 --- a/R/geoflow_provenance.R +++ b/R/geoflow_provenance.R @@ -6,7 +6,7 @@ geoflow_provenance <- R6Class("geoflow_provenance", processes = list(), initialize = function(str = NULL){ if(!is.null(str)){ - data_props <- unlist(strsplit(sanitize_str(str), ";")) + data_props <- extract_cell_components(sanitize_str(str)) state_prop <- data_props[[1]] if(!startsWith(state_prop, "statement")){ stop("The data 'statement' is mandatory") diff --git a/R/geoflow_utils.R b/R/geoflow_utils.R index 28df966c..f7b4acc0 100644 --- a/R/geoflow_utils.R +++ b/R/geoflow_utils.R @@ -2,8 +2,13 @@ #'@export sanitize_str <- function(str){ if(!is(str, "character")) return(str) + if(is.na(str)) return(NA) if(!is.na(str) & str=="") return(NA) - #str <- gsub("\n", ";", str) + startwith_n <- startsWith(str, "\n") + while(startwith_n){ + str <- substr(str, 2, nchar(str)) + startwith_n <- startsWith(str, "\n") + } str <- gsub(";;", ";", str) str <- gsub(",;", ",", str) str <- gsub(":;", ":", str) @@ -94,4 +99,24 @@ filter_sf_by_cqlfilter <- function(sfdata, cqlfilter){ sfdata.filtered <- try(eval(parse(text= sprintf("sfdata[%s,]",rfilter)))) if(class(sfdata.filtered)[1]!="try-error") out <- sfdata.filtered return(out) +} + +#'extract_cell_components +#'@export +extract_cell_components <- function(x){ + lines <- unlist(strsplit(x, get_line_separator())) + return(lines) +} + +#'set_line_separator +#'@export +set_line_separator <- function(x = ";\n"){ + if(!is(x,"character")) stop("The line separator should be an object of class 'character'") + .geoflow$LINE_SEPARATOR <- x +} + +#'get_line_separator +#'@export +get_line_separator <- function(){ + return(.geoflow$LINE_SEPARATOR) } \ No newline at end of file diff --git a/R/initWorkflow.R b/R/initWorkflow.R index 895597f5..d4489637 100644 --- a/R/initWorkflow.R +++ b/R/initWorkflow.R @@ -37,6 +37,15 @@ initWorkflow <- function(file){ config$profile <- profile } + #options + if(!is.null(config$options)){ + config$logger.info("Setting geoflow global options...") + if(!is.null(config$options$line_separator)){ + config$logger.info(sprintf("Setting option 'line_separator' to '%s'", config$options$line_separator)) + set_line_separator(config$options$line_separator) + } + } + #working dir if(is.null(config$wd)) config$wd <- getwd() diff --git a/R/profile.R b/R/profile.R index 4e59963e..d0027477 100644 --- a/R/profile.R +++ b/R/profile.R @@ -2,6 +2,9 @@ assign(".geoflow", new.env(), envir= asNamespace(pkgname)) + #default line separator + set_line_separator() + #embedded handlers register_contact_handlers() register_entity_handlers() diff --git a/man/extract_cell_components.Rd b/man/extract_cell_components.Rd new file mode 100644 index 00000000..bc91b700 --- /dev/null +++ b/man/extract_cell_components.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geoflow_utils.R +\name{extract_cell_components} +\alias{extract_cell_components} +\title{extract_cell_components} +\usage{ +extract_cell_components(x) +} +\description{ +extract_cell_components +} diff --git a/man/get_line_separator.Rd b/man/get_line_separator.Rd new file mode 100644 index 00000000..7a2bba30 --- /dev/null +++ b/man/get_line_separator.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geoflow_utils.R +\name{get_line_separator} +\alias{get_line_separator} +\title{get_line_separator} +\usage{ +get_line_separator() +} +\description{ +get_line_separator +} diff --git a/man/set_line_separator.Rd b/man/set_line_separator.Rd new file mode 100644 index 00000000..9ea21f09 --- /dev/null +++ b/man/set_line_separator.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geoflow_utils.R +\name{set_line_separator} +\alias{set_line_separator} +\title{set_line_separator} +\usage{ +set_line_separator(x) +} +\description{ +set_line_separator +}