diff --git a/src/main/conf/import.json b/src/main/conf/import.json index 4f3ea83..033bf41 100644 --- a/src/main/conf/import.json +++ b/src/main/conf/import.json @@ -12,21 +12,23 @@ { "key": "delim", "title": "Delimiter Character", + "default": ",", + "maximum": 1, "type": "string", "description": "Single character used to separate fields within a record. Leave blank to read whitespace-separated columns." }, { - "key": "col_types", - "title": "Column Types", + "key": "na", "type": "string", - "format": "textarea", - "default": ".default=\"?\"", - "description": "A compact string representation where each character represents one column: c = character, i = integer, n = number, d = double, l = logical, D = date, T = date time, t = time, ? = guess, or _/- to skip the column." + "title": "Missing Values", + "description": "A comma-separated list of values to be considered as missing." }, { - "key": "is_col_types_subset", - "type": "boolean", - "title": "Subset of Columns" + "key": "skip", + "type": "integer", + "default": 0, + "title": "Skip", + "description": "Number of lines to skip before reading data." 
}, { "key": "locale", diff --git a/src/main/java/org/obiba/datasource/opal/readr/DataReadROperation.java b/src/main/java/org/obiba/datasource/opal/readr/DataReadROperation.java index 5a7a69c..2ebafa9 100644 --- a/src/main/java/org/obiba/datasource/opal/readr/DataReadROperation.java +++ b/src/main/java/org/obiba/datasource/opal/readr/DataReadROperation.java @@ -11,16 +11,20 @@ public class DataReadROperation extends AbstractROperation { private final String delimiter; - private final String columnSpecification; + private final String missingValuesCharacters; - private final boolean columnSpecificationForSubset; + private final int numberOfRecordsToSkip; - public DataReadROperation(String symbol, String source, String delimiter, String columnSpecification, boolean columnSpecificationForSubset) { + private final String locale; + + public DataReadROperation(String symbol, + String source, String delimiter, String missingValuesCharacters, int numberOfRecordsToSkip, String locale) { this.symbol = symbol; this.source = source; this.delimiter = delimiter; - this.columnSpecification = columnSpecification; - this.columnSpecificationForSubset = columnSpecificationForSubset; + this.missingValuesCharacters = missingValuesCharacters; + this.numberOfRecordsToSkip = numberOfRecordsToSkip; + this.locale = locale; } @Override @@ -38,18 +42,23 @@ private String getCommand() { } private String readWithDelimiter() { - return String.format("read_delim('%s', delim = '%s'%s)", source, delimiter, columnTypes()); + return String.format("read_delim('%s', delim = '%s'%s%s%s)", source, delimiter, missingValues(), numberOfRecordsToSkipValue(), localeValue()); } private String readWithTable() { - return String.format("read_table('%s'%s)", source, columnTypes()); + return String.format("read_table('%s'%s%s%s)", source, missingValues(), numberOfRecordsToSkipValue(), localeValue()); + } + + private String missingValues() { + return Strings.isNullOrEmpty(missingValuesCharacters) ? 
", na = c(\"\", \"NA\")" : String.format(", na = c(%s)", missingValuesCharacters); + } + + private String numberOfRecordsToSkipValue() { + return ", skip = " + numberOfRecordsToSkip; } - private String columnTypes() { - if (Strings.isNullOrEmpty(columnSpecification)) { - return ""; - } - return ", col_types = " + String.format(columnSpecificationForSubset ? "cols_only(%s)" : "cols(%s)", columnSpecification); + private String localeValue() { + return String.format(", locale = locale(\"%s\")", locale); } @Override diff --git a/src/main/java/org/obiba/datasource/opal/readr/ReadRDatasourceService.java b/src/main/java/org/obiba/datasource/opal/readr/ReadRDatasourceService.java index 2b69160..30889f9 100644 --- a/src/main/java/org/obiba/datasource/opal/readr/ReadRDatasourceService.java +++ b/src/main/java/org/obiba/datasource/opal/readr/ReadRDatasourceService.java @@ -4,6 +4,8 @@ import javax.validation.constraints.NotNull; +import com.google.common.base.Strings; + import org.json.JSONObject; import org.obiba.magma.Datasource; import org.obiba.magma.DatasourceFactory; @@ -33,14 +35,16 @@ protected Datasource internalCreate() { File file = resolvePath(parameters.optString("file")); String delimiter = parameters.optString("delim"); - String columnTypes = parameters.optString("col_types"); - boolean columnSpecificationForSubset = parameters.optBoolean("is_col_types_subset"); + String missingValuesCharacters = parameters.optString("na"); + String locale = parameters.optString("locale"); + int skip = parameters.optInt("skip"); String symbol = getSymbol(file); // copy file to the R session prepareFile(file); - execute(new DataReadROperation(symbol, file.getName(), delimiter, columnTypes, columnSpecificationForSubset)); - return new RDatasource(getName(), getRSessionHandler(), symbol, parameters.optString("entity_type"), parameters.optString("id")); + execute(new DataReadROperation(symbol, file.getName(), delimiter, missingValuesCharacters, skip, Strings.isNullOrEmpty(locale) ? 
"en" : locale)); + return new RDatasource(getName(), getRSessionHandler(), symbol, parameters.optString("entity_type"), + parameters.optString("id")); } }; factory.setRSessionHandler(getRSessionHandler());