-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutilities-data.R
337 lines (314 loc) · 12.3 KB
/
utilities-data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#' Convert string to numeric
#'
#' @param string A string or a list of strings to be converted to numeric values
#' @param lloqMode How to treat entries below LLOQ, i.e., of a form "<2":
#' `LLOQ/2` (default): return the number divided by 2,
#' `LLOQ`: return the numerical value,
#' `ZERO`: return 0,
#' `ignore`: return `NA`
#' @param uloqMode How to treat entries above ULOQ, i.e., of a form ">2":
#' `ULOQ`: return the numerical value,
#' `ignore`: return `NA`
#'
#' @details Tries to convert each string to a numeric with `as.numeric()`.
#' If any conversion fails and returns `NA`, the value is tested for being a LLOQ-
#' or a ULOQ value, i.e., of a form "<2" or ">2", respectively. If this is a case,
#' the returned value is defined by the parameters `lloqMode` and `uloqMode`.
#' In any other case where the string cannot be converted to a numeric, `NA` is returned.
#' @return A numeric value or a list of numeric values
#' @export
stringToNum <- function(string, lloqMode = LLOQMode$`LLOQ/2`, uloqMode = ULOQMode$ULOQ) {
# Input validations
validateEnumValue(lloqMode, LLOQMode)
validateEnumValue(uloqMode, ULOQMode)
# Remove all whitespaces
string <- gsub(" ", "", string, fixed = TRUE)
# Attempt to convert all passed values to numeric
numVals <- suppressWarnings(as.numeric(string))
# If any values could not be interpreted and were coerced to NA, decide what to do (e.g. LLOQ and ULOQ treatment)
naVals <- is.na(numVals)
if (any(naVals)) {
for (idx in which(naVals)) {
# If the value in the original string is NA, skip
if (is.na(string[[idx]])) {
next
}
# CHECK FOR LLOQ
if (substring(string[[idx]], first = 1, last = 1) == "<") {
# Transform the value that follow the "<" character
value <- suppressWarnings(as.numeric(substring(string[[idx]], first = 2)))
# If value is NA (could not convert to numeric), continue, as the output
# should be NA
if (is.na(value)) {
next
}
switch(lloqMode,
"LLOQ/2" = {
numVals[[idx]] <- value / 2
},
"LLOQ" = numVals[[idx]] <- value,
# set all data points with lloq to 0
"ZERO" = numVals[[idx]] <- 0,
# remove data points with lloq
"ignore" = numVals[[idx]] <- NA
)
}
# CHECK FOR ULOQ
if (substring(string[[idx]], first = 1, last = 1) == ">") {
# Transform the value that follow the "<" character
value <- suppressWarnings(as.numeric(substring(string[[idx]], first = 2)))
# If value is NA (could not convert to numeric), continue, as the output
# should be NA
if (is.na(value)) {
next
}
switch(uloqMode,
"ULOQ" = numVals[[idx]] <- value,
# remove data points with lloq
"ignore" = numVals[[idx]] <- NA
)
}
}
}
return(numVals)
}
#' Calculate mean and standard deviation for the yValues of the given `DataSet` objects
#'
#' @param dataSets list of `DataSet` objects
#' @param method method for calculating the mean and standard deviation - either
#' `arithmetic` (default) or `geometric`
#' @param lloqMode how to treat data points below LLOQ if LLOQ is given - `LLOQ/2` (default):
#' use as given (since `DataSet` stores values below LLOQ as `LLOQ/2`),
#' `LLOQ`: set value to LLOQ value, `ZERO`: set value to 0, `ignore`:
#' do not use data points for mean calculation
#' @param outputXunit xUnit of output data set, if `NULL` (default) xUnit of the first data set
#' will be used
#' @param outputYunit yUnit of output data set, if `NULL` (default) yUnit of the first data set
#' will be used
#' @param outputMolWeight molWeight of output data set in `g/mol` - obligatory when initial
#' data sets have differing molWeight values
#' @details Calculates mean and standard deviation of the yValues of the given `DataSet`
#' objects per xValue. The meta data of the returned `DataSet` consists of all meta
#' data that are equal in all initial data sets. Its LLOQ is the mean LLOQ value of all
#' data sets which have an LLOQ set, e.g. if dataSet1 has LLOQ 1, dataSet2 has LLOQ 3
#' and dataSet3 has no LLOQ, then 2 is used for the returned `DataSet`.
#' The LLOQ of the returned `DataSet` is the arithmetic mean of LLOQ values of
#' all `DataSet`s
#' @return A single `DataSet` object
#' @export
calculateMeanDataSet <- function(dataSets, method = "arithmetic", lloqMode = LLOQMode$`LLOQ/2`,
outputXunit = NULL, outputYunit = NULL, outputMolWeight = NULL) {
validateIsOfType(dataSets, "DataSet")
if (!any(c("arithmetic", "geometric") == method)) {
stop(messages$errorInvalidMeanMethod())
}
validateEnumValue(lloqMode, LLOQMode)
meanDataSet <- ospsuite::DataSet$new(name = "Mean")
df <- ospsuite::dataSetToDataFrame(dataSets)
# If df is empty, return an empty mean data set
if (isEmpty(df)) {
meanDataSet$addMetaData(name = "Subject ID", value = "mean")
return(meanDataSet)
}
molWeights <- unique(df$molWeight)
# molWeight is specified by user - use as specified
if (!is.null(outputMolWeight)) {
meanDataSet$molWeight <- outputMolWeight
} else if (length(molWeights) > 1) {
# molWeight is not specified by user and molWeights of data sets differ -
# error
stop(messages$errorOutputMolWeightNeeded())
} else if (!is.na(molWeights)) {
# molWeight is not specified by user and all molWeights are equal and not NULL -
# take this value
meanDataSet$molWeight <- molWeights
}
# set units and dimensions to those of first data set if not specified otherwise
# outputXunit and outputYunit
if (is.null(outputXunit)) {
meanDataSet$xDimension <- df$xDimension[1]
meanDataSet$xUnit <- df$xUnit[1]
} else {
meanDataSet$xDimension <- ospsuite::getDimensionForUnit(outputXunit)
meanDataSet$xUnit <- outputXunit
}
if (is.null(outputYunit)) {
meanDataSet$yDimension <- df$yDimension[1]
meanDataSet$yUnit <- df$yUnit[1]
} else {
meanDataSet$yDimension <- ospsuite::getDimensionForUnit(outputYunit)
meanDataSet$yUnit <- outputYunit
}
# adjust yValues as specified by lloqMode argument
ind <- !is.na(df$lloq) & df$yValues < df$lloq
switch(lloqMode,
# nothing to do for LLOQ/2
"LLOQ/2" = {
},
# set all data points with lloq that are smaller than it to value of lloq
"LLOQ" = df[ind, "yValues"] <- df[ind, "lloq"],
# set all data points with lloq to 0
"ZERO" = df[ind, "yValues"] <- 0,
# remove data points with lloq
"ignore" = df <- df[!ind, ]
)
# meanDataSet$LLOQ = arithmetic mean lloq of all data sets with lloq
lloqMean <- suppressWarnings(mean(unlist(sapply(c(dataSets), function(x) {
x$LLOQ
}), use.names = FALSE), na.rm = TRUE))
if (!is.na(lloqMean)) {
meanDataSet$LLOQ <- lloqMean
}
# convert xValues to same unit
df$xValues <- mapply(
function(vals, unit) {
ospsuite::toUnit(
quantityOrDimension = meanDataSet$xDimension,
targetUnit = meanDataSet$xUnit,
values = vals,
sourceUnit = unit
)
},
df$xValues, df$xUnit
)
# convert yValues to same unit
df$yValues <- mapply(
function(vals, unit, mw) {
ospsuite::toUnit(
quantityOrDimension = meanDataSet$yDimension,
targetUnit = meanDataSet$yUnit,
values = vals,
sourceUnit = unit,
molWeight = mw,
molWeightUnit = "g/mol"
)
},
df$yValues, df$yUnit, df$molWeight
)
# calculate means and standard deviations according to chosen method
switch(method,
arithmetic = {
yMeans <- tapply(df[["yValues"]], df[["xValues"]], mean)
yError <- tapply(df[["yValues"]], df[["xValues"]], sd)
meanDataSet$setValues(xValues = as.numeric(names(yMeans)), yValues = yMeans, yErrorValues = yError)
meanDataSet$yErrorType <- ospsuite::DataErrorType$ArithmeticStdDev
},
geometric = {
yMeans <- tapply(df[["yValues"]], df[["xValues"]], geomean)
yError <- tapply(df[["yValues"]], df[["xValues"]], geosd)
meanDataSet$setValues(xValues = as.numeric(names(yMeans)), yValues = yMeans, yErrorValues = yError)
meanDataSet$yErrorType <- ospsuite::DataErrorType$GeometricStdDev
}
)
# add all meta that are equal in every data set
metaDataNames <- Reduce(intersect, lapply(c(dataSets), function(x) {
names(x$metaData)
}))
for (name in metaDataNames) {
value <- Reduce(intersect, sapply(c(dataSets), function(x) {
x$metaData[[name]]
}, simplify = FALSE))
if (length(value) != 0) {
meanDataSet$addMetaData(name = name, value = value)
}
}
meanDataSet$addMetaData(name = "Subject ID", value = "mean")
return(meanDataSet)
}
#' Possible entries for the `lloqMode` argument of `calculateMeans()`
#' @export
LLOQMode <- enum(list("LLOQ/2", "LLOQ", "ZERO", "ignore"))
#' Possible modes to treat values above the upper limit of quantification.
#' @export
ULOQMode <- enum(list("ULOQ", "ignore"))
#' Load data from excel
#'
#' @description Loads data sets from excel. The excel file containing the data
#' must be located in the folder `projectConfiguration$dataFolder`
#' and be named `projectConfiguration$dataFile`.
#' Importer configuration file must be located in the same folder and named
#' `projectConfiguration$dataImporterConfigurationFile`.
#'
#' @param projectConfiguration Object of class `ProjectConfiguration` containing
#' the necessary information.
#' @param sheets String or a list of strings defining which sheets to load.
#' If `NULL` (default), all sheets within the file are loaded.
#' @param importerConfiguration `DataImporterConfiguration` object used to load
#' the data. If `NULL` (default), default esqlabs importer configuration as
#' defined in `projectConfiguration$dataImporterConfigurationFile` will be used.
#'
#' @return
#' A named list of `DataSet` objects, with names being the names of the data sets.
#' @export
#'
#' @examples
#' \dontrun{
#' # Create default project configuration
#' projectConfiguration <- createProjectConfiguration()
#' dataSets <- loadObservedData(projectConfiguration)
#' }
loadObservedData <- function(projectConfiguration, sheets = NULL, importerConfiguration = NULL) {
importerConfiguration <- importerConfiguration %||% ospsuite::loadDataImporterConfiguration(
configurationFilePath = projectConfiguration$dataImporterConfigurationFile
)
validateIsString(sheets, nullAllowed = TRUE)
# If sheets have been specified, import only those. Otherwise try to import all
# sheets
importAllSheets <- TRUE
if (!is.null(sheets)) {
importerConfiguration$sheets <- sheets
importAllSheets <- FALSE
}
dataSets <- ospsuite::loadDataSetsFromExcel(
xlsFilePath = projectConfiguration$dataFile,
importerConfigurationOrPath = importerConfiguration,
importAllSheets = importAllSheets
)
return(dataSets)
}
#' Load data from pkml
#'
#' @description Loads data sets that are exported as pkml. The files must be
#' located in the folder `projectConfiguration$dataFolder`, subfolder `pkml`.
#' and be named `projectConfiguration$dataFile`.
#'
#' @param projectConfiguration Object of class `ProjectConfiguration` containing
#' the necessary information.
#' @param obsDataNames String or a list of strings defining data sets to load
#' If `NULL` (default), all data sets located in the folder are loaded. Must not
#' contain the ".pkml" file extension.
#'
#' @return
#' A named list of `DataSet` objects, with names being the names of the data sets.
#' @export
#'
#' @examples
#' \dontrun{
#' # Create default project configuration
#' projectConfiguration <- createProjectConfiguration()
#' dataSets <- loadObservedData(projectConfiguration)
#' }
loadObservedDataFromPKML <- function(projectConfiguration, obsDataNames = NULL) {
ospsuite.utils::validateIsString(obsDataNames, nullAllowed = TRUE)
# If data sets have been specified, import only those. Otherwise try to import all
# files
if (!is.null(obsDataNames)) {
allFiles <- paste0(obsDataNames, ".pkml")
} else {
allFiles <- list.files(path = file.path(
projectConfiguration$dataFolder,
"pkml"
), pattern = "*.pkml")
}
dataSets <- lapply(allFiles, function(fileName) {
ospsuite::loadDataSetFromPKML(filePath = file.path(
projectConfiguration$dataFolder,
"pkml",
fileName
))
})
names(dataSets) <- lapply(dataSets, function(x) {
x$name
})
return(dataSets)
}