Skip to content

Commit

Permalink
383 improve default binning (#387)
Browse files Browse the repository at this point in the history
* Update getDemographyAggregatedData with bins and stairstep new input

* Fixes #383 binning performed after simulationSet selection

The binning method is now centralized for all the demography parameters

* Update documentation from merge

* Update vignette with the simplified workflow

* aggregation outputs NA values when there is no data in the bin

* Remove simulationSetName from aggregation

Because the aggregation is now performed by simulation set, 
keeping the variable is not necessary and actually causing issues for the stairstep implementation
  • Loading branch information
pchelle authored Nov 23, 2020
1 parent 8102ca3 commit ffefd32
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 61 deletions.
28 changes: 18 additions & 10 deletions R/utilities-calculate-pk-parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,7 @@ plotPopulationPKParameters <- function(structureSets,
"median" = pkParameterMetaData$Value
)

aggregatedData <- getDemographyAggregatedData(
data = pkParameterData,
xParameterName = demographyParameter,
yParameterName = "Value",
xParameterBreaks = settings$xParametersBreaks[[demographyParameter]]
)

populationNames <- levels(factor(aggregatedData$Population))
populationNames <- levels(factor(pkParameterData$simulationSetName))

# For pediatric workflow, range plots compare reference population to the other populations
if (workflowType %in% c(PopulationWorkflowTypes$pediatric)) {
Expand All @@ -285,7 +278,14 @@ plotPopulationPKParameters <- function(structureSets,
)

for (populationName in populationNames[!populationNames %in% referencePopulationName]) {
comparisonData <- aggregatedData[aggregatedData$Population %in% populationName, ]
comparisonData <- pkParameterData[pkParameterData$simulationSetName %in% populationName, ]
comparisonData <- getDemographyAggregatedData(
data = comparisonData,
xParameterName = demographyParameter,
yParameterName = "Value",
bins = settings$bins,
stairstep = settings$stairstep
)
comparisonData$Population <- paste("Simulated", AggregationConfiguration$names$middle, "and", AggregationConfiguration$names$range)

comparisonVpcPlot <- vpcParameterPlot(
Expand All @@ -304,7 +304,15 @@ plotPopulationPKParameters <- function(structureSets,

# Regular range plots not associated to workflow type
for (populationName in populationNames) {
vpcData <- aggregatedData[aggregatedData$Population %in% populationName, ]
vpcData <- pkParameterData[pkParameterData$simulationSetName %in% populationName, ]
vpcData <- getDemographyAggregatedData(
data = vpcData,
xParameterName = demographyParameter,
yParameterName = "Value",
bins = settings$bins,
stairstep = settings$stairstep
)

vpcData$Population <- paste("Simulated", AggregationConfiguration$names$middle, "and", AggregationConfiguration$names$range)
vpcPlot <- vpcParameterPlot(
data = vpcData,
Expand Down
89 changes: 57 additions & 32 deletions R/utilities-demography.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ plotDemographyParameters <- function(structureSets,
metaData = demographyMetaData,
dataMapping = histogramMapping,
plotConfiguration = settings$plotConfigurations[["histogram"]],
bins = settings$bins %||% 11
bins = settings$bins %||% AggregationConfiguration$bins
)
demographyPlots[[parameterLabel]] <- demographyHistogram
demographyCaptions[[parameterLabel]] <- getPkParametersCaptions("Histogram", captionSimulationNames, demographyMetaData[[parameterName]])
Expand All @@ -84,7 +84,7 @@ plotDemographyParameters <- function(structureSets,
metaData = demographyMetaData,
dataMapping = histogramMapping,
plotConfiguration = settings$plotConfigurations[["histogram"]],
bins = settings$bins %||% 11
bins = settings$bins %||% AggregationConfiguration$bins
)

demographyPlots[[paste0(parameterLabel, "-", populationName)]] <- demographyHistogram
Expand Down Expand Up @@ -112,12 +112,6 @@ plotDemographyParameters <- function(structureSets,
"x" = demographyMetaData[[demographyParameter]],
"median" = demographyMetaData[[parameterName]]
)
aggregatedData <- getDemographyAggregatedData(
data = demographyData,
xParameterName = demographyParameter,
yParameterName = parameterName,
xParameterBreaks = settings$xParametersBreaks[[demographyParameter]]
)

# For pediatric workflow, range plots compare reference population to the other populations
if (workflowType %in% c(PopulationWorkflowTypes$pediatric)) {
Expand All @@ -138,7 +132,14 @@ plotDemographyParameters <- function(structureSets,
)

for (populationName in populationNames[!populationNames %in% referencePopulationName]) {
comparisonData <- aggregatedData[aggregatedData$Population %in% populationName, ]
comparisonData <- demographyData[demographyData$simulationSetName %in% populationName, ]
comparisonData <- getDemographyAggregatedData(
data = comparisonData,
xParameterName = demographyParameter,
yParameterName = parameterName,
bins = settings$bins,
stairstep = settings$stairstep
)
comparisonData$Population <- paste("Simulated", AggregationConfiguration$names$middle, "and", AggregationConfiguration$names$range)

comparisonVpcPlot <- vpcParameterPlot(
Expand All @@ -156,7 +157,14 @@ plotDemographyParameters <- function(structureSets,
}

for (populationName in populationNames) {
vpcData <- aggregatedData[aggregatedData$Population %in% populationName, ]
vpcData <- demographyData[demographyData$simulationSetName %in% populationName, ]
vpcData <- getDemographyAggregatedData(
data = vpcData,
xParameterName = demographyParameter,
yParameterName = parameterName,
bins = settings$bins,
stairstep = settings$stairstep
)
vpcData$Population <- paste("Simulated", AggregationConfiguration$names$middle, "and", AggregationConfiguration$names$range)

vpcPlot <- vpcParameterPlot(
Expand Down Expand Up @@ -232,42 +240,43 @@ getDefaultDemographyXParameters <- function(workflowType) {
getDemographyAggregatedData <- function(data,
xParameterName,
yParameterName,
xParameterBreaks = NULL) {
xParameterBreaks <- xParameterBreaks %||% 10
bins = NULL,
stairstep = TRUE) {
stairstep <- stairstep %||% TRUE
xParameterBreaks <- bins %||% AggregationConfiguration$bins
# binningOnQuantiles use data distribution to improve the binning
if (isOfLength(bins, 1) & AggregationConfiguration$binUsingQuantiles) {
xParameterBreaks <- unique(unname(quantile(x = data[, xParameterName], probs = seq(0, 1, length.out = xParameterBreaks))))
}
xParameterBins <- cut(data[, xParameterName], breaks = xParameterBreaks)

# simulationSetName was removed from "by" input because
# it is a factor class that messes up the aggregation now that
# simulationSetName filtering is performed before aggregation
xData <- stats::aggregate(
x = data[, xParameterName],
by = list(
Bins = xParameterBins,
Population = data[, "simulationSetName"]
),
FUN = AggregationConfiguration$functions$middle
by = list(Bins = xParameterBins),
FUN = AggregationConfiguration$functions$middle,
drop = FALSE
)

medianData <- stats::aggregate(
x = data[, yParameterName],
by = list(
Bins = xParameterBins,
Population = data[, "simulationSetName"]
),
FUN = AggregationConfiguration$functions$middle
by = list(Bins = xParameterBins),
FUN = AggregationConfiguration$functions$middle,
drop = FALSE
)
lowPercData <- stats::aggregate(
x = data[, yParameterName],
by = list(
Bins = xParameterBins,
Population = data[, "simulationSetName"]
),
FUN = AggregationConfiguration$functions$ymin
by = list(Bins = xParameterBins),
FUN = AggregationConfiguration$functions$ymin,
drop = FALSE
)
highPercData <- stats::aggregate(
x = data[, yParameterName],
by = list(
Bins = xParameterBins,
Population = data[, "simulationSetName"]
),
FUN = AggregationConfiguration$functions$ymax
by = list(Bins = xParameterBins),
FUN = AggregationConfiguration$functions$ymax,
drop = FALSE
)

aggregatedData <- cbind.data.frame(xData,
Expand All @@ -276,6 +285,22 @@ getDemographyAggregatedData <- function(data,
ymax = highPercData$x
)

if (stairstep) {
# Method in documentation of cut to get the bin edges
labs <- levels(xParameterBins)
xminValues <- as.numeric(sub("\\((.+),.*", "\\1", labs))
xmaxValues <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs))

xData <- rbind.data.frame(xData, xData)
xData$x <- sort(c(xminValues, xmaxValues))

aggregatedData <- cbind.data.frame(xData,
median = rep(medianData$x, each = 2),
ymin = rep(lowPercData$x, each = 2),
ymax = rep(highPercData$x, each = 2)
)
}

return(aggregatedData)
}

Expand Down
10 changes: 8 additions & 2 deletions R/utilities-plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ setPlotFormat <- function(format, width = NULL, height = NULL, units = NULL) {
}

#' @title AggregationConfiguration
#' @description Aggregation default properties (which functions and their captions)
#' @description Aggregation default properties (which functions and their captions).
#' @field functions list of `middle`, `ymin` and `ymax` functions for aggregation
#' @field names list of legend captions for `middle` and `range` from aggregation
#' @field bins default number of bins in plots
#' @field binUsingQuantiles logical to choose a binning based on the quantiles rather thanon a constant interval width
#' @export
AggregationConfiguration <- list(
functions = list(
Expand All @@ -60,7 +64,9 @@ AggregationConfiguration <- list(
names = list(
middle = "median",
range = "[5-95th] percentiles"
)
),
bins = 11,
binUsingQuantiles = TRUE
)

workflowWatermarkMessage <- "preliminary analysis"
Expand Down
16 changes: 14 additions & 2 deletions man/AggregationConfiguration.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions man/getSAFileIndex.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 18 additions & 10 deletions vignettes/plot-settings.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -291,18 +291,23 @@ The names in the __`plotConfigurations`__ list available for the __"plotPKParame
- __"comparisonVpcPlot"__: for VPC like plots of PK parameters vs Demography parameters comparing to reference population performed by pediatric type

Since pediatric workflows perform VPC like plot, aggregation of the data is performed along the demography parameters.
The binning of the demography parameters can be set by the __`settings`__ optional field __`xParametersBreaks`__.
This field is a list vectors corresponding of the edges of the bins and whose names are the demography parameter path.
Below shows an example of how to set such an option:
The binning of the demography parameters can be set by the optional field __`bins`__ from the task field __`settings`__.
This field can be either a unique value corresponding to the number of bins or a vector defining the bin edges for all the demography parameter paths.

Besides, the final plot can either link the aggregated values or plot them as stairstep.
The default behaviour is to perform a stairstep plot, but this can be tuned with the optional field __`stairstep`__ (`TRUE`/`FALSE`) from the task field __`settings`__.

Below shows examples of how to set such options:

```{r PK parameter settings}
# Create an empty list
myParameterSettings <- list()
# Associate the bin edges
workflowA$plotPKParameters$settings$bins <- c(0, 1, 2, 3, 5, 10)
# Associate the bin edges for Age demography parameter field
myParameterSettings[[StandardPath$Age]]$xParametersBreaks <- c(0, 1, 2, 3, 5, 10)
# Associate the number of bins
workflowA$plotPKParameters$settings$bins <- 15
workflowA$plotPKParameters$settings <- myParameterSettings
# Set VPC as stair step
workflowA$plotPKParameters$settings$stairstep <- TRUE
```

## Sensitivity plots
Expand Down Expand Up @@ -357,7 +362,10 @@ The names in the __`plotConfigurations`__ list available for the __"plotDemograp
- __"comparisonVpcPlot"__: for VPC like plots of Demography parameters vs Demography parameters comparing to reference population performed by pediatric type

Since pediatric workflows perform VPC like plot, aggregation of the data is performed along the demography parameters.
The binning of the demography parameters can be set by the __`settings`__ optional field __`xParametersBreaks`__.
This field is a list vectors corresponding of the edges of the bins and whose names are the demography parameter path.
The binning of the demography parameters can be set by the optional field __`bins`__ from the task field __`settings`__.
This field can be either a unique value corresponding to the number of bins or a vector defining the bin edges for all the demography parameter paths.

Besides, the final plot can either link the aggregated values or plot them as stairstep.
The default behaviour is to perform a stairstep plot, but this can be tuned with the optional field __`stairstep`__ (`TRUE`/`FALSE`) from the task field __`settings`__.

Similarly, parallel comparison and ratio types of population workflows perform histograms, whose binning can set using the __`settings`__ field __`bins`__.

0 comments on commit ffefd32

Please sign in to comment.