Skip to content

Commit

Permalink
Fixes #693 expost data splitter for R
Browse files Browse the repository at this point in the history
  • Loading branch information
msevestre committed Jan 10, 2020
1 parent d000aa9 commit cec0f4c
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 45 deletions.
6 changes: 3 additions & 3 deletions src/OSPSuite.Core/Domain/Services/ContainerTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,7 @@ private int getNextAvailableIndexBasedOn(IEnumerable<string> allUsedNamesMatchin
var allValues = new List<int>();
foreach (var suffix in allUsedNamesMatchingBaseFormat)
{
int value;
if (int.TryParse(suffix, out value))
if (int.TryParse(suffix, out var value))
allValues.Add(value);
}

Expand All @@ -162,7 +161,8 @@ public string CreateUniqueName(IContainer parentContainer, string baseName, bool

public PathCache<TChildren> CacheAllChildrenSatisfying<TChildren>(IContainer parentContainer, Func<TChildren, bool> predicate) where TChildren : class, IEntity
{
return new PathCache<TChildren>(_entityPathResolver).For(parentContainer.GetAllChildren(predicate));
var pathCache = new PathCache<TChildren>(_entityPathResolver);
return parentContainer == null ? pathCache : pathCache.For(parentContainer.GetAllChildren(predicate));
}

public PathCache<TChildren> CacheAllChildren<TChildren>(IContainer parentContainer) where TChildren : class, IEntity
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,12 @@ public interface IEntitiesInSimulationRetriever
/// </summary>
PathCache<IParameter> ParametersFrom(IModelCoreSimulation simulation);


/// <summary>
/// Returns all <see cref="IQuantity" /> that were selected when calculating the <paramref name="simulation" /> (e.g.
/// Persistable=true)
/// </summary>
PathCache<IQuantity> OutputsFrom(IModelCoreSimulation simulation);


/// <summary>
/// Returns all <see cref="IQuantity" /> defined under the <paramref name="simulation" /> (search performed using
/// complete hierarchy) fulfilling the given <paramref name="predicate" />
Expand Down Expand Up @@ -77,10 +75,10 @@ public PathCache<IQuantity> OutputsFrom(IModelCoreSimulation simulation)
return outputs;
}

public PathCache<IQuantity> QuantitiesFrom(IModelCoreSimulation simulation) => EntitiesFrom<IQuantity>(simulation.Model.Root);
public PathCache<IQuantity> QuantitiesFrom(IModelCoreSimulation simulation) => QuantitiesFrom(simulation, x => true);

public PathCache<IQuantity> QuantitiesFrom(IModelCoreSimulation simulation, Func<IQuantity, bool> predicate) =>
EntitiesFrom(simulation.Model.Root, predicate);
EntitiesFrom(simulation, predicate);

public PathCache<IParameter> ParametersFrom(IModelCoreSimulation simulation) => EntitiesFrom<IParameter>(simulation);

Expand All @@ -89,7 +87,7 @@ public PathCache<IQuantity> QuantitiesFrom(IModelCoreSimulation simulation, Func
public PathCache<TEntity> EntitiesFrom<TEntity>(IModelCoreSimulation simulation) where TEntity : class, IEntity => EntitiesFrom<TEntity>(simulation, x => true);

public PathCache<TEntity> EntitiesFrom<TEntity>(IModelCoreSimulation simulation, Func<TEntity, bool> predicate) where TEntity : class, IEntity =>
EntitiesFrom(simulation.Model.Root, predicate);
EntitiesFrom(simulation?.Model?.Root, predicate);

public PathCache<TEntity> EntitiesFrom<TEntity>(IContainer container) where TEntity : class, IEntity => EntitiesFrom<TEntity>(container, x => true);

Expand Down
33 changes: 24 additions & 9 deletions src/OSPSuite.Core/Domain/Services/PopulationDataSplitter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,48 @@

namespace OSPSuite.Core.Domain.Services
{
internal class PopulationDataSplitter
public class PopulationDataSplitter
{
private readonly DataTable _populationData;
private readonly DataTable _agingData;
private readonly DataTable _initialValues;
private readonly int _numberOfCores;
private readonly int _numberOfSimulationsPerCore;

public PopulationDataSplitter(DataTable populationData, DataTable agingData, DataTable initialValues, int numberOfCores)
public PopulationDataSplitter(int numberOfCores, DataTable populationData, DataTable agingData = null, DataTable initialValues=null)
{
_populationData = populationData;
_agingData = agingData;
_initialValues = initialValues;
_agingData = agingData ?? undefinedAgingData();
_initialValues = initialValues ?? undefinedInitialValues();
_numberOfCores = numberOfCores;
_numberOfSimulationsPerCore = getNumberOfSimulationsPerCore();
}

private DataTable undefinedAgingData()
{
var table = new DataTable();
table.AddColumn<int>(Constants.Population.INDIVIDUAL_ID_COLUMN);
table.AddColumn<string>(Constants.Population.PARAMETER_PATH_COLUMN);
return table;
}

private DataTable undefinedInitialValues()
{
var table = new DataTable();
table.AddColumn<int>(Constants.Population.INDIVIDUAL_ID_COLUMN);
return table;
}

public void UpdateParametersAndInitialValuesForIndividual(int individualId, IReadOnlyList<ParameterProperties> parameterProperties, IReadOnlyList<SpeciesProperties> speciesProperties)
{
fillParameterAndInitialValuesFor(individualId, parameterProperties, speciesProperties);
}

public IEnumerable<int> GetIndividualIdsFor(int coreIndex)
public IReadOnlyList<int> GetIndividualIdsFor(int coreIndex)
{
var rowIndices = getRowIndices(coreIndex);
var rowIndices = GetRowIndices(coreIndex);
return rowIndices.Select(rowIndex => _populationData.Rows[rowIndex])
.Select(individualIdFrom);
.Select(individualIdFrom).ToList();
}

private int getNumberOfSimulationsPerCore()
Expand All @@ -46,14 +61,14 @@ private int getNumberOfSimulationsPerCore()
int rowsPerCore = numberOfJobs / numberOfCoresToUse;

if (rowsPerCore * _numberOfCores < numberOfJobs)
rowsPerCore = rowsPerCore + 1;
rowsPerCore += 1;

return rowsPerCore;
}

public int NumberOfIndividuals => _populationData.Rows.Count;

private IEnumerable<int> getRowIndices(int coreIndex)
public IEnumerable<int> GetRowIndices(int coreIndex)
{
int firstRow = _numberOfSimulationsPerCore * coreIndex;

Expand Down
27 changes: 4 additions & 23 deletions src/OSPSuite.Core/Domain/Services/PopulationRunner.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -38,9 +36,7 @@ public async Task<PopulationRunResults> RunPopulationAsync(IModelCoreSimulation
if (numberOfCoresToUse < 1)
numberOfCoresToUse = 1;

agingData = agingData ?? undefinedAgingData();
initialValues = initialValues ?? undefinedInitialValues();
_populationDataSplitter = new PopulationDataSplitter(populationData, agingData, initialValues, numberOfCoresToUse);
_populationDataSplitter = new PopulationDataSplitter(numberOfCoresToUse, populationData, agingData, initialValues);
_cancellationTokenSource = new CancellationTokenSource();
_populationRunResults = new PopulationRunResults();

Expand Down Expand Up @@ -71,21 +67,6 @@ public async Task<PopulationRunResults> RunPopulationAsync(IModelCoreSimulation
}
}

private DataTable undefinedAgingData()
{
var table = new DataTable();
table.AddColumn<int>(Constants.Population.INDIVIDUAL_ID_COLUMN);
table.AddColumn<string>(Constants.Population.PARAMETER_PATH_COLUMN);
return table;
}

private DataTable undefinedInitialValues()
{
var table = new DataTable();
table.AddColumn<int>(Constants.Population.INDIVIDUAL_ID_COLUMN);
return table;
}

private Task runSimulation(int coreIndex, string simulationExport, CancellationToken cancellationToken)
{
return Task.Run(() =>
Expand Down Expand Up @@ -113,7 +94,7 @@ private void simulate(Simulation simulation, int coreIndex, CancellationToken ca
cancellationToken.ThrowIfCancellationRequested();

//get row indices for the simulations on current core
_populationDataSplitter.UpdateParametersAndInitialValuesForIndividual(individualId, variableParameters, variableSpecies);
_populationDataSplitter.UpdateParametersAndInitialValuesForIndividual(individualId, variableParameters, variableSpecies);


//set new parameter values into SimModel
Expand All @@ -133,7 +114,7 @@ private void simulate(Simulation simulation, int coreIndex, CancellationToken ca
}
finally
{
_populationRunResults.AddWarnings(individualId, WarningsFrom(simulation));
_populationRunResults.AddWarnings(individualId, WarningsFrom(simulation));

//Could lead to a wrong progress if two threads are accessing the value at the same time
SimulationProgress(this, new MultipleSimulationsProgressEventArgs(++_numberOfProcessedSimulations, _numberOfSimulationsToRun));
Expand Down Expand Up @@ -202,7 +183,7 @@ private Simulation createAndFinalizeSimulation(string simulationExport, Cancella
/// <param name="simulation">SimModel simulation</param>
private void setVariableParameters(Simulation simulation)
{
SetVariableParameters(simulation, _populationDataSplitter.ParameterPathsToBeVaried(), calculateSensitivities: false);
SetVariableParameters(simulation, _populationDataSplitter.ParameterPathsToBeVaried(), calculateSensitivities: false);
}

/// <summary>
Expand Down
48 changes: 46 additions & 2 deletions src/OSPSuite.R/Services/PopulationTask.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using OSPSuite.Core.Domain;
using OSPSuite.Core.Domain.Populations;
using OSPSuite.Core.Domain.Services;
using OSPSuite.Core.Extensions;
using OSPSuite.Infrastructure.Import.Services;
using OSPSuite.R.Extensions;
using OSPSuite.Utility;
using OSPSuite.Utility.Extensions;

namespace OSPSuite.R.Services
{
public interface IPopulationTask
{
IndividualValuesCache ImportPopulation(string fileFullPath);
DataTable PopulationTableFrom(IndividualValuesCache population, IModelCoreSimulation simulation);

DataTable PopulationTableFrom(IndividualValuesCache population, IModelCoreSimulation simulation = null);

/// <summary>
/// Loads the population from the <paramref name="populationFile"/> and split the loaded population according to the <paramref name="numberOfCores"/>.
/// Resulting files will be exported in the <paramref name="outputFolder"/>. File names will be constructed using the <paramref name="outputFileName"/> concatenated with the node index.
/// Returns an array of string containing the full path of the population files created
/// </summary>
IReadOnlyList<string> SplitPopulation(string populationFile, int numberOfCores, string outputFolder, string outputFileName);
}

public class PopulationTask : IPopulationTask
Expand All @@ -36,7 +46,7 @@ public IndividualValuesCache ImportPopulation(string fileFullPath)
return parameterValuesCache;
}

public DataTable PopulationTableFrom(IndividualValuesCache population, IModelCoreSimulation simulation)
public DataTable PopulationTableFrom(IndividualValuesCache population, IModelCoreSimulation simulation = null)
{
var dataTable = new DataTable();
var allParameters = _entitiesInSimulationRetriever.QuantitiesFrom(simulation);
Expand All @@ -56,6 +66,40 @@ public DataTable PopulationTableFrom(IndividualValuesCache population, IModelCor
return dataTable;
}

public IReadOnlyList<string> SplitPopulation(string populationFile, int numberOfCores, string outputFolder, string outputFileName)
{
var population = ImportPopulation(populationFile);
var populationData = PopulationTableFrom(population);
var dataSplitter = new PopulationDataSplitter(numberOfCores, populationData);
DirectoryHelper.CreateDirectory(outputFolder);
var outputFiles = new List<string>();

for (int i = 0; i < numberOfCores; i++)
{
var outputFile = Path.Combine(outputFolder, $"{outputFileName}_{i + 1}{Constants.Filter.CSV_EXTENSION}");
var rowIndices = dataSplitter.GetRowIndices(i).ToList();

//This is potentially empty if the number of individuals in the population is less than the number of cores provided
if(!rowIndices.Any())
continue;

outputFiles.Add(outputFile);
exportSplitPopulation(populationData, rowIndices, outputFile);
}

return outputFiles;
}

private void exportSplitPopulation(DataTable populationData, IReadOnlyList<int> rowIndices, string outputFile)
{
var dataTable = populationData.Clone();
rowIndices.Each(index =>
{
dataTable.ImportRow(populationData.Rows[index]);
});
dataTable.ExportToCSV(outputFile);
}

private void addCovariates(IndividualValuesCache population, DataTable dataTable)
{
var individualIds = Enumerable.Range(0, population.Count).ToList();
Expand Down
2 changes: 2 additions & 0 deletions tests/OSPSuite.R.Tests/ContextForIntegration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,7 @@ public override void GlobalContext()
Environment.CurrentDirectory = AppDomain.CurrentDomain.BaseDirectory;

}

public string DataFile(string fileName) => Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Data", fileName);
}
}
80 changes: 77 additions & 3 deletions tests/OSPSuite.R.Tests/Services/PopulationTaskSpecs.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using OSPSuite.BDDHelper;
using OSPSuite.BDDHelper.Extensions;
using OSPSuite.Core.Domain;
using OSPSuite.Core.Domain.Populations;
using OSPSuite.Utility;
using OSPSuite.Utility.Container;
using OSPSuite.Utility.Extensions;

namespace OSPSuite.R.Services
{
Expand All @@ -19,9 +22,9 @@ public abstract class concern_for_PopulationTask : ContextForIntegration<IPopula
public override void GlobalContext()
{
base.GlobalContext();
_populationFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Data", "pop_10.csv");
_populationFileWithUnitInParameterName = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Data", "pop_10_parameter_with_unit.csv");
_simulationFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Data", "S1.pkml");
_populationFile = DataFile("pop_10.csv");
_populationFileWithUnitInParameterName = DataFile("pop_10_parameter_with_unit.csv");
_simulationFile = DataFile("S1.pkml");
_simulationPersister = IoC.Resolve<ISimulationPersister>();
sut = IoC.Resolve<IPopulationTask>();
}
Expand Down Expand Up @@ -95,4 +98,75 @@ public void should_not_remove_the_units_from_parameter_with_path_in_their_name()
_dataTable.Columns["Acidic phospholipids - RR"].ShouldBeNull();
}
}

public class When_splitting_a_population_file_into_multiple_files : concern_for_PopulationTask
{
private IReadOnlyList<string> _result;
private string _outputFolder;
private int _numberOfCores = 5;
public override void GlobalContext()
{
base.GlobalContext();
var tmpFile = FileHelper.GenerateTemporaryFileName();
_outputFolder = new FileInfo(tmpFile).DirectoryName;
}

protected override void Because()
{
_result = sut.SplitPopulation(_populationFile, _numberOfCores, _outputFolder, "PopFile");
}

[Observation]
public void should_create_one_file_per_requested_core()
{
_result.Count.ShouldBeEqualTo(_numberOfCores);
for (int i = 0; i < _numberOfCores; i++)
{
_result.ShouldContain(Path.Combine(_outputFolder, $"PopFile_{i+1}.csv"));
}
}

public override void GlobalCleanup()
{
base.GlobalCleanup();
_result.Each(FileHelper.DeleteFile);
}
}

public class When_splitting_a_population_file_into_multiple_files_and_the_number_of_individuals_is_less_than_the_number_of_cores : concern_for_PopulationTask
{
private IReadOnlyList<string> _result;
private string _outputFolder;
private int _numberOfCores = 30;
private IndividualValuesCache _individualValuesCache;

public override void GlobalContext()
{
base.GlobalContext();
var tmpFile = FileHelper.GenerateTemporaryFileName();
_outputFolder = new FileInfo(tmpFile).DirectoryName;
_individualValuesCache = sut.ImportPopulation(_populationFile);
}

protected override void Because()
{
_result = sut.SplitPopulation(_populationFile, _numberOfCores, _outputFolder, "PopFile");
}

[Observation]
public void should_only_create_one_file_per_individual()
{
_result.Count.ShouldBeEqualTo(_individualValuesCache.Count);
for (int i = 0; i < _individualValuesCache.Count; i++)
{
_result.ShouldContain(Path.Combine(_outputFolder, $"PopFile_{i + 1}.csv"));
}
}

public override void GlobalCleanup()
{
base.GlobalCleanup();
_result.Each(FileHelper.DeleteFile);
}
}
}

0 comments on commit cec0f4c

Please sign in to comment.