Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating the event manager for MPI communication #136

Merged
merged 12 commits into from
Sep 6, 2018
63 changes: 54 additions & 9 deletions src/components/core/src/managers/EventManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,17 @@ void EventManager::Run(dataRepository::ManagedGroup * domain)
integer const verbosity = this->getReference<integer>(viewKeys.verbosity);
integer exitFlag = 0;

integer rank = 0;
integer comm_size = 1;
real64 *send_buffer = nullptr;
real64 *receive_buffer = nullptr;
#if USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
send_buffer = static_cast<real64 *>(malloc(2 * sizeof(real64)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not deallocated anywhere that I can see??

It is better just to do something like
real64 send_buffer[2];
array1d< real64 > receive_buffer( 2 * comm_size )

receive_buffer = static_cast<real64 *>(malloc(2 * sizeof(real64) * comm_size));
#endif

// Setup event targets
this->forSubGroups<EventBase>([]( EventBase * subEvent ) -> void
{
Expand All @@ -176,13 +187,20 @@ void EventManager::Run(dataRepository::ManagedGroup * domain)
while((time < maxTime) && (cycle < maxCycle) && (exitFlag == 0))
{
real64 nextDt = std::numeric_limits<real64>::max();
std::cout << "Time: " << time << "s, dt:" << dt << "s, Cycle: " << cycle << std::endl;

if (rank == 0)
{
std::cout << "Time: " << time << "s, dt:" << dt << "s, Cycle: " << cycle << std::endl;
}

this->forSubGroups<EventBase>([&]( EventBase * subEvent ) -> void
{
// Calculate the event and sub-event forecasts
// Note: because events can be nested, the mpi reduce for event
// forecasts need to happen in EventBase.
subEvent->CheckEvents(time, dt, cycle, domain);
integer eventForecast = subEvent->GetForecast();

// Execute, signal events
if (eventForecast == 1)
{
subEvent->SignalToPrepareForExecution(time, dt, cycle, domain);
Expand All @@ -193,31 +211,58 @@ void EventManager::Run(dataRepository::ManagedGroup * domain)
subEvent->Execute(time, dt, cycle, domain);
}

real64 requestedDt = 1e6;
// Estimate the time-step for the next cycle
if (eventForecast <= 1)
{
requestedDt = subEvent->GetTimestepRequest(time + dt);
real64 requestedDt = subEvent->GetTimestepRequest(time + dt);
nextDt = std::min(requestedDt, nextDt);
}
nextDt = std::min(requestedDt, nextDt);

// Check the exit flag
exitFlag += subEvent->GetExitFlag();

// Debug information
if (verbosity > 0)
if ((verbosity > 0) && (rank == 0))
{
std::cout << " Event: " << subEvent->getName() << ", f=" << eventForecast << ", dt_r=" << requestedDt << std::endl;
std::cout << " Event: " << subEvent->getName() << ", f=" << eventForecast << std::endl;
}
});

time += dt;
++cycle;
dt = nextDt;
dt = (time + dt > maxTime) ? (maxTime - time) : dt;

#if USE_MPI
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be more efficient than the two allreduce calls for the dt and exitFlag

send_buffer[0] = dt;
send_buffer[1] = static_cast<real64>(exitFlag);
MPI_Gather(send_buffer, 2, MPI_DOUBLE, receive_buffer, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);

if (rank == 0)
{
for (integer ii=0; ii<comm_size; ii++)
{
send_buffer[0] = std::min(send_buffer[0], receive_buffer[2*ii]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just use dt instead of send_buffer[0] as the argument?

send_buffer[1] += receive_buffer[2*ii + 1];
}
}

MPI_Bcast(send_buffer, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
dt = send_buffer[0];
if (send_buffer[1] > 0.5)
{
exitFlag = 1;
}
#endif
}


// Cleanup
std::cout << "Cleaning up events" << std::endl;
if (rank == 0)
{
std::cout << "Cleaning up events" << std::endl;
}

this->forSubGroups<EventBase>([&]( EventBase * subEvent ) -> void
{
subEvent->Cleanup(time, cycle, domain);
Expand Down
13 changes: 13 additions & 0 deletions src/components/core/src/managers/Events/EventBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ void EventBase::Step(real64 const time,
integer const cycle,
dataRepository::ManagedGroup * domain )
{
// Note: do we need an mpi barrier here?

if (m_target != nullptr)
{
m_target->Execute(time, dt, cycle, domain);
Expand Down Expand Up @@ -377,4 +379,15 @@ void EventBase::Cleanup(real64 const& time_n,



integer EventBase::GetExitFlag()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were not propagating any exit flags upwards to the main run loop.

{
this->forSubGroups<EventBase>([&]( EventBase * subEvent ) -> void
{
m_exitFlag += subEvent->GetExitFlag();
});

return m_exitFlag;
}


} /* namespace geosx */
3 changes: 1 addition & 2 deletions src/components/core/src/managers/Events/EventBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,12 @@ class EventBase : public ExecutableGroup
integer GetForecast(){ return m_eventForecast; }
void SetForecast(integer forecast){ m_eventForecast = forecast; }

integer GetExitFlag(){ return m_exitFlag; }
integer GetExitFlag();
void SetExitFlag(integer flag){ m_exitFlag = flag; }

private:
integer m_eventForecast = 0;
integer m_exitFlag = 0;

};

} /* namespace geosx */
Expand Down
8 changes: 8 additions & 0 deletions src/components/core/src/managers/Events/HaltEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ void HaltEvent::EstimateEventTiming(real64 const time,
m_realDt = currentTime - m_lastTime;
m_lastTime = currentTime;
integer forecast = static_cast<integer>((maxRuntime - (currentTime - m_startTime)) / m_realDt);

// The timing for the ranks may differ slightly, so synchronize
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This Allreduce can happen here, or we could store a common time-stamp for each of the ranks somewhere.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How will these be different? It seems that currentTime and m_lastTime will always be in sync?? You could sync up m_startTime in the constructor when it is set....wait what is going on here? Why do you care about the real world runtime? Is this for a maximum wall time?

#if USE_MPI
integer forecast_global;
MPI_Allreduce(&forecast, &forecast_global, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
forecast = forecast_global;
#endif

SetForecast(forecast);

if (this->GetForecast() <= 0)
Expand Down
8 changes: 8 additions & 0 deletions src/components/core/src/managers/Events/PeriodicEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,14 @@ void PeriodicEvent::CheckOptionalFunctionThreshold(real64 const time,
// Find the function (min, average, max)
real64_array stats = function->EvaluateStats(m_functionTarget, time, mySet);
result = stats[functionStatOption];

// Because the function applied to an object may differ by rank, synchronize
// (Note: this shouldn't occur very often, since it is only called if the base forecast <= 0)
#if USE_MPI
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allreduce should be fairly uncommon, since it will only be applied when the following apply:

  1. The base forecast is 0
  2. The forecast is being modified via a function/threshold being applied to an object (not a simple time-series)

real64 result_global;
MPI_Allreduce(&result, &result_global, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
result = result_global;
#endif
}

// Forcast event
Expand Down
2 changes: 2 additions & 0 deletions src/components/core/src/physicsSolvers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ set(physicsSolvers_headers
FiniteVolume/SinglePhaseFlow.hpp
SimpleSolvers/LaplaceFEM.hpp
src/SolidMechanicsLagrangianFEM.hpp
src/DummySolver.hpp
)

#
Expand All @@ -19,6 +20,7 @@ set(physicsSolvers_sources
FiniteVolume/SinglePhaseFlow.cpp
SimpleSolvers/LaplaceFEM.cpp
src/SolidMechanicsLagrangianFEM.cpp
src/DummySolver.cpp
)


Expand Down
107 changes: 107 additions & 0 deletions src/components/core/src/physicsSolvers/src/DummySolver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This solver is meant to toy around with random time-step requests. I've been using it to make sure that the event manager, mpi communications are handling the time-stepping correctly.

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Copyright (c) 2018, Lawrence Livermore National Security, LLC.
*
* Produced at the Lawrence Livermore National Laboratory
*
* LLNL-CODE-746361
*
* All rights reserved. See COPYRIGHT for details.
*
* This file is part of the GEOSX Simulation Framework.
*
* GEOSX is a free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (as published by the
* Free Software Foundation) version 2.1 dated February 1999.
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/


#include "DummySolver.hpp"
#include "dataRepository/ManagedGroup.hpp"
#include <thread>
#include <chrono>

namespace geosx
{


using namespace dataRepository;


DummySolver::DummySolver( const std::string& name,
ManagedGroup * const parent ):
SolverBase( name, parent )
{}



DummySolver::~DummySolver()
{
// TODO Auto-generated destructor stub
}


void DummySolver::FillDocumentationNode()
{
cxx_utilities::DocumentationNode * const docNode = this->getDocumentationNode();
SolverBase::FillDocumentationNode();

docNode->setName(this->CatalogName());
docNode->setSchemaType("Node");
docNode->setShortDescription("Dummy solver for testing time-stepping behavior");

docNode->AllocateChildNode( viewKeys.rand_scale.Key(),
viewKeys.rand_scale.Key(),
-1,
"real64",
"real64",
"Scale for modifying requested dt",
"Scale for modifying requested dt",
"1e-9",
"",
1,
1,
0 );

}


void DummySolver::Initialize( ManagedGroup * const problemManager )
{
integer rank = 0;
#if USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
std::srand(rank * 12345);
}


real64 DummySolver::SolverStep( real64 const& time_n,
real64 const& dt,
const int cycleNumber,
DomainPartition * domain )
{
std::this_thread::sleep_for(std::chrono::seconds(1));
return dt;
}


real64 DummySolver::GetTimestepRequest(real64 const time)
{
integer rank = 0;
#if USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif

real64 const rand_scale = this->getReference<real64>(viewKeys.rand_scale);
real64 dt_request = std::rand() * rand_scale;

std::cout << "time=" << time << ", solver=" << this->getName() << ", rank=" << rank << ", dt_r=" << dt_request << std::endl;

return dt_request;
}


REGISTER_CATALOG_ENTRY( SolverBase, DummySolver, std::string const &, ManagedGroup * const )
} /* namespace ANST */
64 changes: 64 additions & 0 deletions src/components/core/src/physicsSolvers/src/DummySolver.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Copyright (c) 2018, Lawrence Livermore National Security, LLC.
*
* Produced at the Lawrence Livermore National Laboratory
*
* LLNL-CODE-746361
*
* All rights reserved. See COPYRIGHT for details.
*
* This file is part of the GEOSX Simulation Framework.
*
* GEOSX is a free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (as published by the
* Free Software Foundation) version 2.1 dated February 1999.
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/


#ifndef DUMMYSOLVER_HPP_
#define DUMMYSOLVER_HPP_

#include "physicsSolvers/SolverBase.hpp"

namespace geosx
{
namespace dataRepository
{
class ManagedGroup;
}
class DomainPartition;

class DummySolver : public SolverBase
{
public:
DummySolver( const std::string& name,
ManagedGroup * const parent );


virtual ~DummySolver() override;

static string CatalogName() { return "DummySolver"; }

virtual void FillDocumentationNode() override;

virtual void Initialize( ManagedGroup * const problemManager ) override final;

virtual real64 SolverStep( real64 const& time_n,
real64 const& dt,
integer const cycleNumber,
DomainPartition * domain ) override;

virtual real64 GetTimestepRequest(real64 const time) override;

struct viewKeysStruct
{
dataRepository::ViewKey rand_scale = { "rand_scale" };
} viewKeys;

};

} /* namespace geosx */

#endif /* DUMMYSOLVER_HPP_ */