-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Different async mechanism: MPI message buffer #111
Merged
Merged
Changes from 7 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
921bd41
adding test that breaks current implementation
csegarragonz 17a48d1
removing thread pool and implementing the umb
csegarragonz 0110ff4
adding more tests
csegarragonz a634405
introducing the mpi message buffer and encapsulating most logic there
csegarragonz 3055782
adding tests for the mpi message buffer + formatting
csegarragonz 0a7ab7c
pr comments
csegarragonz 88bcb7d
switching to per-world port range
csegarragonz 1422843
adding more tests
csegarragonz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#include <faabric/mpi/mpi.h> | ||
#include <faabric/proto/faabric.pb.h> | ||
|
||
#include <iterator> | ||
#include <list> | ||
|
||
namespace faabric::scheduler { | ||
/* The MPI message buffer (MMB) keeps track of the asyncrhonous | ||
* messages that we must have received (i.e. through an irecv call) but we | ||
* still have not waited on (acknowledged). Messages are acknowledged either | ||
* through a call to recv or a call to await. A call to recv will | ||
* acknowledge (i.e. synchronously read from transport buffers) as many | ||
* unacknowleged messages there are. A call to await with a request | ||
* id as a parameter will acknowledge as many unacknowleged messages there are | ||
* until said request id. | ||
*/ | ||
class MpiMessageBuffer | ||
{ | ||
public: | ||
/* This structure holds the metadata for each Mpi message we keep in the | ||
* buffer. Note that the message field will point to null if unacknowleged | ||
* or to a valid message otherwise. | ||
*/ | ||
class PendingAsyncMpiMessage | ||
{ | ||
public: | ||
int requestId = -1; | ||
std::shared_ptr<faabric::MPIMessage> msg = nullptr; | ||
int sendRank = -1; | ||
int recvRank = -1; | ||
uint8_t* buffer = nullptr; | ||
faabric_datatype_t* dataType = nullptr; | ||
int count = -1; | ||
faabric::MPIMessage::MPIMessageType messageType = | ||
faabric::MPIMessage::NORMAL; | ||
|
||
bool isAcknowledged() { return msg != nullptr; } | ||
|
||
void acknowledge(std::shared_ptr<faabric::MPIMessage> msgIn) | ||
{ | ||
msg = msgIn; | ||
} | ||
}; | ||
|
||
/* Interface to query the buffer size */ | ||
|
||
bool isEmpty(); | ||
|
||
int size(); | ||
|
||
/* Interface to add and delete messages to the buffer */ | ||
|
||
void addMessage(PendingAsyncMpiMessage msg); | ||
|
||
void deleteMessage( | ||
const std::list<PendingAsyncMpiMessage>::iterator& msgIt); | ||
|
||
/* Interface to get a pointer to a message in the MMB */ | ||
|
||
// Pointer to a message given its request id | ||
std::list<PendingAsyncMpiMessage>::iterator getRequestPendingMsg( | ||
int requestId); | ||
|
||
// Pointer to the first null-pointing (unacknowleged) message | ||
std::list<PendingAsyncMpiMessage>::iterator getFirstNullMsg(); | ||
|
||
/* Interface to ask for the number of unacknowleged messages */ | ||
|
||
// Unacknowledged messages until an iterator (used in await) | ||
int getTotalUnackedMessagesUntil( | ||
const std::list<PendingAsyncMpiMessage>::iterator& msgIt); | ||
|
||
// Unacknowledged messages in the whole buffer (used in recv) | ||
int getTotalUnackedMessages(); | ||
|
||
private: | ||
std::list<PendingAsyncMpiMessage> pendingMsgs; | ||
|
||
std::list<PendingAsyncMpiMessage>::iterator getFirstNullMsgUntil( | ||
const std::list<PendingAsyncMpiMessage>::iterator& msgIt); | ||
}; | ||
} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#include <faabric/scheduler/MpiMessageBuffer.h>
#include <faabric/util/logging.h>

#include <algorithm>
#include <stdexcept>
#include <utility>
|
||
namespace faabric::scheduler { | ||
// Shorthand for an iterator over the buffer's list of pending messages
using MpiMessageIterator =
  std::list<MpiMessageBuffer::PendingAsyncMpiMessage>::iterator;
bool MpiMessageBuffer::isEmpty() | ||
{ | ||
return pendingMsgs.empty(); | ||
} | ||
|
||
int MpiMessageBuffer::size() | ||
{ | ||
return pendingMsgs.size(); | ||
} | ||
|
||
// Append a pending message at the back of the buffer
void MpiMessageBuffer::addMessage(PendingAsyncMpiMessage msg)
{
    // msg is our own by-value copy, so move it into the list rather than
    // copying it (and its shared_ptr) a second time
    pendingMsgs.push_back(std::move(msg));
}
|
||
void MpiMessageBuffer::deleteMessage(const MpiMessageIterator& msgIt) | ||
{ | ||
pendingMsgs.erase(msgIt); | ||
} | ||
|
||
// Find the pending message registered under the given request id. Logs an
// error and throws a std::runtime_error if no entry has that id.
MpiMessageIterator MpiMessageBuffer::getRequestPendingMsg(int requestId)
{
    // The request id must be in the MMB, as an irecv must happen before an
    // await. The predicate takes each entry by const reference: taking it by
    // value would copy the entry (and its shared_ptr) for every element
    // visited
    MpiMessageIterator msgIt =
      std::find_if(pendingMsgs.begin(),
                   pendingMsgs.end(),
                   [requestId](const PendingAsyncMpiMessage& pendingMsg) {
                       return pendingMsg.requestId == requestId;
                   });

    // If it's not there, error out
    if (msgIt == pendingMsgs.end()) {
        SPDLOG_ERROR("Asynchronous request id not in buffer: {}", requestId);
        throw std::runtime_error("Async request not in buffer");
    }

    return msgIt;
}
|
||
// Find the first unacknowledged message (one whose msg pointer is null) in
// the range [begin, msgItEnd). Returns msgItEnd if every message in that
// range has been acknowledged.
MpiMessageIterator MpiMessageBuffer::getFirstNullMsgUntil(
  const MpiMessageIterator& msgItEnd)
{
    // Inspect entries by const reference to avoid copying each one (and its
    // shared_ptr) during the scan
    return std::find_if(pendingMsgs.begin(),
                        msgItEnd,
                        [](const PendingAsyncMpiMessage& pendingMsg) {
                            return pendingMsg.msg == nullptr;
                        });
}
|
||
// First unacknowledged message in the whole buffer, or the end iterator if
// every message has been acknowledged
MpiMessageIterator MpiMessageBuffer::getFirstNullMsg()
{
    const MpiMessageIterator bufferEnd = pendingMsgs.end();
    return getFirstNullMsgUntil(bufferEnd);
}
|
||
int MpiMessageBuffer::getTotalUnackedMessagesUntil( | ||
const MpiMessageIterator& msgItEnd) | ||
{ | ||
MpiMessageIterator firstNull = getFirstNullMsgUntil(msgItEnd); | ||
return std::distance(firstNull, msgItEnd); | ||
} | ||
|
||
int MpiMessageBuffer::getTotalUnackedMessages() | ||
{ | ||
MpiMessageIterator firstNull = getFirstNullMsg(); | ||
return std::distance(firstNull, pendingMsgs.end()); | ||
} | ||
} |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have changed these to accommodate the new port offset per world.