From deebc0aab0f7ff32bf31ff6da4074828bb1784fa Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 16 Jun 2020 21:27:26 -0700 Subject: [PATCH] #672: docs: write markdown about TD --- .../termination/termination_collective.cc | 2 ++ examples/termination/termination_rooted.cc | 3 +- src/term.md | 30 +++++++++++++++++++ src/vt.md | 1 + 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 src/term.md diff --git a/examples/termination/termination_collective.cc b/examples/termination/termination_collective.cc index 5ff5ad33d9..b3454e87d3 100644 --- a/examples/termination/termination_collective.cc +++ b/examples/termination/termination_collective.cc @@ -44,6 +44,7 @@ #include "vt/transport.h" +/// [Collective termination example] using TestMsg = vt::Message; vt::NodeType nextNode() { @@ -98,3 +99,4 @@ int main(int argc, char** argv) { return 0; } +/// [Collective termination example] diff --git a/examples/termination/termination_rooted.cc b/examples/termination/termination_rooted.cc index 905a9be918..828052815e 100644 --- a/examples/termination/termination_rooted.cc +++ b/examples/termination/termination_rooted.cc @@ -44,6 +44,7 @@ #include "vt/transport.h" +/// [Rooted termination example] using TestMsg = vt::Message; vt::NodeType nextNode() { @@ -96,4 +97,4 @@ int main(int argc, char** argv) { return 0; } - +/// [Rooted termination example] diff --git a/src/term.md b/src/term.md new file mode 100644 index 0000000000..e980c3c17f --- /dev/null +++ b/src/term.md @@ -0,0 +1,30 @@ +\page term TerminationDetector +\brief Detect termination of work + +The termination component `vt::term::TerminationDetector`, accessed via +`vt::theTerm()`, detects the transitive completion of work following the causal +chain of messages/events across multiple nodes. It provides global termination +to determine when all work is complete and the schedulers can stop +running. 
Additionally, it enables the creation of epochs (which stamp message +envelopes) to mark messages as part of a work grouping to detect termination of +all causal events related to a subset of messages in the system. + +The termination detector comes with two different detection algorithms: (1) +4-counter wave-based termination for large collective epochs across the whole +system; and (2) Dijkstra-Scholten parental responsibility termination for rooted +epochs. Epochs are allowed to have other epochs nested within them, thus forming +a graph. The detector tracks the relation between epochs, only making progress +on epochs that do not have a dependency on another epoch terminating first. + +The termination detector also comes with hang detection to detect cases where +no progress can be made due to a user's fault. When a hang is detected, if +so configured by the user, the detector will dump a DOT graph of the live +epochs and their dependencies. + +\section term-collective-example Example of creating a collective epoch + +\snippet examples/termination/termination_collective.cc Collective termination example + +\section term-rooted-example Example of creating a rooted epoch + +\snippet examples/termination/termination_rooted.cc Rooted termination example diff --git a/src/vt.md b/src/vt.md index f184a3f693..8f9e69c9e9 100644 --- a/src/vt.md +++ b/src/vt.md @@ -55,6 +55,7 @@ management. | \subpage scheduler | `vt::theSched()` | \copybrief scheduler | | \subpage seq | `vt::theSeq()` | \copybrief seq | | \subpage vrtseq | `vt::theVirtualSeq()` | \copybrief vrtseq | +| \subpage term | `vt::theTerm()` | \copybrief term | \section vt-hello-world Example