diff --git a/.azure-pipelines-templates/simulation.yml b/.azure-pipelines-templates/simulation.yml deleted file mode 100644 index 633a3d03b4a3..000000000000 --- a/.azure-pipelines-templates/simulation.yml +++ /dev/null @@ -1,33 +0,0 @@ -jobs: - - job: Simulation - displayName: Simulation - variables: - Codeql.SkipTaskAutoInjection: true - skipComponentGovernanceDetection: true - timeoutInMinutes: 120 - - ${{ insert }}: ${{ parameters.env }} - - steps: - - checkout: self - clean: true - fetchDepth: 1 - - - script: | - set -ex - sudo apt update - sudo apt install -y default-jre - python3 ./tla/install_deps.py - displayName: Setup - - - script: ./tlc.sh -workers auto -simulate -depth 500 consensus/SIMccfraft.tla -dumpTrace tla SIMccfraft.trace.tla -dumpTrace json SIMccfraft.json - env: - SIM_TIMEOUT: ${{ parameters.timeout }} - workingDirectory: tla - displayName: Simulation - - - task: PublishPipelineArtifact@1 - inputs: - artifactName: "Simulation Traces" - targetPath: tla - condition: or(failed(), canceled()) diff --git a/.azure-pipelines-templates/trace_validation.yml b/.azure-pipelines-templates/trace_validation.yml deleted file mode 100644 index f2cf7d4f22f8..000000000000 --- a/.azure-pipelines-templates/trace_validation.yml +++ /dev/null @@ -1,37 +0,0 @@ -steps: - - script: | - set -ex - sudo apt update - sudo apt install -y default-jre parallel - python3 ./tla/install_deps.py - displayName: "Install TLA dependencies" - - - ${{ if eq(parameters.suffix, 'Tracing') }}: - - script: | - set -ex - set -o pipefail - cd tla/ - mkdir -p traces/consensus - mv ../build/consensus traces/ - parallel 'JVM_OPTIONS=-Dtlc2.tool.queue.IStateQueue=StateDeque JSON={} ./tlc.sh -dump dot,constrained,colorize,actionlabels {}.dot -dumpTrace tla {}.trace.tla -dumpTrace json {}.trace.json consensus/Traceccfraft.tla' ::: $(ls traces/consensus/*.ndjson) - displayName: "Run Consensus Trace Validation" - - - ${{ if eq(parameters.suffix, 'Debug') }}: - - script: | - set -ex - set -o pipefail - cd tla/ - mkdir -p traces/consistency - mv ../build/consistency/trace.ndjson traces/consistency/ - # Note that -Dtlc2.tool.queue.IStateQueue=StateDeque effectively - # causes TLC to run a depth-first search rather than a breadth-first search, which tends to - # be more efficient in the case of trace validation including non-deterministic steps. - JVM_OPTIONS=-Dtlc2.tool.queue.IStateQueue=StateDeque JSON=traces/consistency/trace.ndjson ./tlc.sh -dump dot,constrained,colorize,actionlabels traces/consistency/trace.dot -dumpTrace tla traces/consistency/trace.trace.tla -dumpTrace json traces/consistency/trace.trace.json consistency/TraceMultiNodeReads.tla - ls traces - displayName: "Run Consistency Trace Validation" - - - task: PublishPipelineArtifact@1 - inputs: - artifactName: "Trace Validation Output ${{ parameters.suffix }}" - targetPath: tla/traces - condition: or(failed(), canceled()) diff --git a/.github/workflows/ci-verification.yml b/.github/workflows/ci-verification.yml index 623889ad3694..a6957c9b0556 100644 --- a/.github/workflows/ci-verification.yml +++ b/.github/workflows/ci-verification.yml @@ -24,40 +24,25 @@ jobs: runs-on: [self-hosted, 1ES.Pool=gha-virtual-ccf-sub] container: image: ghcr.io/microsoft/ccf/ci/default:build-25-07-2024 + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - - run: | + - name: Install TLC dependencies + run: | sudo apt update sudo apt install -y default-jre - python3 ./tla/install_deps.py - - - name: consistency/MCSingleNode.cfg - run: | - cd tla/ - ./tlc.sh -workers auto consistency/MCSingleNode.tla -dumpTrace json MCSingleNode.json - - - name: consistency/MCSingleNodeReads.cfg - run: | - cd tla/ - ./tlc.sh -workers auto consistency/MCSingleNodeReads.tla -dumpTrace json MCSingleNodeReads.json - - - name: consistency/MCMultiNode.cfg - run: | - cd tla/ - ./tlc.sh -workers auto consistency/MCMultiNode.tla -dumpTrace json MCMultiNode.json - - - name: consistency/MCMultiNodeReads.cfg - run: | - cd tla/ - ./tlc.sh -workers auto consistency/MCMultiNodeReads.tla -dumpTrace json MCMultiNodeReads.json + python3 install_deps.py - - name: consistency/MCMultiNodeReadsAlt.cfg - run: | - cd tla/ - ./tlc.sh -workers auto consistency/MCMultiNodeReadsAlt.tla -dumpTrace json MCMultiNodeReadsAlt.json + - run: ./tlc.py mc consistency/MCSingleNode.tla + - run: ./tlc.py mc consistency/MCSingleNodeReads.tla + - run: ./tlc.py mc consistency/MCMultiNode.tla + - run: ./tlc.py mc consistency/MCMultiNodeReads.tla + - run: ./tlc.py mc consistency/MCMultiNodeReadsAlt.tla - - name: Upload TLC's out file as an artifact. Can be imported into the TLA+ Toolbox. + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: @@ -69,45 +54,41 @@ jobs: counterexamples-consistency: name: Counterexamples - Consistency runs-on: ubuntu-latest + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - - run: python3 ./tla/install_deps.py - - - name: consistency/MCSingleNodeCommitReachability.cfg - run: | - cd tla/ - ./tlc_debug.sh -workers auto -config consistency/MCSingleNodeCommitReachability.cfg consistency/MCSingleNodeReads.tla - - - name: consistency/MCMultiNodeCommitReachability.cfg - run: | - cd tla/ - ./tlc_debug.sh -workers auto -config consistency/MCMultiNodeCommitReachability.cfg consistency/MCMultiNodeReads.tla - - - name: consistency/MCMultiNodeInvalidReachability.cfg + - name: Install TLC dependencies run: | - cd tla/ - ./tlc_debug.sh -workers auto -config consistency/MCMultiNodeInvalidReachability.cfg consistency/MCMultiNodeReads.tla + sudo apt update + sudo apt install -y default-jre + python3 install_deps.py - - name: consistency/MCMultiNodeReadsNotLinearizable.cfg - run: | - cd tla/ - ./tlc_debug.sh -workers auto -config consistency/MCMultiNodeReadsNotLinearizable.cfg consistency/MCMultiNodeReads.tla + - run: ./tlc_debug.sh --config consistency/MCSingleNodeCommitReachability.cfg mc consistency/MCSingleNodeReads.tla + - run: ./tlc_debug.sh --config consistency/MCMultiNodeCommitReachability.cfg mc consistency/MCMultiNodeReads.tla + - run: ./tlc_debug.sh --config consistency/MCMultiNodeInvalidReachability.cfg mc consistency/MCMultiNodeReads.tla + - run: ./tlc_debug.sh --config consistency/MCMultiNodeReadsNotLinearizable.cfg mc consistency/MCMultiNodeReads.tla simulation-consistency: name: Simulation - Consistency runs-on: ubuntu-latest + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - - run: python3 ./tla/install_deps.py - - - name: consistency/MultiNodeReads.cfg + - name: Install TLC dependencies run: | - cd tla/ - ./tlc.sh -workers auto -simulate num=500 -depth 50 consistency/MultiNodeReads.tla -dumpTrace json MultiNodeReads.json + sudo apt update + sudo apt install -y default-jre + python3 install_deps.py - - name: Upload traces in TLA and JSON format + - run: ./tlc.py sim --num 500 --depth 50 consistency/MultiNodeReads.tla + + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: @@ -121,33 +102,23 @@ jobs: runs-on: [self-hosted, 1ES.Pool=gha-virtual-ccf-sub] container: image: ghcr.io/microsoft/ccf/ci/default:build-25-07-2024 + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - - run: | + - name: Install TLC dependencies + run: | sudo apt update sudo apt install -y default-jre - python3 ./tla/install_deps.py - - - name: MCabs.cfg - run: | - set -x - cd tla/ - ./tlc.sh -workers auto consensus/MCabs.tla -dumpTrace tla MCabs.trace.tla -dumpTrace json MCabs.json - - - name: MCccfraft - Configurations=1C2N - run: | - set -x - cd tla/ - Configurations=1C2N MaxTermLimit=2 RequestLimit=3 ./tlc.sh -workers auto consensus/MCccfraft.tla -dumpTrace tla MCccfraft1C2NT2R3.trace.tla -dumpTrace json MCccfraft1C2NT2R3.json + python3 install_deps.py - - name: MCccfraft - Configurations=1C3N - run: | - set -x - cd tla/ - Configurations=1C3N MaxTermLimit=2 RequestLimit=3 ./tlc.sh -workers auto consensus/MCccfraft.tla -dumpTrace tla MCccfraft1C3N2T3R.trace.tla -dumpTrace json MCccfraft1C3N2T3R.json + - run: ./tlc.py mc consensus/MCabs.tla + - run: ./tlc.py --trace-name 1C2N mc --term-count 2 --request-count 2 --raft-configs 1C2N consensus/MCccfraft.tla + - run: ./tlc.py --trace-name 1C3N mc --term-count 2 --request-count 2 --raft-configs 1C3N consensus/MCccfraft.tla - - name: Upload TLC's out file as an artifact. Can be imported into the TLA+ Toolbox. + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: @@ -159,29 +130,28 @@ jobs: simulation-consensus: name: Simulation - Consensus runs-on: ubuntu-latest + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - - run: | + - name: Install TLC dependencies + run: | sudo apt update sudo apt install -y default-jre - python3 ./tla/install_deps.py + python3 install_deps.py - - name: Simulation - run: | - set -x - cd tla/ - ./tlc.sh -workers auto -simulate -depth 500 consensus/SIMccfraft.tla -dumpTrace tla SIMccfraft.trace.tla -dumpTrace json SIMccfraft.json - env: - SIM_TIMEOUT: 1200 + - run: ./tlc.py sim consensus/SIMccfraft.tla - - name: Upload artifacts. + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: name: tlc-simulation-consensus path: | - tla/* + tla/consensus/*_TTrace_*.tla + tla/*.json trace-validation-consensus: name: Trace Validation - Consensus @@ -193,11 +163,11 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - run: | + - name: Install TLC dependencies + run: | sudo apt update sudo apt install -y default-jre parallel python3 ./tla/install_deps.py - shell: bash - name: "Build" run: | @@ -223,7 +193,7 @@ jobs: cd tla/ mkdir -p traces/consensus mv ../build/consensus traces/ - parallel 'JVM_OPTIONS=-Dtlc2.tool.queue.IStateQueue=StateDeque JSON={} ./tlc.sh -dump dot,constrained,colorize,actionlabels {}.dot -dumpTrace tla {}.trace.tla -dumpTrace json {}.trace.json consensus/Traceccfraft.tla' ::: $(ls traces/consensus/*.ndjson) + parallel './tlc.py --workers 1 --dot --trace-name {} tv --ccf-raft-trace {} consensus/Traceccfraft.tla' ::: $(ls traces/consensus/*.ndjson) shell: bash - name: Upload artifacts. diff --git a/.github/workflows/long-verification.yml b/.github/workflows/long-verification.yml index 9507f362fd2c..77b3f467c05a 100644 --- a/.github/workflows/long-verification.yml +++ b/.github/workflows/long-verification.yml @@ -23,21 +23,20 @@ jobs: runs-on: [self-hosted, 1ES.Pool=gha-virtual-ccf-sub] container: image: ghcr.io/microsoft/ccf/ci/default:build-25-07-2024 + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - run: | sudo apt update sudo apt install -y default-jre - python3 ./tla/install_deps.py + python3 install_deps.py - - name: MCccfraft - Configurations=2C2N (atomic reconf) MaxTermLimit=4 RequestLimit=3 NoCheckQuorum - run: | - set -x - cd tla/ - Configurations=2C2N MaxTermLimit=4 RequestLimit=1 NoCheckQuorum= ./tlc.sh -workers auto consensus/MCccfraft.tla -dumpTrace tla MCccfraft2C2N4T1R.trace.tla -dumpTrace json MCccfraft2C2N4T1R.json + - run: ./tlc.py --trace-name 2C2N mc --term-count 2 --request-count 0 --raft-configs 2C2N --disable-check-quorum consensus/MCccfraft.tla - - name: Upload TLC's out file as an artifact. Can be imported into the TLA+ Toolbox. + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: @@ -52,21 +51,20 @@ jobs: runs-on: [self-hosted, 1ES.Pool=gha-virtual-ccf-sub] container: image: ghcr.io/microsoft/ccf/ci/default:build-25-07-2024 + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - run: | sudo apt update sudo apt install -y default-jre - python3 ./tla/install_deps.py + python3 install_deps.py - - name: MCccfraft - Configurations=3C2N MaxTermLimit=4 RequestLimit=1 NoCheckQuorum - run: | - set -x - cd tla/ - Configurations=3C2N MaxTermLimit=4 RequestLimit=1 NoCheckQuorum= ./tlc.sh -workers auto consensus/MCccfraft.tla -dumpTrace tla MCccfraft3C2N4T1R.trace.tla -dumpTrace json MCccfraft3C2N4T1R.json + - run: ./tlc.py --trace-name 3C2N mc --term-count 2 --request-count 0 --raft-configs 3C2N --disable-check-quorum consensus/MCccfraft.tla - - name: Upload TLC's out file as an artifact. Can be imported into the TLA+ Toolbox. + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: @@ -79,26 +77,24 @@ jobs: name: Simulation - Consensus if: ${{ contains(github.event.pull_request.labels.*.name, 'run-long-verification') }} runs-on: ubuntu-latest + defaults: + run: + working-directory: tla steps: - uses: actions/checkout@v4 - run: | sudo apt update sudo apt install -y default-jre - python3 ./tla/install_deps.py + python3 install_deps.py - - name: Simulation - run: | - set -x - cd tla/ - ./tlc.sh -workers auto -simulate -depth 500 consensus/SIMccfraft.tla -dumpTrace tla SIMccfraft.trace.tla -dumpTrace json SIMccfraft.json - env: - SIM_TIMEOUT: 3000 + - run: ./tlc.py sim --max-seconds 3000 --depth 500 consensus/SIMccfraft.tla - - name: Upload artifacts. + - name: Upload TLC traces uses: actions/upload-artifact@v4 if: ${{ failure() }} with: name: tlc-simulation-consensus path: | - tla/* + tla/consensus/*_TTrace_*.tla + tla/*.json diff --git a/doc/architecture/raft_tla.rst b/doc/architecture/raft_tla.rst index 2a88ce60a4a6..19f6ac410a1a 100644 --- a/doc/architecture/raft_tla.rst +++ b/doc/architecture/raft_tla.rst @@ -21,7 +21,7 @@ To download and then run TLC, simply execute: $ cd tla $ python install_deps.py - $ ./tlc.sh consensus/MCccfraft.tla + $ ./tlc.py consensus/MCccfraft.tla .. tip:: TLC works best if it can utilize all system resources. Use the ``-workers auto`` option to use all cores. @@ -29,7 +29,7 @@ You can also check the consensus specification including reconfiguration as foll .. code-block:: bash - $ Configurations=3C2N MaxTermLimit=4 RequestLimit=1 NoCheckQuorum= ./tlc.sh consensus/MCccfraft.tla + $ ./tlc.py --term-count 2 --request-count 0 --raft-configs 3C2N --disable-check-quorum consensus/MCccfraft.tla Using TLC to exhaustively check our models can take any time between minutes (for small configurations) and days (especially for the full consensus model with reconfiguration) on a 128 core VM (specifically, we used an `Azure HBv3 instance `_. @@ -42,6 +42,6 @@ Trace validation It is possible to produce fresh traces quickly from the driver by running the ``make_traces.sh`` script from the ``tla`` directory. -Calling the trace validation on, for example, the ``append`` scenario can then be done with ``JSON=../build/append.ndjson ./tlc.sh consensus/Traceccfraft.tla``. +Calling the trace validation on, for example, the ``append`` scenario can then be done with ``./tlc.py --driver-trace ../build/append.ndjson consensus/Traceccfraft.tla``. CCF also provides a command line trace visualizer to aid debugging, for example, the ``append`` scenario can be visualized with ``python ../tests/trace_viz.py ../build/append.ndjson``. diff --git a/scripts/notice-check.py b/scripts/notice-check.py index 2676f4784858..5699cae1bb6f 100644 --- a/scripts/notice-check.py +++ b/scripts/notice-check.py @@ -25,7 +25,11 @@ ".cpp": [SLASH_PREFIXED], ".h": HEADERS_WITH_PRAGMAS, ".hpp": HEADERS_WITH_PRAGMAS, - ".py": [HASH_PREFIXED], + ".py": [ + HASH_PREFIXED, + # May have a shebang before the license + "#!/usr/bin/env python3" + os.linesep + HASH_PREFIXED, + ], ".sh": [ HASH_PREFIXED, # May have a shebang before the license diff --git a/tla/consensus/MCccfraft.tla b/tla/consensus/MCccfraft.tla index e9844328a74a..449316eafdc1 100644 --- a/tla/consensus/MCccfraft.tla +++ b/tla/consensus/MCccfraft.tla @@ -6,29 +6,29 @@ CONSTANTS Configurations == LET default == <<{NodeOne, NodeTwo}>> IN - IF "Configurations" \in DOMAIN IOEnv THEN + IF "RAFT_CONFIGS" \in DOMAIN IOEnv THEN \* Don't parse and process the string Configurations but keep it simple and just check for known values. - CASE IOEnv.Configurations = "1C1N" -> <<{NodeOne}>> - [] IOEnv.Configurations = "1C2N" -> default - [] IOEnv.Configurations = "1C3N" -> <<{NodeOne, NodeTwo, NodeThree}>> - [] IOEnv.Configurations = "2C2N" -> <<{NodeOne}, {NodeTwo}>> - [] IOEnv.Configurations = "3C2N" -> <<{NodeOne}, {NodeOne, NodeTwo}, {NodeTwo}>> - [] OTHER -> Print("Unknown value for env var Configurations. Falling back to <<{NodeOne, NodeTwo}>>.", default) - ELSE Print("Found no env var Configurations. Falling back to <<{NodeOne, NodeTwo}>>.", default) + CASE IOEnv.RAFT_CONFIGS = "1C1N" -> <<{NodeOne}>> + [] IOEnv.RAFT_CONFIGS = "1C2N" -> default + [] IOEnv.RAFT_CONFIGS = "1C3N" -> <<{NodeOne, NodeTwo, NodeThree}>> + [] IOEnv.RAFT_CONFIGS = "2C2N" -> <<{NodeOne}, {NodeTwo}>> + [] IOEnv.RAFT_CONFIGS = "3C2N" -> <<{NodeOne}, {NodeOne, NodeTwo}, {NodeTwo}>> + [] OTHER -> Print("Unsupported value for RAFT_CONFIGS, defaulting to 1C2N: <<{NodeOne, NodeTwo}>>.", default) + ELSE Print("RAFT_CONFIGS is not set, defaulting to 1C2N: <<{NodeOne, NodeTwo}>>.", default) ASSUME Configurations \in Seq(SUBSET Servers) -MaxTermLimit == - IF "MaxTermLimit" \in DOMAIN IOEnv - THEN atoi(IOEnv.MaxTermLimit) - ELSE Print("Found no env var MaxTermLimit. Falling back to 2 terms.", 2) -ASSUME MaxTermLimit \in Nat +TermCount == + IF "TERM_COUNT" \in DOMAIN IOEnv + THEN atoi(IOEnv.TERM_COUNT) + ELSE Print("TERM_COUNT is not set, defaulting to 0", 0) +ASSUME TermCount \in Nat \* Limit on client requests -RequestLimit == - IF "RequestLimit" \in DOMAIN IOEnv - THEN atoi(IOEnv.RequestLimit) - ELSE Print("Found no env var RequestLimit. Falling back to 3 requests.", 3) -ASSUME RequestLimit \in Nat +RequestCount == + IF "REQUEST_COUNT" \in DOMAIN IOEnv + THEN atoi(IOEnv.REQUEST_COUNT) + ELSE Print("REQUEST_COUNT is not set, defaulting to 3", 3) +ASSUME RequestCount \in Nat ToServers == UNION Range(Configurations) @@ -36,7 +36,7 @@ ToServers == CCF == INSTANCE ccfraft MCCheckQuorum(i) == - IF "NoCheckQuorum" \in DOMAIN IOEnv THEN FALSE ELSE CCF!CheckQuorum(i) + IF "DISABLE_CHECK_QUORUM" \in DOMAIN IOEnv THEN FALSE ELSE CCF!CheckQuorum(i) \* This file controls the constants as seen below. \* In addition to basic settings of how many nodes are to be model checked, @@ -55,7 +55,7 @@ MCChangeConfigurationInt(i, newConfiguration) == \* constraint below is too restrictive. MCTimeout(i) == \* Limit the term of each server to reduce state space - /\ currentTerm[i] < MaxTermLimit + /\ currentTerm[i] < StartTerm + TermCount \* Limit max number of simultaneous candidates \* We made several restrictions to the state space of Raft. However since we \* made these restrictions, Deadlocks can occur at places that Raft would in @@ -70,8 +70,8 @@ MCTimeout(i) == \* Limit number of requests (new entries) that can be made MCClientRequest(i) == - \* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit - /\ FoldSeq(LAMBDA e, count: IF e.contentType = TypeEntry THEN count + 1 ELSE count, 0, log[i]) < RequestLimit + \* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) <= RequestCount + /\ FoldSeq(LAMBDA e, count: IF e.contentType = TypeEntry THEN count + 1 ELSE count, 0, log[i]) <= RequestCount /\ CCF!ClientRequest(i) MCSignCommittableMessages(i) == @@ -143,12 +143,12 @@ DebugNotTooManySigsInv == \* The inital log is up to 4 entries long + one log entry per request/reconfiguration + one signature per request/reconfiguration or new view (no consecutive sigs except across views) MaxLogLength == - 4 + ((RequestLimit + Len(Configurations)) * 2) + MaxTermLimit + 4 + ((RequestCount + Len(Configurations)) * 2) + TermCount MappingToAbs == INSTANCE abs WITH Servers <- Servers, - Terms <- StartTerm..MaxTermLimit, + Terms <- StartTerm..(StartTerm + TermCount), MaxLogLength <- MaxLogLength, cLogs <- [i \in Servers |-> [j \in 1..commitIndex[i] |-> log[i][j].term]] diff --git a/tla/consensus/Traceccfraft.tla b/tla/consensus/Traceccfraft.tla index b7239c7d01c3..79c945aa75e1 100644 --- a/tla/consensus/Traceccfraft.tla +++ b/tla/consensus/Traceccfraft.tla @@ -77,12 +77,12 @@ IsMessage(msg, dst, src, logline) == ASSUME TLCGet("config").mode = "bfs" JsonFile == - IF "JSON" \in DOMAIN IOEnv THEN IOEnv.JSON ELSE "../traces/consensus/append.ndjson" + IF "CCF_RAFT_TRACE" \in DOMAIN IOEnv THEN IOEnv.CCF_RAFT_TRACE ELSE "../traces/consensus/append.ndjson" JsonLog == \* Deserialize the System log as a sequence of records from the log file. \* Run TLC from under the tla/ directory with: - \* $ JSON=../build/startup.ndjson ./tlc.sh consensus/Traceccfraft.tla + \* ./tlc.py --ccf-raft-trace ../build/startup.ndjson consensus/Traceccfraft.tla \* Traces can be generated with: ./make_traces.sh, also under the tla/ directory. ndJsonDeserialize(JsonFile) diff --git a/tla/consistency/TraceMultiNodeReads.tla b/tla/consistency/TraceMultiNodeReads.tla index 2dbaa38d80c0..614e0f0b0e31 100644 --- a/tla/consistency/TraceMultiNodeReads.tla +++ b/tla/consistency/TraceMultiNodeReads.tla @@ -17,7 +17,7 @@ JsonFile == JsonLog == \* Deserialize the System log as a sequence of records from the log file. \* Run TLC from under the tla/ directory with: - \* $ JSON=../build/consistency/trace.ndjson ./tlc.sh consistency/TraceMultiNodeReads.tla + \* ./tlc.py --ccf-raft-trace ../build/consistency/trace.ndjson consistency/TraceMultiNodeReads.tla \* Traces can be generated by running ./tests.sh -VV -R consistency_trace_validation under build/ \* The clients execute transactions sequentially, and so the log is ordered by tx ndJsonDeserialize(JsonFile) diff --git a/tla/tlc.py b/tla/tlc.py new file mode 100755 index 000000000000..63a9f0f01de9 --- /dev/null +++ b/tla/tlc.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the Apache 2.0 License. +# +# Python TLC wrapper script for the CCF project +# Goals: +# - No dependencies, no venv, no pip install +# - Set sensible defaults with an eye on performance +# - Capture useful switches for CI, debugging +# - Expose specification configuration through CLI +# - Provide a useful --help, and basic sanity checks + +import os +import sys +import shlex +import argparse +import pprint +import pathlib + +DEFAULT_JVM_ARGS = [ + "-XX:+UseParallelGC", # Use parallel garbage collection, for performance +] + + +DEFAULT_CLASSPATH_ARGS = ["-cp", "tla2tools.jar:CommunityModules-deps.jar"] + +USAGE = """ +To forward arguments directly to TLC that the wrapper does not support, +run with the -n flag, and evaluate the output beforehand, e.g. `./tlc.py -n mc Spec.tla` -debugger +""" + + +def cli(): + parser = argparse.ArgumentParser( + description="TLC model checker wrapper for the CCF project", usage=USAGE + ) + + # Common options for all commands + parser.add_argument( + "-x", + action="store_true", + help="Print out command and environment before running", + ) + parser.add_argument( + "-n", + action="store_true", + help="Print out command and environment, but do not run", + ) + # Changes to TLC defaults + parser.add_argument( + "--disable-cdot", action="store_true", help="Disable \\cdot support" + ) + parser.add_argument( + "--workers", + type=str, + default="auto", + help="Number of workers to use, default is 'auto'", + ) + parser.add_argument( + "--checkpoint", type=int, default=0, help="Checkpoint interval, default is 0" + ) + parser.add_argument( + "--lncheck", + type=str, + choices=["final", "default"], + default="final", + help="Liveness check, set to 'default' to run periodically or 'final' to run once at the end, default is final", + ) + # Convenient shortcuts applicable to all commands + parser.add_argument( + "--jmx", + action="store_true", + help="Enable JMX to allow monitoring, use echo 'get -b tlc2.tool:type=ModelChecker CurrentState' | jmxterm -l localhost:55449 -i to query", + ) + parser.add_argument( + "--dot", action="store_true", help="Generate a dot file for the state graph" + ) + parser.add_argument( + "--trace-name", + type=str, + default=None, + help="Name to give to the trace files, defaults to the config name if provided, otherwise to the base name of the spec", + ) + parser.add_argument( + "--config", + type=pathlib.Path, + default=None, + help="Path to the TLA+ configuration, defaults to spec name", + ) + parser.add_argument( + "--difftrace", + action="store_true", + help="When printing a trace, show only the differences between states", + ) + + subparsers = parser.add_subparsers(dest="cmd") + + # Model checking + mc = subparsers.add_parser("mc", help="Model checking") + # Control spec options of MCccfraft + mc.add_argument( + "--term-count", + type=int, + default=0, + help="Number of terms the nodes are allowed to advance through, defaults to 0", + ) + mc.add_argument( + "--request-count", + type=int, + default=3, + help="Number of requests the nodes are allowed to advance through, defaults to 3", + ) + mc.add_argument( + "--raft-configs", + type=str, + default="1C2N", + help="Raft configuration sequence, defaults to 1C2N", + ) + mc.add_argument( + "--disable-check-quorum", action="store_true", help="Disable CheckQuorum action" + ) + + # Trace validation + tv = subparsers.add_parser("tv", help="Trace validation") + # DFS is a good default for trace validation + tv.add_argument( + "--disable-dfs", + action="store_true", + help="Set TLC to use depth-first search", + ) + tv.add_argument( + "--ccf-raft-trace", + type=pathlib.Path, + default=None, + help="Path to a CCF Raft trace .ndjson file, for example produced by make_traces.sh", + ) + + # Simulation + sim = subparsers.add_parser("sim", help="Simulation") + sim.add_argument( + "--num", + type=int, + default=None, + help="Number of behaviours to simulate per worker thread", + ) + sim.add_argument( + "--depth", + type=int, + default=500, + help="Set the depth of the simulation, defaults to 500", + ) + sim.add_argument( + "--max-seconds", + type=int, + default=1200, + help="Set the timeout of the simulation, defaults to 1200 seconds", + ) + + parser.add_argument( + "spec", type=pathlib.Path, help="Path to the TLA+ specification" + ) + + return parser + + +CI = "CI" in os.environ + + +if __name__ == "__main__": + env = {} + args = cli().parse_args() + jvm_args = DEFAULT_JVM_ARGS + cp_args = DEFAULT_CLASSPATH_ARGS + tlc_args = [ + "-workers", + args.workers, + "-checkpoint", + str(args.checkpoint), + "-lncheck", + args.lncheck, + ] + trace_name = args.trace_name or os.path.basename(args.config or args.spec).replace( + ".tla", "" + ) + + if CI: + # When run in CI, format output for GitHub, and participate in statistics collection + jvm_args.append("-Dtlc2.TLC.ide=Github") + jvm_args.append( + "-Dutil.ExecutionStatisticsCollector.id=be29f6283abeed2fb1fd0be898bc6601" + ) + + if args.trace_name or CI: + # When run in CI, or told explicitly, dump a trace + tlc_args.extend(["-dumpTrace", "json", f"{trace_name}.trace.json"]) + if args.jmx: + jvm_args.append("-Dcom.sun.management.jmxremote") + jvm_args.append("-Dcom.sun.management.jmxremote.port=55449") + jvm_args.append("-Dcom.sun.management.jmxremote.ssl=false") + jvm_args.append("-Dcom.sun.management.jmxremote.authenticate=false") + if not args.disable_cdot: + jvm_args.append("-Dtlc2.tool.impl.Tool.cdot=true") + if args.config is not None: + tlc_args.extend(["-config", args.config]) + if args.dot: + tlc_args.extend( + ["-dump", "dot,constrained,colorize,actionlabels", f"{trace_name}.dot"] + ) + if args.difftrace: + tlc_args.extend(["-difftrace"]) + + if args.cmd == "mc": + if args.term_count is not None: + env["TERM_COUNT"] = str(args.term_count) + if args.request_count is not None: + env["REQUEST_COUNT"] = str(args.request_count) + if args.raft_configs is not None: + env["RAFT_CONFIGS"] = args.raft_configs + if args.disable_check_quorum is not None: + env["DISABLE_CHECK_QUORUM"] = "true" + elif args.cmd == "tv": + if not args.disable_dfs: + jvm_args.append("-Dtlc2.tool.queue.IStateQueue=StateDeque") + if args.ccf_raft_trace is not None: + env["CCF_RAFT_TRACE"] = args.ccf_raft_trace + elif args.cmd == "sim": + tlc_args.extend(["-simulate"]) + if args.num is not None: + tlc_args.extend([f"num={args.num}"]) + env["SIM_TIMEOUT"] = str(args.max_seconds) + if args.depth is not None: + tlc_args.extend(["-depth", str(args.depth)]) + else: + raise ValueError(f"Unknown command: {args.cmd}") + + cmd = ["java"] + jvm_args + cp_args + ["tlc2.TLC"] + tlc_args + [args.spec] + if args.x or args.n: + env_prefix = " ".join( + f"{key}={shlex.quote(value)}" for key, value in env.items() + ) + print(f"env {env_prefix} {shlex.join(str(arg) for arg in cmd)}") + if args.n: + sys.exit(0) + merged_env = os.environ.copy() + merged_env.update(env) + os.execvpe("java", cmd, merged_env) diff --git a/tla/tlc.sh b/tla/tlc.sh deleted file mode 100755 index e2981d753ede..000000000000 --- a/tla/tlc.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the Apache 2.0 License. -# Original License below -# Adapted from: https://github.com/pmer/tla-bin - -if [ "${CI}" ] || [ "${SYSTEM_TEAMFOUNDATIONCOLLECTIONURI}" ]; then - JVM_OPTIONS+=("-Dtlc2.TLC.ide=Github" "-Dutil.ExecutionStatisticsCollector.id=be29f6283abeed2fb1fd0be898bc6601") -fi - -# Run with JMX enabled, allows monitoring the current state by running -# echo 'get -b tlc2.tool:type=ModelChecker CurrentState' | jmxterm -l localhost:55449 -i -if [ "${JMX}" ]; then - JVM_OPTIONS+=("-Dcom.sun.management.jmxremote" "-Dcom.sun.management.jmxremote.port=55449" "-Dcom.sun.management.jmxremote.ssl=false" "-Dcom.sun.management.jmxremote.authenticate=false") -fi - -# See https://docs.oracle.com/en/java/javase/20/gctuning/available-collectors.html#GUID-F215A508-9E58-40B4-90A5-74E29BF3BD3C -# > If (a) peak application performance is the first priority and (b) there are no pause-time requirements or pauses of one second or longer are acceptable, -# > then let the VM select the collector or select the parallel collector with -XX:+UseParallelGC. -# Simulation, nor Model Checking seem to work well with G1GC, but Trace Validation does not for some reason, and is much slower unless -XX:+UseParallelGC is used. -# In all cases, pauses don't matter, only throughput does. - -exec java -XX:+UseParallelGC -Dtlc2.tool.impl.Tool.cdot=true "${JVM_OPTIONS[@]}" -cp tla2tools.jar:CommunityModules-deps.jar tlc2.TLC "$@" -checkpoint 0 -lncheck final - - -# Original License -# Copyright 2017 Paul Merrill - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. \ No newline at end of file diff --git a/tla/tlc_debug.sh b/tla/tlc_debug.sh index e58c6aa12126..9cd178d5e43d 100755 --- a/tla/tlc_debug.sh +++ b/tla/tlc_debug.sh @@ -6,7 +6,7 @@ # The debug invariant(s) should be the only invariant(s), otherwise # this script might falsely return without errors -./tlc.sh "$@" +./tlc.py "$@" status=$? # TLC safety violation returns error code 12