From 6232707e3126a9075d376267512391e3ae1c3c13 Mon Sep 17 00:00:00 2001 From: andreyaksenov Date: Fri, 17 Nov 2023 16:28:15 +0300 Subject: [PATCH 1/6] 3.0 configuration: update replication tutorials --- .../snippets/replication/README.md | 11 - .../instances.enabled/auto_leader/README.md | 11 + .../instances.enabled/manual_leader/README.md | 11 + .../instances.enabled/manual_leader/myapp.lua | 10 - .../instances.enabled/master_master/README.md | 11 + .../master_master/config.yaml | 7 +- .../master_master/instances.yml | 3 +- .../sharded_cluster}/README.md | 0 doc/concepts/replication/index.rst | 10 +- .../replication/repl_architecture.rst | 2 +- .../replication/repl_leader_elect.rst | 130 ++- doc/concepts/replication/repl_sync.rst | 3 +- doc/how-to/replication/index.rst | 7 +- doc/how-to/replication/repl_add_instances.rst | 166 --- doc/how-to/replication/repl_bootstrap.rst | 1003 +++++++++++------ .../replication/repl_bootstrap_auto.rst | 438 +++++++ .../repl_bootstrap_master_master.rst | 250 ++++ doc/how-to/replication/repl_leader_elect.rst | 114 -- .../replication/repl_remove_instances.rst | 225 ---- doc/how-to/replication/repl_sync.rst | 108 -- .../configuration/configuration_reference.rst | 3 + .../reference_lua/box_ctl/promote.rst | 2 +- .../reference_lua/box_info/election.rst | 50 +- .../reference_lua/box_schema/space_create.rst | 11 +- .../concepts/replication/repl_architecture.po | 2 +- 25 files changed, 1524 insertions(+), 1064 deletions(-) delete mode 100644 doc/code_snippets/snippets/replication/README.md create mode 100644 doc/code_snippets/snippets/replication/instances.enabled/auto_leader/README.md create mode 100644 doc/code_snippets/snippets/replication/instances.enabled/manual_leader/README.md create mode 100644 doc/code_snippets/snippets/replication/instances.enabled/master_master/README.md rename doc/code_snippets/snippets/sharding/{ => instances.enabled/sharded_cluster}/README.md (100%) delete mode 100644 doc/how-to/replication/repl_add_instances.rst create mode 100644 doc/how-to/replication/repl_bootstrap_auto.rst create mode 100644 doc/how-to/replication/repl_bootstrap_master_master.rst delete mode 100644 doc/how-to/replication/repl_leader_elect.rst delete mode 100644 doc/how-to/replication/repl_remove_instances.rst delete mode 100644 doc/how-to/replication/repl_sync.rst diff --git a/doc/code_snippets/snippets/replication/README.md b/doc/code_snippets/snippets/replication/README.md deleted file mode 100644 index 6298a1142a..0000000000 --- a/doc/code_snippets/snippets/replication/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Replication - -A sample application demonstrating various replication features. - -## Running - -To run applications placed in [instances.enabled](instances.enabled), go to the `replication` directory in the terminal and execute the `tt start` command, for example: - -```console -$ tt start auto_leader -``` diff --git a/doc/code_snippets/snippets/replication/instances.enabled/auto_leader/README.md b/doc/code_snippets/snippets/replication/instances.enabled/auto_leader/README.md new file mode 100644 index 0000000000..c8256c2a5c --- /dev/null +++ b/doc/code_snippets/snippets/replication/instances.enabled/auto_leader/README.md @@ -0,0 +1,11 @@ +# Master-replica: automated failover + +A sample application demonstrating how to bootstrap a replica set with [automated failover](https://www.tarantool.io/en/doc/latest/how-to/replication/repl_bootstrap_auto/). 
+ +## Running + +To start all instances, execute the following command in the [replication](../../../replication) directory: + +```console +$ tt start auto_leader +``` diff --git a/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/README.md b/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/README.md new file mode 100644 index 0000000000..e6724f65ee --- /dev/null +++ b/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/README.md @@ -0,0 +1,11 @@ +# Master-replica: manual failover + +A sample application demonstrating how to bootstrap a replica set with [manual failover](https://www.tarantool.io/en/doc/latest/how-to/replication/repl_bootstrap/). + +## Running + +To start all instances, execute the following command in the [replication](../../../replication) directory: + +```console +$ tt start manual_leader +``` diff --git a/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/myapp.lua b/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/myapp.lua index 27b0b6bbf6..321db0aab1 100644 --- a/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/myapp.lua +++ b/doc/code_snippets/snippets/replication/instances.enabled/manual_leader/myapp.lua @@ -8,16 +8,6 @@ function create_space() box.space.bands:create_index('primary', { parts = { 'id' } }) end -function create_sync_space() - box.schema.space.create('bands', { is_sync = true }) - box.space.bands:format({ - { name = 'id', type = 'unsigned' }, - { name = 'band_name', type = 'string' }, - { name = 'year', type = 'unsigned' } - }) - box.space.bands:create_index('primary', { parts = { 'id' } }) -end - function load_data() box.space.bands:insert { 1, 'Roxette', 1986 } box.space.bands:insert { 2, 'Scorpions', 1965 } diff --git a/doc/code_snippets/snippets/replication/instances.enabled/master_master/README.md b/doc/code_snippets/snippets/replication/instances.enabled/master_master/README.md new file mode 100644 index 0000000000..ff0d938a0e --- /dev/null +++ b/doc/code_snippets/snippets/replication/instances.enabled/master_master/README.md @@ -0,0 +1,11 @@ +# Master-master + +A sample application demonstrating how to bootstrap a [master-master](https://www.tarantool.io/en/doc/latest/how-to/replication/repl_bootstrap_master_master/) replica set. 
+ +## Running + +To start all instances, execute the following command in the [replication](../../../replication) directory: + +```console +$ tt start master_master +``` diff --git a/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml b/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml index 85c7ee76dc..5b637cadfa 100644 --- a/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml +++ b/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml @@ -25,4 +25,9 @@ groups: database: mode: rw iproto: - listen: 127.0.0.1:3302 \ No newline at end of file + listen: 127.0.0.1:3302 + instance003: + database: + mode: rw + iproto: + listen: 127.0.0.1:3303 \ No newline at end of file diff --git a/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml b/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml index 75e286d69c..6c765b2e67 100644 --- a/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml +++ b/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml @@ -1,2 +1,3 @@ instance001: -instance002: \ No newline at end of file +instance002: +instance003: \ No newline at end of file diff --git a/doc/code_snippets/snippets/sharding/README.md b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster/README.md similarity index 100% rename from doc/code_snippets/snippets/sharding/README.md rename to doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster/README.md diff --git a/doc/concepts/replication/index.rst b/doc/concepts/replication/index.rst index 17194039f2..5d0e33f501 100644 --- a/doc/concepts/replication/index.rst +++ b/doc/concepts/replication/index.rst @@ -10,7 +10,7 @@ Replication allows multiple Tarantool instances to work on copies of the same databases. The databases are kept in sync because each instance can communicate its changes to all the other instances. -This chapter includes the following sections: +This section includes the following topics: .. toctree:: :maxdepth: 2 @@ -20,9 +20,5 @@ This chapter includes the following sections: repl_sync repl_leader_elect -For practical guides to replication, see the :ref:`How-to section `. -You can learn about :ref:`bootstrapping a replica set `, -:ref:`adding instances ` to the replica set -or :ref:`removing them `, -:ref:`using synchronous replication ` -and :ref:`managing leader elections `. +For practical guides to replication, see :ref:`Replication tutorials `. +You can learn about bootstrapping a replica set, adding instances to the replica set, or removing them. diff --git a/doc/concepts/replication/repl_architecture.rst b/doc/concepts/replication/repl_architecture.rst index ed8850816e..a5c1453167 100644 --- a/doc/concepts/replication/repl_architecture.rst +++ b/doc/concepts/replication/repl_architecture.rst @@ -47,7 +47,7 @@ The following are specifics of adding different types of information to the WAL: * Data change operations on **replication-local** spaces (:doc:`created ` with ``is_local = true``) are written to the WAL but are not replicated. -To learn how to enable replication, check the :ref:`Bootstrapping a replica set ` guide. +To learn how to enable replication, check the :ref:`Bootstrapping a replica set ` guide. .. 
_replication_stages: diff --git a/doc/concepts/replication/repl_leader_elect.rst b/doc/concepts/replication/repl_leader_elect.rst index 1031cd55b3..8805029498 100644 --- a/doc/concepts/replication/repl_leader_elect.rst +++ b/doc/concepts/replication/repl_leader_elect.rst @@ -11,7 +11,7 @@ on the base of Tarantool and decreases dependency on external tools for replica set management. To learn how to configure and monitor automated leader elections, -check the :ref:`how-to guide `. +check :ref:`Managing leader elections `. The following topics are described below: @@ -44,9 +44,9 @@ Leader election is described below. The system behavior can be specified exactly according to the Raft algorithm. To do this: * Ensure that the user has only synchronous spaces. - * Set the :ref:`replication_synchro_quorum ` option to ``N / 2 + 1``. - * Set the :ref:`replication_synchro_timeout ` option to infinity. - * In the :ref:`election_fencing_mode ` option, select either the ``soft`` mode (the default) + * Set the :ref:`replication.synchro_quorum ` option to ``N / 2 + 1``. + * Set the :ref:`replication.synchro_timeout ` option to infinity. + * In the :ref:`replication.election_fencing_mode ` option, select either the ``soft`` mode (the default) or the ``strict`` mode, which is more restrictive. .. _repl_leader_elect_process: @@ -71,11 +71,11 @@ for itself and sends vote requests to other nodes. Upon receiving vote requests, a node votes for the first of them, and then cannot do anything in the same term but wait for a leader to be elected. -The node that collected a quorum of votes defined by the :ref:`replication_synchro_quorum ` parameter +The node that collected a quorum of votes defined by the :ref:`replication.synchro_quorum ` parameter becomes the leader and notifies other nodes about that. Also, a split vote can happen when no nodes received a quorum of votes. In this case, -after a :ref:`random timeout `, +after a random timeout, each node increases its term and starts a new election round if no new vote request with a greater term arrives during this time. Eventually, a leader is elected. @@ -87,7 +87,7 @@ All the non-leader nodes are called *followers*. The nodes that start a new election round are called *candidates*. The elected leader sends heartbeats to the non-leader nodes to let them know it is alive. -In case there are no heartbeats for the period of :ref:`replication_timeout ` * 4, +In case there are no heartbeats for the period of :ref:`replication.timeout ` * 4, a non-leader node starts a new election if the following conditions are met: * The node has a quorum of connections to other cluster members. @@ -96,7 +96,7 @@ a non-leader node starts a new election if the following conditions are met: .. note:: A cluster member considers the leader node to be alive if the member received heartbeats from the leader at least - once during the ``replication_timeout * 4``, + once during the ``replication.timeout * 4``, and there are no replication errors (the connection is not broken due to timeout or due to an error). Terms and votes are persisted by each instance to preserve certain Raft guarantees. @@ -105,7 +105,7 @@ During the election, the nodes prefer to vote for those ones that have the newest data. So as if an old leader managed to send something before its death to a quorum of replicas, that data wouldn't be lost. 
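+As a quick illustration of these internals, you can inspect the election state of any instance
+from its console with ``box.info.election``: it shows the node's current role, its term, and the
+instance it voted for in this term. The values below are only a sample and depend on the state
+of a particular replica set (the output fields are described in the Monitoring section below):
+
+.. code-block:: console
+
+    tarantool> box.info.election
+    ---
+    - state: follower
+      vote: 2
+      leader: 2
+      term: 3
+    ...
+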
-When :ref:`election is enabled `, there must be connections +When election is enabled, there must be connections between each node pair so as it would be the full mesh topology. This is needed because election messages for voting and other internal things need a direct connection between the nodes. @@ -117,26 +117,26 @@ Once the leader is elected, it considers itself in the leader position until rec This can lead to a split situation if the other nodes elect a new leader upon losing the connectivity to the previous one. The issue is resolved in Tarantool version :doc:`2.10.0 ` by introducing the leader *fencing* mode. -The mode can be switched by the :ref:`election_fencing_mode ` configuration parameter. +The mode can be switched by the :ref:`replication.election_fencing_mode ` configuration parameter. When the fencing is set to ``soft`` or ``strict``, the leader resigns its leadership if it has less than -:ref:`replication_synchro_quorum ` of alive connections to the cluster nodes. +:ref:`replication.synchro_quorum ` of alive connections to the cluster nodes. The resigning leader receives the status of a follower in the current election term and becomes read-only. -Leader *fencing* can be turned off by setting the :ref:`election_fencing_mode ` configuration parameter to ``off``. +Leader *fencing* can be turned off by setting the :ref:`replication.election_fencing_mode ` configuration parameter to ``off``. In ``soft`` mode, a connection is considered dead if there are no responses for -:ref:`4*replication_timeout ` seconds both on the current leader and the followers. +:ref:`4 * replication.timeout ` seconds both on the current leader and the followers. In ``strict`` mode, a connection is considered dead if there are no responses -for :ref:`2*replication_timeout ` seconds on the current leader and for -:ref:`4*replication_timeout ` seconds on the followers. +for :ref:`2 * replication.timeout ` seconds on the current leader and for +:ref:`4 * replication.timeout ` seconds on the followers. This improves chances that there is only one leader at any time. -Fencing applies to the instances that have the :ref:`election_mode ` set to "candidate" or "manual". +Fencing applies to the instances that have the :ref:`replication.election_mode ` set to "candidate" or "manual". .. _repl_leader_elect_splitbrain: There can still be a situation when a replica set has two leaders working independently (so-called *split-brain*). -It can happen, for example, if a user mistakenly lowered the :ref:`replication_synchro_quorum ` below ``N / 2 + 1``. +It can happen, for example, if a user mistakenly lowered the :ref:`replication.synchro_quorum ` below ``N / 2 + 1``. In this situation, to preserve the data integrity, if an instance detects the split-brain anomaly in the incoming replication data, it breaks the connection with the instance sending the data and writes the ``ER_SPLIT_BRAIN`` error in the log. @@ -155,3 +155,99 @@ to the other nodes. Term numbers also work as a kind of filter. For example, if election is enabled on two nodes and ``node1`` has the term number less than ``node2``, then ``node2`` doesn't accept any transactions from ``node1``. + + +.. _how-to-repl_leader_elect: + +Managing leader elections +------------------------- + +.. _repl_leader_elect_config: + +Configuration +~~~~~~~~~~~~~ + +.. 
code-block:: yaml + + replication: + election_mode: + election_fencing_mode: + election_timeout: + timeout: + synchro_quorum: + + +* :ref:`replication.election_mode ` -- specifies the role of a node in the leader election + process. +* :ref:`replication.election_fencing_mode ` -- specifies the :ref:`leader fencing mode `. +* :ref:`replication.election_timeout ` -- specifies the timeout between election rounds if the + previous round ended up with a split vote. +* :ref:`replication.timeout ` -- a time interval (in seconds) used by a master to send heartbeat requests to a replica when there are no updates to send to this replica. +* :ref:`replication.synchro_quorum ` -- a number of replicas that should confirm the receipt of a :ref:`synchronous ` transaction before it can finish its commit. + +It is important to know that being a leader is not the only requirement for a node to be writable. +The leader should also satisfy the following requirements: + +* The :ref:`database.mode ` option is set to ``rw``. + +* The leader shouldn't be in the orphan state. + +Nothing prevents you from setting the ``database.mode`` option to ``ro``, +but the leader won't be writable then. The option doesn't affect the +election process itself, so a read-only instance can still vote and become +a leader. + +.. _repl_leader_elect_monitoring: + +Monitoring +~~~~~~~~~~ + +To monitor the current state of a node regarding the leader election, use the :doc:`box.info.election ` function. + +**Example:** + +.. code-block:: console + + tarantool> box.info.election + --- + - state: follower + vote: 0 + leader: 0 + term: 1 + ... + +The Raft-based election implementation logs all its actions +with the ``RAFT:`` prefix. The actions are new Raft message handling, +node state changing, voting, and term bumping. + +.. _repl_leader_elect_important: + +Important notes +~~~~~~~~~~~~~~~ + +Leader election doesn't work correctly if the election quorum is set to less or equal +than `` / 2`` because in that case, a split vote can lead to +a state when two leaders are elected at once. + +For example, suppose there are five nodes. When the quorum is set to ``2``, ``node1`` +and ``node2`` can both vote for ``node1``. ``node3`` and ``node4`` can both vote +for ``node5``. In this case, ``node1`` and ``node5`` both win the election. +When the quorum is set to the cluster majority, that is +``( / 2) + 1`` or greater, the split vote is impossible. + +That should be considered when adding new nodes. +If the majority value is changing, it's better to update the quorum on all the existing nodes +before adding a new one. + +Also, the automated leader election doesn't bring many benefits in terms of data +safety when used *without* :ref:`synchronous replication `. +If the replication is asynchronous and a new leader gets elected, +the old leader is still active and considers itself the leader. +In such case, nothing stops +it from accepting requests from clients and making transactions. +Non-synchronous transactions are successfully committed because +they are not checked against the quorum of replicas. +Synchronous transactions fail because they are not able +to collect the quorum -- most of the replicas reject +these old leader's transactions since it is not a leader anymore. 
+ diff --git a/doc/concepts/replication/repl_sync.rst b/doc/concepts/replication/repl_sync.rst index 40b21ff4b0..21a70d414e 100644 --- a/doc/concepts/replication/repl_sync.rst +++ b/doc/concepts/replication/repl_sync.rst @@ -15,8 +15,7 @@ to a replica, from the client's point of view the transaction will disappear. are not considered committed and are not responded to a client until they are replicated onto some number of replicas. -To learn how to enable and use synchronous replication, -check the :ref:`guide `. +To enable synchronous replication, use the :ref:`space_opts.is_sync ` option when creating or altering a space. Synchronous and asynchronous transactions ----------------------------------------- diff --git a/doc/how-to/replication/index.rst b/doc/how-to/replication/index.rst index 463aa246eb..a7cfc1f561 100644 --- a/doc/how-to/replication/index.rst +++ b/doc/how-to/replication/index.rst @@ -1,4 +1,5 @@ .. _how-to-replication: +.. _replication-setup: Replication tutorials ===================== @@ -7,8 +8,6 @@ Replication tutorials :maxdepth: 2 repl_bootstrap - repl_add_instances - repl_remove_instances - repl_sync - repl_leader_elect + repl_bootstrap_auto + repl_bootstrap_master_master \ No newline at end of file diff --git a/doc/how-to/replication/repl_add_instances.rst b/doc/how-to/replication/repl_add_instances.rst deleted file mode 100644 index 8eb87e5ce8..0000000000 --- a/doc/how-to/replication/repl_add_instances.rst +++ /dev/null @@ -1,166 +0,0 @@ -.. _replication-add_instances: - -Adding instances -================ - -.. _replication-add_replica: - -This tutorial is intended as a follow-up to the -:ref:`replication bootstrapping ` guide. -It continues building on the examples from that page. -It is recommended that you complete the bootstrapping guide before you proceed. - - -Adding a replica ----------------- - -.. image:: mr-1m-2r-mesh-add.png - :align: center - -To add a second **replica** instance to the **master-replica** set from our -:ref:`bootstrapping example `, we need an -analog of the instance file that we created for the first replica in that set: - -.. code-block:: lua - - -- instance file for replica #2 - box.cfg{ - listen = 3301, - replication = {'replicator:password@192.168.0.101:3301', -- master URI - 'replicator:password@192.168.0.102:3301', -- replica #1 URI - 'replicator:password@192.168.0.103:3301'}, -- replica #2 URI - read_only = true - } - box.once("schema", function() - box.schema.user.create('replicator', {password = 'password'}) - box.schema.user.grant('replicator', 'replication') -- grant replication role - box.schema.space.create("test") - box.space.test:create_index("primary") - print('box.once executed on replica #2') - end) - -Here we add the URI of replica #2 to the :ref:`replication ` -parameter, so now it contains three URIs. - -After we launch the new replica instance, it gets connected to the master -instance and retrieves the master's write-ahead-log and snapshot files: - -.. code-block:: console - - $ # launching replica #2 - $ tarantool replica2.lua - 2017-06-14 14:54:33.927 [46945] main/101/replica2.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 14:54:33.927 [46945] main/101/replica2.lua C> log level 5 - 2017-06-14 14:54:33.928 [46945] main/101/replica2.lua I> mapping 268435456 bytes for tuple arena... 
- 2017-06-14 14:54:33.930 [46945] main/104/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 14:54:33.930 [46945] main/104/applier/replicator@192.168.0.10 I> authenticated - 2017-06-14 14:54:33.930 [46945] main/101/replica2.lua I> bootstrapping replica from 192.168.0.101:3301 - 2017-06-14 14:54:33.933 [46945] main/104/applier/replicator@192.168.0.10 I> initial data received - 2017-06-14 14:54:33.933 [46945] main/104/applier/replicator@192.168.0.10 I> final data received - 2017-06-14 14:54:33.934 [46945] snapshot/101/main I> saving snapshot `/var/lib/tarantool/replica2/00000000000000000010.snap.inprogress' - 2017-06-14 14:54:33.934 [46945] snapshot/101/main I> done - 2017-06-14 14:54:33.935 [46945] main/101/replica2.lua I> vinyl checkpoint done - 2017-06-14 14:54:33.935 [46945] main/101/replica2.lua I> ready to accept requests - 2017-06-14 14:54:33.935 [46945] main/101/replica2.lua I> set 'read_only' configuration option to true - 2017-06-14 14:54:33.936 [46945] main C> entering the event loop - -Since we are adding a read-only instance, there is no need to dynamically -update the ``replication`` parameter on the other running instances. This update -would be required if we :ref:`added a master instance `. - -However, we recommend specifying the URI of replica #3 in all instance files of the -replica set. This will keep all the files consistent with each other and with -the current replication topology, and so will help to avoid configuration errors -in case of further configuration updates and replica set restart. - -.. _replication-add_master: - -Adding a master ---------------- - -.. image:: mm-3m-mesh-add.png - :align: center - -To add a third master instance to the **master-master** set from our -:ref:`bootstrapping example `, we need an -analog of the instance files that we created to bootstrap the other master -instances in that set: - -.. code-block:: lua - - -- instance file for master #3 - box.cfg{ - listen = 3301, - replication = {'replicator:password@192.168.0.101:3301', -- master#1 URI - 'replicator:password@192.168.0.102:3301', -- master#2 URI - 'replicator:password@192.168.0.103:3301'}, -- master#3 URI - read_only = true, -- temporarily read-only - } - box.once("schema", function() - box.schema.user.create('replicator', {password = 'password'}) - box.schema.user.grant('replicator', 'replication') -- grant replication role - box.schema.space.create("test") - box.space.test:create_index("primary") - end) - -Here we make the following changes: - -* Add the URI of master #3 to the :ref:`replication ` - parameter. -* Temporarily specify :ref:`read_only=true ` to disable - data-change operations on the instance. After launch, master #3 will act as a - replica until it retrieves all data from the other masters in the replica set. - -After we launch master #3, it gets connected to the other master -instances and retrieves their write-ahead-log and snapshot files: - -.. code-block:: console - - $ # launching master #3 - $ tarantool master3.lua - 2017-06-14 17:10:00.556 [47121] main/101/master3.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 17:10:00.557 [47121] main/101/master3.lua C> log level 5 - 2017-06-14 17:10:00.557 [47121] main/101/master3.lua I> mapping 268435456 bytes for tuple arena... 
- 2017-06-14 17:10:00.559 [47121] iproto/101/main I> binary: bound to [::]:3301 - 2017-06-14 17:10:00.559 [47121] main/104/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 17:10:00.559 [47121] main/105/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.102:3301 - 2017-06-14 17:10:00.559 [47121] main/106/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.103:3301 - 2017-06-14 17:10:00.559 [47121] main/105/applier/replicator@192.168.0.10 I> authenticated - 2017-06-14 17:10:00.559 [47121] main/101/master3.lua I> bootstrapping replica from 192.168.0.102:3301 - 2017-06-14 17:10:00.562 [47121] main/105/applier/replicator@192.168.0.10 I> initial data received - 2017-06-14 17:10:00.562 [47121] main/105/applier/replicator@192.168.0.10 I> final data received - 2017-06-14 17:10:00.562 [47121] snapshot/101/main I> saving snapshot `/Users/e.shebunyaeva/work/tarantool-test-repl/master3_dir/00000000000000000009.snap.inprogress' - 2017-06-14 17:10:00.562 [47121] snapshot/101/main I> done - 2017-06-14 17:10:00.564 [47121] main/101/master3.lua I> vinyl checkpoint done - 2017-06-14 17:10:00.564 [47121] main/101/master3.lua I> ready to accept requests - 2017-06-14 17:10:00.565 [47121] main/101/master3.lua I> set 'read_only' configuration option to true - 2017-06-14 17:10:00.565 [47121] main C> entering the event loop - 2017-06-14 17:10:00.565 [47121] main/104/applier/replicator@192.168.0.10 I> authenticated - -Next, we add the URI of master #3 to the ``replication`` parameter on the existing two -masters. Replication-related parameters are dynamic, so we only need to make a -``box.cfg{}`` request on each of the running instances: - -.. code-block:: tarantoolsession - - # adding master #3 URI to replication sources - tarantool> box.cfg{replication = - > {'replicator:password@192.168.0.101:3301', - > 'replicator:password@192.168.0.102:3301', - > 'replicator:password@192.168.0.103:3301'}} - --- - ... - -When master #3 catches up with the other masters' state, we can disable -read-only mode for this instance: - -.. code-block:: tarantoolsession - - # making master #3 a real master - tarantool> box.cfg{read_only=false} - --- - ... - -We also recommend to specify master #3 URI in all instance files in order to -keep all the files consistent with each other and with the current replication -topology. diff --git a/doc/how-to/replication/repl_bootstrap.rst b/doc/how-to/replication/repl_bootstrap.rst index a6f1a81bff..9bfbb6a972 100644 --- a/doc/how-to/replication/repl_bootstrap.rst +++ b/doc/how-to/replication/repl_bootstrap.rst @@ -1,386 +1,647 @@ .. _replication-bootstrap: +.. _replication-master_replica_bootstrap: -Bootstrapping a replica set -=========================== +Master-replica: manual failover +=============================== -.. _replication-setup: +**Example on GitHub**: `manual_leader `_ -Replication setup ------------------ -To enable replication, you need to specify two parameters in a ``box.cfg{}`` -request: +.. _replication-tt-env: -* :ref:`replication ` which defines the - replication source(s), and -* :ref:`read_only ` which is ``true`` for a - replica and ``false`` for a master. +Prerequisites +------------- -Both these parameters are "dynamic". This allows a replica to become a master -and vice versa on the fly with the help of a ``box.cfg{}`` request. +Before starting this tutorial: + +1. Install the :ref:`tt ` utility. + +2. 
Create a tt environment in the current directory by executing the :ref:`tt init ` command. + +3. Inside the ``instances.enabled`` directory of the created tt environment, create the ``manual_leader`` directory. + +4. Inside ``instances.enabled/manual_leader``, create the ``instances.yml`` and ``config.yaml`` files: + + - ``instances.yml`` specifies instances to :ref:`run ` in the current environment and should look like this: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/instances.yml + :language: yaml + :end-at: instance002: + :dedent: + + - The ``config.yaml`` file is intended to store a :ref:`replica set configuration `. + + + +.. _replication-master_replica_configuring: + +Configuring a replica set +------------------------- + +This section describes how to configure a replica set in ``config.yaml``. + +.. _replication-master_replica_configuring_failover_mode: + +Step 1: Configuring a failover mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, set the :ref:`replication.failover ` option to ``manual``: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :start-at: replication: + :end-at: failover: manual + :dedent: + +.. _replication-master_replica_configuring_topology: + +Step 2: Defining a replica set topology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Define a replica set topology inside the :ref:`groups ` section: + +- The :ref:`leader ` option sets ``instance001`` as a replica set leader. +- The :ref:`iproto.listen ` option specifies an address used to listen for incoming requests and allows replicas to communicate with each other. + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :start-at: groups: + :end-at: listen: 127.0.0.1:3302 + :dedent: + + +.. _replication-master_replica_configuring_credentials: + +Step 3: Creating a user for replication +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the :ref:`credentials ` section, create the ``replicator`` user with the ``replication`` role: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :start-at: credentials: + :end-at: roles: [replication] + :dedent: + + +.. _replication-master_replica_configuring_advertise: + +Step 4: Specifying advertise URIs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set :ref:`iproto.advertise.peer ` to advertise the current instance to other replica set members: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :start-at: iproto: + :end-at: peer: replicator@ + :dedent: + +.. _replication-master_replica_configuring_result: + +Resulting configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +The resulting replica set configuration should look as follows: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :end-at: listen: 127.0.0.1:3302 + :dedent: + + + + +.. _replication-master_replica_work: + +Working with a replica set +-------------------------- + +.. _replication-master_replica_starting: + +Starting instances +~~~~~~~~~~~~~~~~~~ + +1. After configuring a replica set, execute the :ref:`tt start ` command from the :ref:`tt environment directory `: + + .. code-block:: console + + $ tt start manual_leader + • Starting an instance [manual_leader:instance001]... + • Starting an instance [manual_leader:instance002]... + +2. 
Check that instances are in the ``RUNNING`` status using the :ref:`tt status ` command: + + .. code-block:: console + + $ tt status manual_leader + INSTANCE STATUS PID + manual_leader:instance001 RUNNING 15272 + manual_leader:instance002 RUNNING 15273 + + +.. _replication-master_replica_status: + +Checking a replica set status +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Connect to ``instance001`` using :ref:`tt connect `: + + .. code-block:: console + + $ tt connect manual_leader:instance001 + • Connecting to the instance... + • Connected to manual_leader:instance001 + +2. Make sure that the instance is in the ``running`` state by executing :ref:`box.info.status `: + + .. code-block:: console + + manual_leader:instance001> box.info.status + --- + - running + ... + +3. Check that the instance is writable using ``box.info.ro``: + + .. code-block:: console + + manual_leader:instance001> box.info.ro + --- + - false + ... + +4. Execute ``box.info.replication`` to check a replica set status. + For ``instance002``, ``upstream.status`` and ``downstream.status`` should be ``follow``. + + .. code-block:: console + + manual_leader:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 7 + name: instance001 + 2: + id: 2 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 0 + upstream: + status: follow + idle: 0.3893879999996 + peer: replicator@127.0.0.1:3302 + lag: 0.00028800964355469 + name: instance002 + downstream: + status: follow + idle: 0.37777199999982 + vclock: {1: 7} + lag: 0 + ... + + +.. _replication-master_replica_add_data: + +Adding data +~~~~~~~~~~~ + +To check that a replica (``instance002``) gets all updates from the master, follow the steps below: + +1. On ``instance001``, create a space and add data as described in :ref:`CRUD operation examples `. + +2. Open the second terminal, connect to ``instance002`` using ``tt connect``, and use the ``select`` operation to make sure data is replicated. + +3. Check that :ref:`box.info.vclock ` values are the same on both instances: + + - ``instance001``: + + .. code-block:: console + + manual_leader:instance001> box.info.vclock + --- + - {1: 21} + ... + + - ``instance002``: + + .. code-block:: console + + manual_leader:instance002> box.info.vclock + --- + - {1: 21} + ... + + + +.. _replication-add_instances: + +Adding instances +---------------- + +This section describes how to add a new replica to a replica set. + +.. _replication-add_instances-update-config: + +Adding an instance to the configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Add ``instance003`` to the ``instances.yml`` file: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/instances.yml + :language: yaml + :dedent: + +2. Add ``instance003`` with the specified ``iproto.listen`` option to the ``config.yaml`` file: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :start-at: groups: + :end-at: listen: 127.0.0.1:3303 + :dedent: + + +.. _replication-add_instances-start-instance: + +Starting an instance +~~~~~~~~~~~~~~~~~~~~ + +1. Open the third terminal to work with a new instance. + Start ``instance003`` using ``tt start``: + + .. code-block:: console + + $ tt start manual_leader:instance003 + • Starting an instance [manual_leader:instance003]... + +2. Check a replica set status using ``tt status``: + + .. 
code-block:: console + + $ tt status manual_leader + INSTANCE STATUS PID + manual_leader:instance001 RUNNING 15272 + manual_leader:instance002 RUNNING 15273 + manual_leader:instance003 RUNNING 15551 + + +.. _replication-add_instances-reload-config: + +Reloading configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +1. Connect to ``instance003`` using ``tt connect``: + + .. code-block:: console + + $ tt connect manual_leader:instance003 + • Connecting to the instance... + • Connected to manual_leader:instance001 + +2. Reload configurations on all three instances using the ``reload()`` function provided by the :ref:`config ` module: + + - ``instance001``: + + .. code-block:: console + + manual_leader:instance001> require('config'):reload() + --- + ... + + - ``instance002``: + + .. code-block:: console + + manual_leader:instance002> require('config'):reload() + --- + ... + + - ``instance003``: + + .. code-block:: console + + manual_leader:instance003> require('config'):reload() + --- + ... + + +3. Execute ``box.info.replication`` to check a replica set status. + Make sure that ``upstream.status`` and ``downstream.status`` are ``follow`` for ``instance003``. + + .. code-block:: console + + manual_leader:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 21 + name: instance001 + 2: + id: 2 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 0 + upstream: + status: follow + idle: 0.052655000000414 + peer: replicator@127.0.0.1:3302 + lag: 0.00010204315185547 + name: instance002 + downstream: + status: follow + idle: 0.09503500000028 + vclock: {1: 21} + lag: 0.00026917457580566 + 3: + id: 3 + uuid: 9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6 + lsn: 0 + upstream: + status: follow + idle: 0.77522099999987 + peer: replicator@127.0.0.1:3303 + lag: 0.0001838207244873 + name: instance003 + downstream: + status: follow + idle: 0.33186100000012 + vclock: {1: 21} + lag: 0 + ... -.. _replication-master_replica_bootstrap: -Master-replica bootstrap ------------------------- - -Let us first bootstrap a simple **master-replica** set containing two instances, -each located on its own machine. For easier administration, we make the -:ref:`instance files ` almost identical. - -.. image:: mr-1m-1r-twoway.png - :align: center - -Here is an example of the master's instance file: - -.. code-block:: lua - - -- instance file for the master - box.cfg{ - listen = 3301, - replication = {'replicator:password@192.168.0.101:3301', -- master URI - 'replicator:password@192.168.0.102:3301'}, -- replica URI - read_only = false - } - box.once("schema", function() - box.schema.user.create('replicator', {password = 'password'}) - box.schema.user.grant('replicator', 'replication') -- grant replication role - box.schema.space.create("test") - box.space.test:create_index("primary") - print('box.once executed on master') - end) - -where: - -* the ``box.cfg()`` :ref:`listen ` parameter defines a URI - (port 3301 in our example), on which the master can accept connections from - replicas. -* the ``box.cfg()`` :ref:`replication ` parameter - defines the URIs at which all instances in the replica set can accept connections. - It includes the replica's URI as well, although the replica is not a replication - source right now. This parameter is mandatory only for master-master or full-mesh - cluster setups. - - .. note:: - - For security reasons, we recommend that administrators prevent unauthorized - replication sources by associating a password with every user that has a - replication :ref:`role `. 
That way, the :ref:`URI - ` for ``replication`` parameter must have the long form - ``username:password@host:port``. - -* the :ref:`read_only = false ` parameter setting enables - data-change operations on the instance and makes the instance act as a master, - not as a replica. *That is the only parameter setting in our instance files - that will differ.* -* the :doc:`box.once() ` function contains database initialization logic - that should be executed only once during the replica set lifetime. - -In this example, we create a space with a primary index, and a user for -replication purposes. We also say ``print('box.once executed on master')`` -so that it will later be visible on a console whether ``box.once()`` was executed. - -.. note:: - - Replication requires privileges. We can grant privileges for accessing spaces - directly to the user who will start the instance. However, it is more usual - to grant privileges for accessing spaces to a - :ref:`role `, and then grant the role to the user who - will start the replica. - -Here we use Tarantool's predefined role named "replication" which by default -grants "read" privileges for all database objects ("universe"), and we can -change privileges for this role as required. - -In the replica's instance file, we set the ``read_only`` parameter to "true", and -say ``print('box.once executed on replica')`` so that later it will be visible -that ``box.once()`` was not executed more than once. -Otherwise the replica's instance file is identical to the master's instance file. - -.. code-block:: lua - - -- instance file for the replica - box.cfg{ - listen = 3301, - replication = {'replicator:password@192.168.0.101:3301', -- master URI - 'replicator:password@192.168.0.102:3301'}, -- replica URI - read_only = true - } - box.once("schema", function() - box.schema.user.create('replicator', {password = 'password'}) - box.schema.user.grant('replicator', 'replication') -- grant replication role - box.schema.space.create("test") - box.space.test:create_index("primary") - print('box.once executed on replica') - end) - -.. note:: - - The replica does not inherit the master’s configuration parameters, such as - those making the :ref:`checkpoint daemon ` run on - the master. To get the same behavior, set the relevant parameters - explicitly so that they are the same on both master and replica. - -Now we can launch the two instances. The master... - -.. code-block:: console - - $ # launching the master - $ tarantool master.lua - 2017-06-14 14:12:03.847 [18933] main/101/master.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 14:12:03.848 [18933] main/101/master.lua C> log level 5 - 2017-06-14 14:12:03.849 [18933] main/101/master.lua I> mapping 268435456 bytes for tuple arena... - 2017-06-14 14:12:03.859 [18933] iproto/101/main I> binary: bound to [::]:3301 - 2017-06-14 14:12:03.861 [18933] main/105/applier/replicator@192.168.0. I> can't connect to master - 2017-06-14 14:12:03.861 [18933] main/105/applier/replicator@192.168.0. coio.cc:105 !> SystemError connect, called on fd 14, aka 192.168.0.102:56736: Connection refused - 2017-06-14 14:12:03.861 [18933] main/105/applier/replicator@192.168.0. I> will retry every 1 second - 2017-06-14 14:12:03.861 [18933] main/104/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 14:12:19.878 [18933] main/105/applier/replicator@192.168.0. 
I> remote master is 1.7.4 at 192.168.0.102:3301 - 2017-06-14 14:12:19.879 [18933] main/101/master.lua I> initializing an empty data directory - 2017-06-14 14:12:19.908 [18933] snapshot/101/main I> saving snapshot `/var/lib/tarantool/master/00000000000000000000.snap.inprogress' - 2017-06-14 14:12:19.914 [18933] snapshot/101/main I> done - 2017-06-14 14:12:19.914 [18933] main/101/master.lua I> vinyl checkpoint done - 2017-06-14 14:12:19.917 [18933] main/101/master.lua I> ready to accept requests - 2017-06-14 14:12:19.918 [18933] main/105/applier/replicator@192.168.0. I> failed to authenticate - 2017-06-14 14:12:19.918 [18933] main/105/applier/replicator@192.168.0. xrow.cc:431 E> ER_LOADING: Instance bootstrap hasn't finished yet - box.once executed on master - 2017-06-14 14:12:19.920 [18933] main C> entering the event loop - -... (the display confirms that ``box.once()`` was executed on the master) -- and the replica: - -.. code-block:: console - - $ # launching the replica - $ tarantool replica.lua - 2017-06-14 14:12:19.486 [18934] main/101/replica.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 14:12:19.486 [18934] main/101/replica.lua C> log level 5 - 2017-06-14 14:12:19.487 [18934] main/101/replica.lua I> mapping 268435456 bytes for tuple arena... - 2017-06-14 14:12:19.494 [18934] iproto/101/main I> binary: bound to [::]:3311 - 2017-06-14 14:12:19.495 [18934] main/104/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 14:12:19.495 [18934] main/105/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.102:3302 - 2017-06-14 14:12:19.496 [18934] main/104/applier/replicator@192.168.0. I> failed to authenticate - 2017-06-14 14:12:19.496 [18934] main/104/applier/replicator@192.168.0. xrow.cc:431 E> ER_LOADING: Instance bootstrap hasn't finished yet - -In both logs, there are messages saying that the replica was bootstrapped from the master: - -.. code-block:: console - - $ # bootstrapping the replica (from the master's log) - <...> - 2017-06-14 14:12:20.503 [18933] main/106/main I> initial data sent. - 2017-06-14 14:12:20.505 [18933] relay/[::ffff:192.168.0.101]:/101/main I> recover from `/var/lib/tarantool/master/00000000000000000000.xlog' - 2017-06-14 14:12:20.505 [18933] main/106/main I> final data sent. - 2017-06-14 14:12:20.522 [18933] relay/[::ffff:192.168.0.101]:/101/main I> recover from `/Users/e.shebunyaeva/work/tarantool-test-repl/master_dir/00000000000000000000.xlog' - 2017-06-14 14:12:20.922 [18933] main/105/applier/replicator@192.168.0. I> authenticated - -.. code-block:: console - - $ # bootstrapping the replica (from the replica's log) - <...> - 2017-06-14 14:12:20.498 [18934] main/104/applier/replicator@192.168.0. I> authenticated - 2017-06-14 14:12:20.498 [18934] main/101/replica.lua I> bootstrapping replica from 192.168.0.101:3301 - 2017-06-14 14:12:20.512 [18934] main/104/applier/replicator@192.168.0. I> initial data received - 2017-06-14 14:12:20.512 [18934] main/104/applier/replicator@192.168.0. 
I> final data received - 2017-06-14 14:12:20.517 [18934] snapshot/101/main I> saving snapshot `/var/lib/tarantool/replica/00000000000000000005.snap.inprogress' - 2017-06-14 14:12:20.518 [18934] snapshot/101/main I> done - 2017-06-14 14:12:20.519 [18934] main/101/replica.lua I> vinyl checkpoint done - 2017-06-14 14:12:20.520 [18934] main/101/replica.lua I> ready to accept requests - 2017-06-14 14:12:20.520 [18934] main/101/replica.lua I> set 'read_only' configuration option to true - 2017-06-14 14:12:20.520 [18934] main C> entering the event loop - -Notice that ``box.once()`` was executed only at the master, although we added -``box.once()`` to both instance files. - -We could as well launch the replica first: - -.. code-block:: console - - $ # launching the replica - $ tarantool replica.lua - 2017-06-14 14:35:36.763 [18952] main/101/replica.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 14:35:36.765 [18952] main/101/replica.lua C> log level 5 - 2017-06-14 14:35:36.765 [18952] main/101/replica.lua I> mapping 268435456 bytes for tuple arena... - 2017-06-14 14:35:36.772 [18952] iproto/101/main I> binary: bound to [::]:3301 - 2017-06-14 14:35:36.772 [18952] main/104/applier/replicator@192.168.0. I> can't connect to master - 2017-06-14 14:35:36.772 [18952] main/104/applier/replicator@192.168.0. coio.cc:105 !> SystemError connect, called on fd 13, aka 192.168.0.101:56820: Connection refused - 2017-06-14 14:35:36.772 [18952] main/104/applier/replicator@192.168.0. I> will retry every 1 second - 2017-06-14 14:35:36.772 [18952] main/105/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.102:3301 - -... and the master later: - -.. code-block:: console - - $ # launching the master - $ tarantool master.lua - 2017-06-14 14:35:43.701 [18953] main/101/master.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 14:35:43.702 [18953] main/101/master.lua C> log level 5 - 2017-06-14 14:35:43.702 [18953] main/101/master.lua I> mapping 268435456 bytes for tuple arena... - 2017-06-14 14:35:43.709 [18953] iproto/101/main I> binary: bound to [::]:3301 - 2017-06-14 14:35:43.709 [18953] main/105/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.102:3301 - 2017-06-14 14:35:43.709 [18953] main/104/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 14:35:43.709 [18953] main/101/master.lua I> initializing an empty data directory - 2017-06-14 14:35:43.721 [18953] snapshot/101/main I> saving snapshot `/var/lib/tarantool/master/00000000000000000000.snap.inprogress' - 2017-06-14 14:35:43.722 [18953] snapshot/101/main I> done - 2017-06-14 14:35:43.723 [18953] main/101/master.lua I> vinyl checkpoint done - 2017-06-14 14:35:43.723 [18953] main/101/master.lua I> ready to accept requests - 2017-06-14 14:35:43.724 [18953] main/105/applier/replicator@192.168.0. I> failed to authenticate - 2017-06-14 14:35:43.724 [18953] main/105/applier/replicator@192.168.0. xrow.cc:431 E> ER_LOADING: Instance bootstrap hasn't finished yet - box.once executed on master - 2017-06-14 14:35:43.726 [18953] main C> entering the event loop - 2017-06-14 14:35:43.779 [18953] main/103/main I> initial data sent. - 2017-06-14 14:35:43.780 [18953] relay/[::ffff:192.168.0.101]:/101/main I> recover from `/var/lib/tarantool/master/00000000000000000000.xlog' - 2017-06-14 14:35:43.780 [18953] main/103/main I> final data sent. 
- 2017-06-14 14:35:43.796 [18953] relay/[::ffff:192.168.0.102]:/101/main I> recover from `/var/lib/tarantool/master/00000000000000000000.xlog' - 2017-06-14 14:35:44.726 [18953] main/105/applier/replicator@192.168.0. I> authenticated - -In this case, the replica would wait for the master to become available, so the -launch order doesn't matter. Our ``box.once()`` logic would also be executed -only once, at the master. - -.. code-block:: console - - $ # the replica has eventually connected to the master - $ # and got bootstrapped (from the replica's log) - 2017-06-14 14:35:43.777 [18952] main/104/applier/replicator@192.168.0. I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 14:35:43.777 [18952] main/104/applier/replicator@192.168.0. I> authenticated - 2017-06-14 14:35:43.777 [18952] main/101/replica.lua I> bootstrapping replica from 192.168.0.199:3310 - 2017-06-14 14:35:43.788 [18952] main/104/applier/replicator@192.168.0. I> initial data received - 2017-06-14 14:35:43.789 [18952] main/104/applier/replicator@192.168.0. I> final data received - 2017-06-14 14:35:43.793 [18952] snapshot/101/main I> saving snapshot `/var/lib/tarantool/replica/00000000000000000005.snap.inprogress' - 2017-06-14 14:35:43.793 [18952] snapshot/101/main I> done - 2017-06-14 14:35:43.795 [18952] main/101/replica.lua I> vinyl checkpoint done - 2017-06-14 14:35:43.795 [18952] main/101/replica.lua I> ready to accept requests - 2017-06-14 14:35:43.795 [18952] main/101/replica.lua I> set 'read_only' configuration option to true - 2017-06-14 14:35:43.795 [18952] main C> entering the event loop .. _replication-controlled_failover: -Controlled failover -------------------- +Performing manual failover +-------------------------- + +This section shows how to perform manual failover and change a replica set leader. + +.. _replication-controlled_failover_read_only: + +Switching instances to read-only mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. In the ``config.yaml`` file, change the replica set leader from ``instance001`` to ``null``: + + .. code-block:: yaml + + replicaset001: + leader: null + +2. Reload configurations on all three instances using :ref:`config:reload() ` and check that instances are in read-only mode. + The example below shows how to do this for ``instance001``: + + .. code-block:: console + + manual_leader:instance001> require('config'):reload() + --- + ... + manual_leader:instance001> box.info.ro + --- + - true + ... + manual_leader:instance001> box.info.ro_reason + --- + - config + ... + + +3. Make sure that :ref:`box.info.vclock ` values are the same on all instances: + + - ``instance001``: + + .. code-block:: console + + manual_leader:instance001> box.info.vclock + --- + - {1: 21} + ... + + - ``instance002``: + + .. code-block:: console + + manual_leader:instance002> box.info.vclock + --- + - {1: 21} + ... + + + - ``instance003``: + + .. code-block:: console + + manual_leader:instance003> box.info.vclock + --- + - {1: 21} + ... + + +.. _replication-controlled_failover_new_leader: + +Configuring a new leader +~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Change a replica set leader in ``config.yaml`` to ``instance002``: + + .. code-block:: yaml + + replicaset001: + leader: instance002 + +2. Reload configuration on all instances using :ref:`config:reload() `. + +3. Make sure that ``instance002`` is a new master: + + .. code-block:: console + + manual_leader:instance002> box.info.ro + --- + - false + ... + +4. Check replication status using ``box.info.replication``. + + +.. 
_replication-remove_instances: + +Removing instances +------------------ + +This section describes the process of removing an instance from a replica set. + +Before removing an instance, make sure it is in read-only mode. +If the instance is a master, perform :ref:`manual failover `. + +.. _replication-remove_instances-disconnect: + +Disconnecting an instance +~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Clear the ``iproto`` option for ``instance003`` by setting its value to ``{}``: + + .. code-block:: yaml + + instance003: + iproto: {} + +2. Reload configurations on ``instance001`` and ``instance002``: + + - ``instance001``: + + .. code-block:: console + + manual_leader:instance001> require('config'):reload() + --- + ... + + - ``instance002``: + + .. code-block:: console + + manual_leader:instance002> require('config'):reload() + --- + ... + +3. Check that the ``upstream`` section is missing for ``instance003`` by executing ``box.info.replication[3]``: + + .. code-block:: console + + manual_leader:instance001> box.info.replication[3] + --- + - id: 3 + uuid: 9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6 + lsn: 0 + downstream: + status: follow + idle: 0.4588760000006 + vclock: {1: 21} + lag: 0 + name: instance003 + ... + + +.. _replication-remove_instances-stop: + +Stopping an instance +~~~~~~~~~~~~~~~~~~~~ + +1. Stop ``instance003`` using the :ref:`tt stop ` command: + + .. code-block:: console + + $ tt stop manual_leader:instance003 + • The Instance manual_leader:instance003 (PID = 15551) has been terminated. + + +2. Check that ``downstream.status`` is ``stopped`` for ``instance003``: + + .. code-block:: console + + manual_leader:instance001> box.info.replication[3] + --- + - id: 3 + uuid: 9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6 + lsn: 0 + downstream: + status: stopped + message: 'unexpected EOF when reading from socket, called on fd 27, aka 127.0.0.1:3301, + peer of 127.0.0.1:54185: Broken pipe' + system_message: Broken pipe + name: instance003 + ... + + +.. _replication-remove_instances-update-config: + +Removing an instance from the configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Remove ``instance003`` from the ``instances.yml`` file: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/instances.yml + :language: yaml + :end-at: instance002 + :dedent: + +2. Remove ``instance003`` from ``config.yaml``: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/manual_leader/config.yaml + :language: yaml + :start-at: instances + :end-at: listen: 127.0.0.1:3302 + :dedent: + +3. Reload configurations on ``instance001`` and ``instance002``: + + - ``instance001``: + + .. code-block:: console + + manual_leader:instance001> require('config'):reload() + --- + ... + + - ``instance002``: + + .. code-block:: console + + manual_leader:instance002> require('config'):reload() + --- + ... + + +.. _replication-remove_instances-remove_cluster: + +Removing an instance from the '_cluster' space +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Select all the tuples in the :ref:`box.space._cluster ` system space: + + .. code-block:: console + + manual_leader:instance002> box.space._cluster:select{} + --- + - - [1, '9bb111c2-3ff5-36a7-00f4-2b9a573ea660', 'instance001'] + - [2, '4cfa6e3c-625e-b027-00a7-29b2f2182f23', 'instance002'] + - [3, '9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6', 'instance003'] + ... + +2. Delete a tuple corresponding to ``instance003``: + + .. 
code-block:: console + + manual_leader:instance002> box.space._cluster:delete(3) + --- + - [3, '9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6', 'instance003'] + ... -To perform a **controlled failover**, that is, swap the roles of the master and -replica, all we need to do is to set ``read_only=true`` at the master, and -``read_only=false`` at the replica. The order of actions is important here. -If a system is running in production, we do not want concurrent writes happening -both at the replica and the master. Nor do we want the new replica to accept -any writes until it has finished fetching all replication data from the old -master. To compare replica and master state, we can use -:ref:`box.info.signature `. +3. Execute ``box.info.replication`` to check the health status: -1. Set ``read_only=true`` at the master. + .. code-block:: console - .. code-block:: tarantoolsession - - # at the master - tarantool> box.cfg{read_only=true} - -2. Record the master’s current position with ``box.info.signature``, containing - the sum of all LSNs in the master’s vector clock. - - .. code-block:: tarantoolsession - - # at the master - tarantool> box.info.signature - -3. Wait until the replica’s signature is the same as the master’s. - - .. code-block:: tarantoolsession - - # at the replica - tarantool> box.info.signature - -4. Set ``read_only=false`` at the replica to enable write operations. - - .. code-block:: tarantoolsession - - # at the replica - tarantool> box.cfg{read_only=false} - -These four steps ensure that the replica doesn’t accept new writes until it’s done -fetching writes from the master. - -.. _replication-master_master_bootstrap: - -Master-master bootstrap ------------------------ - -Now let us bootstrap a two-instance **master-master** set. For easier -administration, we make master#1 and master#2 instance files fully identical. - -.. image:: mm-2m-mesh.png - :align: center - -We re-use the master's instance file from the -:ref:`master-replica example ` above. - -.. code-block:: lua - - -- instance file for any of the two masters - box.cfg{ - listen = 3301, - replication = {'replicator:password@192.168.0.101:3301', -- master1 URI - 'replicator:password@192.168.0.102:3301'}, -- master2 URI - read_only = false - } - box.once("schema", function() - box.schema.user.create('replicator', {password = 'password'}) - box.schema.user.grant('replicator', 'replication') -- grant replication role - box.schema.space.create("test") - box.space.test:create_index("primary") - print('box.once executed on master #1') - end) - -In the :ref:`replication ` parameter, we define the -URIs of both masters in the replica set and say -``print('box.once executed on master #1')`` so it will be clear when and where the -``box.once()`` logic is executed. - -Now we can launch the two masters. Again, the launch order doesn't matter. -The ``box.once()`` logic will also be executed only once, at the master which -is elected as the replica set :ref:`leader ` at bootstrap. - -.. code-block:: console - - $ # launching master #1 - $ tarantool master1.lua - 2017-06-14 15:39:03.062 [47021] main/101/master1.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 15:39:03.062 [47021] main/101/master1.lua C> log level 5 - 2017-06-14 15:39:03.063 [47021] main/101/master1.lua I> mapping 268435456 bytes for tuple arena... 
- 2017-06-14 15:39:03.065 [47021] iproto/101/main I> binary: bound to [::]:3301 - 2017-06-14 15:39:03.065 [47021] main/105/applier/replicator@192.168.0.10 I> can't connect to master - 2017-06-14 15:39:03.065 [47021] main/105/applier/replicator@192.168.0.10 coio.cc:107 !> SystemError connect, called on fd 14, aka 192.168.0.102:57110: Connection refused - 2017-06-14 15:39:03.065 [47021] main/105/applier/replicator@192.168.0.10 I> will retry every 1 second - 2017-06-14 15:39:03.065 [47021] main/104/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 15:39:08.070 [47021] main/105/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.102:3301 - 2017-06-14 15:39:08.071 [47021] main/105/applier/replicator@192.168.0.10 I> authenticated - 2017-06-14 15:39:08.071 [47021] main/101/master1.lua I> bootstrapping replica from 192.168.0.102:3301 - 2017-06-14 15:39:08.073 [47021] main/105/applier/replicator@192.168.0.10 I> initial data received - 2017-06-14 15:39:08.074 [47021] main/105/applier/replicator@192.168.0.10 I> final data received - 2017-06-14 15:39:08.074 [47021] snapshot/101/main I> saving snapshot `/Users/e.shebunyaeva/work/tarantool-test-repl/master1_dir/00000000000000000008.snap.inprogress' - 2017-06-14 15:39:08.074 [47021] snapshot/101/main I> done - 2017-06-14 15:39:08.076 [47021] main/101/master1.lua I> vinyl checkpoint done - 2017-06-14 15:39:08.076 [47021] main/101/master1.lua I> ready to accept requests - box.once executed on master #1 - 2017-06-14 15:39:08.077 [47021] main C> entering the event loop - -.. code-block:: console - - $ # launching master #2 - $ tarantool master2.lua - 2017-06-14 15:39:07.452 [47022] main/101/master2.lua C> version 1.7.4-52-g980d30092 - 2017-06-14 15:39:07.453 [47022] main/101/master2.lua C> log level 5 - 2017-06-14 15:39:07.453 [47022] main/101/master2.lua I> mapping 268435456 bytes for tuple arena... - 2017-06-14 15:39:07.455 [47022] iproto/101/main I> binary: bound to [::]:3301 - 2017-06-14 15:39:07.455 [47022] main/104/applier/replicator@192.168.0.19 I> remote master is 1.7.4 at 192.168.0.101:3301 - 2017-06-14 15:39:07.455 [47022] main/105/applier/replicator@192.168.0.10 I> remote master is 1.7.4 at 192.168.0.102:3301 - 2017-06-14 15:39:07.455 [47022] main/101/master2.lua I> initializing an empty data directory - 2017-06-14 15:39:07.457 [47022] snapshot/101/main I> saving snapshot `/Users/e.shebunyaeva/work/tarantool-test-repl/master2_dir/00000000000000000000.snap.inprogress' - 2017-06-14 15:39:07.457 [47022] snapshot/101/main I> done - 2017-06-14 15:39:07.458 [47022] main/101/master2.lua I> vinyl checkpoint done - 2017-06-14 15:39:07.459 [47022] main/101/master2.lua I> ready to accept requests - 2017-06-14 15:39:07.460 [47022] main C> entering the event loop - 2017-06-14 15:39:08.072 [47022] main/103/main I> initial data sent. - 2017-06-14 15:39:08.073 [47022] relay/[::ffff:192.168.0.102]:/101/main I> recover from `/Users/e.shebunyaeva/work/tarantool-test-repl/master2_dir/00000000000000000000.xlog' - 2017-06-14 15:39:08.073 [47022] main/103/main I> final data sent. 
- 2017-06-14 15:39:08.077 [47022] relay/[::ffff:192.168.0.102]:/101/main I> recover from `/Users/e.shebunyaeva/work/tarantool-test-repl/master2_dir/00000000000000000000.xlog' - 2017-06-14 15:39:08.461 [47022] main/104/applier/replicator@192.168.0.10 I> authenticated + manual_leader:instance002> box.info.replication + --- + - 1: + id: 1 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 21 + upstream: + status: follow + idle: 0.73316000000159 + peer: replicator@127.0.0.1:3301 + lag: 0.00016212463378906 + name: instance001 + downstream: + status: follow + idle: 0.7269320000014 + vclock: {2: 1, 1: 21} + lag: 0.00083398818969727 + 2: + id: 2 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 1 + name: instance002 + ... diff --git a/doc/how-to/replication/repl_bootstrap_auto.rst b/doc/how-to/replication/repl_bootstrap_auto.rst new file mode 100644 index 0000000000..43bdba552d --- /dev/null +++ b/doc/how-to/replication/repl_bootstrap_auto.rst @@ -0,0 +1,438 @@ +.. _replication-bootstrap-auto: + +Master-replica: automated failover +================================== + +**Example on GitHub**: `auto_leader `_ + + +.. _replication-automated-failover-tt-env: + +Prerequisites +------------- + +Before starting this tutorial: + +1. Install the :ref:`tt ` utility. + +2. Create a tt environment in the current directory by executing the :ref:`tt init ` command. + +3. Inside the ``instances.enabled`` directory of the created tt environment, create the ``auto_leader`` directory. + +4. Inside ``instances.enabled/auto_leader``, create the ``instances.yml`` and ``config.yaml`` files: + + - ``instances.yml`` specifies instances to :ref:`run ` in the current environment and should look like this: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/instances.yml + :language: yaml + :dedent: + + - The ``config.yaml`` file is intended to store a :ref:`replica set configuration `. + + + + +.. _replication-automated-failover-configure-cluster: + +Configuring a replica set +------------------------- + +This section describes how to configure a replica set in ``config.yaml``. + +.. _replication-automated-failover_configuring_failover_mode: + +Step 1: Configuring a failover mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, set the :ref:`replication.failover ` option to ``election``: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/config.yaml + :language: yaml + :start-at: replication: + :end-at: failover: election + :dedent: + +.. _replication-automated-failover_configuring_topology: + +Step 2: Defining a replica set topology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Define a replica set topology inside the :ref:`groups ` section. +The :ref:`iproto.listen ` option specifies an address used to listen for incoming requests and allows replicas to communicate with each other. + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/config.yaml + :language: yaml + :start-at: groups: + :end-at: listen: 127.0.0.1:3303 + :dedent: + + +.. _replication-automated-failover_configuring_credentials: + +Step 3: Creating a user for replication +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the :ref:`credentials ` section, create the ``replicator`` user with the ``replication`` role: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/config.yaml + :language: yaml + :start-at: credentials: + :end-at: roles: [replication] + :dedent: + + +.. 
_replication-automated-failover_configuring_advertise: + +Step 4: Specifying advertise URIs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set :ref:`iproto.advertise.peer ` to advertise the current instance to other replica set members: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/config.yaml + :language: yaml + :start-at: iproto: + :end-at: peer: replicator@ + :dedent: + +.. _replication-automated-failover_configuring_result: + +Resulting configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +The resulting replica set configuration should look as follows: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/config.yaml + :language: yaml + :end-at: listen: 127.0.0.1:3303 + :dedent: + + + + +.. _replication-automated-failover-work: + +Working with a replica set +-------------------------- + +.. _replication-automated-failover-start-instances: + +Starting instances +~~~~~~~~~~~~~~~~~~ + +1. After configuring a replica set, execute the :ref:`tt start ` command from the :ref:`tt environment directory `: + + .. code-block:: console + + $ tt start auto_leader + • Starting an instance [auto_leader:instance001]... + • Starting an instance [auto_leader:instance002]... + • Starting an instance [auto_leader:instance003]... + +2. Check that instances are in the ``RUNNING`` status using the :ref:`tt status ` command: + + .. code-block:: console + + $ tt status auto_leader + INSTANCE STATUS PID + auto_leader:instance001 RUNNING 24768 + auto_leader:instance002 RUNNING 24769 + auto_leader:instance003 RUNNING 24767 + + + +.. _replication-automated-failover-work-status: + +Checking a replica set status +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Connect to ``instance001`` using :ref:`tt connect `: + + .. code-block:: console + + $ tt connect auto_leader:instance001 + • Connecting to the instance... + • Connected to auto_leader:instance001 + +2. Check the instance state in regard to :ref:`leader election ` using :ref:`box.info.election `. + The output below shows that ``instance001`` is a follower while ``instance002`` is a replica set leader. + + .. code-block:: console + + auto_leader:instance001> box.info.election + --- + - leader_idle: 0.77491499999815 + leader_name: instance002 + state: follower + vote: 0 + term: 2 + leader: 1 + ... + +3. Check that ``instance001`` is in read-only mode using ``box.info.ro``: + + .. code-block:: console + + auto_leader:instance001> box.info.ro + --- + - true + ... + +4. Execute ``box.info.replication`` to check a replica set status. + Make sure that ``upstream.status`` and ``downstream.status`` are ``follow`` for ``instance002`` and ``instance003``. + + .. code-block:: console + + auto_leader:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 9 + upstream: + status: follow + idle: 0.8257709999998 + peer: replicator@127.0.0.1:3302 + lag: 0.00012326240539551 + name: instance002 + downstream: + status: follow + idle: 0.81174199999805 + vclock: {1: 9} + lag: 0 + 2: + id: 2 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 0 + name: instance001 + 3: + id: 3 + uuid: 9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6 + lsn: 0 + upstream: + status: follow + idle: 0.83125499999733 + peer: replicator@127.0.0.1:3303 + lag: 0.00010204315185547 + name: instance003 + downstream: + status: follow + idle: 0.83213399999659 + vclock: {1: 9} + lag: 0 + ... + + +.. 
_replication-automated-failover-add-data: + +Adding data +~~~~~~~~~~~ + +To check that replicas (``instance001`` and ``instance003``) get all updates from the master(``instance002``), follow the steps below: + +1. Connect to ``instance002`` using ``tt connect``: + + .. code-block:: console + + $ tt connect auto_leader:instance002 + • Connecting to the instance... + • Connected to auto_leader:instance002 + +2. Create a space and add data as described in :ref:`CRUD operation examples `. + +3. Use the ``select`` operation on ``instance001`` and ``instance003`` to make sure data is replicated. + +4. Check that the 1-st component of :ref:`box.info.vclock ` values are the same on all instances: + + - ``instance001``: + + .. code-block:: console + + auto_leader:instance001> box.info.vclock + --- + - {0: 1, 1: 32} + ... + + - ``instance002``: + + .. code-block:: console + + auto_leader:instance002> box.info.vclock + --- + - {0: 1, 1: 32} + ... + + - ``instance003``: + + .. code-block:: console + + auto_leader:instance003> box.info.vclock + --- + - {0: 1, 1: 32} + ... + + .. NOTE:: + + The 0-th component of ``vclock`` isn't related to replication. + + + + +.. _replication-automated-failover-testing: + +Testing automated failover +-------------------------- + +To test how automated failover works if the current master is stopped, follow the steps below: + +1. Stop the current master instance (``instance002``) using the ``tt stop`` command: + + .. code-block:: console + + $ tt stop auto_leader:instance002 + • The Instance auto_leader:instance002 (PID = 24769) has been terminated. + + +2. On ``instance001``, check ``box.info.election``. + In this example, a new replica set leader is ``instance001``. + + .. code-block:: console + + auto_leader:instance001> box.info.election + --- + - leader_idle: 0 + leader_name: instance001 + state: leader + vote: 2 + term: 3 + leader: 2 + ... + +3. Check replication status using ``box.info.replication`` for ``instance002``: + + - ``upstream.status`` is ``disconnected``. + - ``downstream.status`` is ``stopped``. + + .. code-block:: console + + auto_leader:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 32 + upstream: + peer: replicator@127.0.0.1:3302 + lag: 0.00032305717468262 + status: disconnected + idle: 48.352504000002 + message: 'connect, called on fd 20, aka 127.0.0.1:62575: Connection refused' + system_message: Connection refused + name: instance002 + downstream: + status: stopped + message: 'unexpected EOF when reading from socket, called on fd 32, aka 127.0.0.1:3301, + peer of 127.0.0.1:62204: Broken pipe' + system_message: Broken pipe + 2: + id: 2 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 1 + name: instance001 + 3: + id: 3 + uuid: 9a3a1b9b-8a18-baf6-00b3-a6e5e11fd8b6 + lsn: 0 + upstream: + status: follow + idle: 0.18620999999985 + peer: replicator@127.0.0.1:3303 + lag: 0.00012516975402832 + name: instance003 + downstream: + status: follow + idle: 0.19718099999955 + vclock: {2: 1, 1: 32} + lag: 0.00051403045654297 + ... + + +4. Start ``instance002`` back using ``tt start``: + + .. code-block:: console + + $ tt start auto_leader:instance002 + • Starting an instance [auto_leader:instance002]... + + +.. _replication-automated-failover-new-leader: + +Choosing a leader manually +-------------------------- + +1. Make sure that :ref:`box.info.vclock ` values (excluding the 0-th components) are the same on all instances: + + - ``instance001``: + + .. 
code-block:: console + + auto_leader:instance001> box.info.vclock + --- + - {0: 2, 1: 32, 2: 1} + ... + + - ``instance002``: + + .. code-block:: console + + auto_leader:instance002> box.info.vclock + --- + - {0: 2, 1: 32, 2: 1} + ... + + + - ``instance003``: + + .. code-block:: console + + auto_leader:instance003> box.info.vclock + --- + - {0: 3, 1: 32, 2: 1} + ... + +2. On ``instance002``, run :ref:`box.ctl.promote() ` to choose it as a new replica set leader: + + .. code-block:: console + + auto_leader:instance002> box.ctl.promote() + --- + ... + +3. Check ``box.info.election`` to make sure ``instance002`` is a leader now: + + .. code-block:: console + + auto_leader:instance002> box.info.election + --- + - leader_idle: 0 + leader_name: instance002 + state: leader + vote: 1 + term: 4 + leader: 1 + ... + + + +.. _replication-automated-failover-add-remove-instances: + +Adding and removing instances +----------------------------- + +The process of adding instances to a replica set and removing them is similar for all failover modes. +Learn how to do this from the :ref:`Master-replica: manual failover ` tutorial: + +- :ref:`Adding instances ` +- :ref:`Removing instances ` + +Before removing an instance from a replica set with :ref:`replication.failover ` set to ``election``, make sure this instance is in read-only mode. +If the instance is a master, choose a :ref:`new leader manually `. diff --git a/doc/how-to/replication/repl_bootstrap_master_master.rst b/doc/how-to/replication/repl_bootstrap_master_master.rst new file mode 100644 index 0000000000..93f1835464 --- /dev/null +++ b/doc/how-to/replication/repl_bootstrap_master_master.rst @@ -0,0 +1,250 @@ +.. _replication-bootstrap-master-master: + +Master-master +============= + +**Example on GitHub**: `master_master `_ + + +.. _replication-master-master-tt-env: + +Prerequisites +------------- + +Before starting this tutorial: + +1. Install the :ref:`tt ` utility. + +2. Create a tt environment in the current directory by executing the :ref:`tt init ` command. + +3. Inside the ``instances.enabled`` directory of the created tt environment, create the ``master_master`` directory. + +4. Inside ``instances.enabled/master_master``, create the ``instances.yml`` and ``config.yaml`` files: + + - ``instances.yml`` specifies instances to :ref:`run ` in the current environment and should look like this: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/instances.yml + :language: yaml + :end-at: instance002: + :dedent: + + - The ``config.yaml`` file is intended to store a :ref:`replica set configuration `. + + + + +.. _replication-master-master-configure-cluster: + +Configuring a replica set +------------------------- + +This section describes how to configure a replica set in ``config.yaml``. + + +.. _replication-master-master-configure-failover_mode: + +Step 1: Configuring a failover mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, set the :ref:`replication.failover ` option to ``off``: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/config.yaml + :language: yaml + :start-at: replication: + :end-at: failover: off + :dedent: + + + +.. _replication-master-master-configure-topology: + +Step 2: Defining a replica set topology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Define a replica set topology inside the :ref:`groups ` section: + +- The ``database.mode`` option should be set to ``rw`` to make instances work in read-write mode. 
+- The :ref:`iproto.listen ` option specifies an address used to listen for incoming requests and allows replicas to communicate with each other. + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/config.yaml + :language: yaml + :start-at: groups: + :end-at: listen: 127.0.0.1:3302 + :dedent: + + +.. _replication-master-master-configure_credentials: + +Step 3: Creating a user for replication +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the :ref:`credentials ` section, create the ``replicator`` user with the ``replication`` role: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/config.yaml + :language: yaml + :start-at: credentials: + :end-at: roles: [replication] + :dedent: + + +.. _replication-master-master-configure_advertise: + +Step 4: Specifying advertise URIs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set :ref:`iproto.advertise.peer ` to advertise the current instance to other replica set members: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/config.yaml + :language: yaml + :start-at: iproto: + :end-at: peer: replicator@ + :dedent: + + +.. _replication-master-master-configure_result: + +Resulting configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +The resulting replica set configuration should look as follows: + +.. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/config.yaml + :language: yaml + :end-at: listen: 127.0.0.1:3302 + :dedent: + + + + +.. _replication-master-master-work: + +Working with a replica set +-------------------------- + +.. _replication-master-master-start-instances: + +Starting instances +~~~~~~~~~~~~~~~~~~ + +1. After configuring a replica set, execute the :ref:`tt start ` command from the :ref:`tt environment directory `: + + .. code-block:: console + + $ tt start master_master + • Starting an instance [master_master:instance001]... + • Starting an instance [master_master:instance002]... + +2. Check that instances are in the ``RUNNING`` status using the :ref:`tt status ` command: + + .. code-block:: console + + $ tt status master_master + INSTANCE STATUS PID + master_master:instance001 RUNNING 30818 + master_master:instance002 RUNNING 30819 + + +.. _replication-master-master-check-status: + +Checking a replica set status +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Connect to both instances using :ref:`tt connect `. + Below is the example for ``instance001``: + + .. code-block:: console + + $ tt connect master_master:instance001 + • Connecting to the instance... + • Connected to master_master:instance001 + +2. Check that both instances are writable using ``box.info.ro``: + + - ``instance001``: + + .. code-block:: console + + master_master:instance001> box.info.ro + --- + - false + ... + + - ``instance002``: + + .. code-block:: console + + master_master:instance002> box.info.ro + --- + - false + ... + +3. Execute ``box.info.replication`` to check a replica set status. + For ``instance002``, ``upstream.status`` and ``downstream.status`` should be ``follow``. + + .. code-block:: console + + master_master:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 7 + upstream: + status: follow + idle: 0.21281599999929 + peer: replicator@127.0.0.1:3302 + lag: 0.00031614303588867 + name: instance002 + downstream: + status: follow + idle: 0.21800899999653 + vclock: {1: 7} + lag: 0 + 2: + id: 2 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 0 + name: instance001 + ... 
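The status fields checked above can also be inspected programmatically. The following helper is an illustrative sketch only — it is not part of the sample application, and the function name is an assumption: it walks ``box.info.replication`` and reports every peer whose ``upstream`` or ``downstream`` status is not ``follow``.

.. code-block:: lua

    -- Illustrative helper, not part of the sample application.
    -- Prints every peer whose upstream or downstream status is not 'follow'.
    function check_replication_health()
        for id, peer in pairs(box.info.replication) do
            if id ~= box.info.id then
                local up = peer.upstream and peer.upstream.status or 'absent'
                local down = peer.downstream and peer.downstream.status or 'absent'
                if up ~= 'follow' or down ~= 'follow' then
                    print(string.format('replica %d (%s): upstream=%s, downstream=%s',
                            id, peer.name or 'unknown', up, down))
                end
            end
        end
    end

When the replica set is healthy, calling ``check_replication_health()`` in the instance console prints nothing.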
+ + + + +.. _replication-master-master-add-data: + +Adding data +~~~~~~~~~~~ + +To check that both instances get updates from each other, follow the steps below: + +1. On ``instance001``, create a space and format it as described in :ref:`CRUD operation examples `. Add sample data to this space. + +2. On ``instance002``, use the ``select`` operation to make sure data is replicated. + +3. Add more data to the created space on ``instance002``. + +4. Get back to ``instance001`` and use ``select`` to make sure new data is replicated. + +5. Check that :ref:`box.info.vclock ` values are the same on both instances: + + - ``instance001``: + + .. code-block:: console + + master_master:instance001> box.info.vclock + --- + - {2: 8, 1: 12} + ... + + - ``instance002``: + + .. code-block:: console + + master_master:instance002> box.info.vclock + --- + - {2: 8, 1: 12} + ... + + + + diff --git a/doc/how-to/replication/repl_leader_elect.rst b/doc/how-to/replication/repl_leader_elect.rst deleted file mode 100644 index 2daca37163..0000000000 --- a/doc/how-to/replication/repl_leader_elect.rst +++ /dev/null @@ -1,114 +0,0 @@ -.. _how-to-repl_leader_elect: - -Managing leader elections -========================= - -Starting from version :doc:`2.6.1 `, -Tarantool has the built-in functionality -managing automated leader election in a replica set. -Learn more about the :ref:`concept of leader election `. - -.. _repl_leader_elect_config: - -Configuration -------------- - -.. code-block:: console - - box.cfg({ - election_mode = , - election_fencing_mode = , - election_timeout = , - replication_timeout = , - replication_synchro_quorum = - }) - -* ``election_mode`` -- specifies the role of a node in the leader election - process. For the details, refer to the :ref:`option description ` - in the configuration reference. -* ``election_fencing_mode`` -- specifies the :ref:`leader fencing mode `. - For the details, refer to the :ref:`option description ` in the configuration reference. -* ``election_timeout`` -- specifies the timeout between election rounds if the - previous round ended up with a split vote. For the details, refer to the - :ref:`option description ` in the configuration - reference. -* ``replication_timeout`` -- reuse of the :ref:`replication_timeout ` - configuration option for the purpose of the leader election process. - Heartbeats sent by an active leader have a timeout after which a new election - starts. Heartbeats are sent once per seconds. - The default value is ``1``. The leader is considered dead if it hasn't sent any - heartbeats for the period of ``replication_timeout * 4``. -* ``replication_synchro_quorum`` -- reuse of the :ref:`replication_synchro_quorum ` - option for the purpose of configuring the election quorum. The default value is ``1``, - meaning that each node becomes a leader immediately after voting for itself. - It is best to set up this option value to the ``( / 2) + 1``. - Otherwise, there is no guarantee that there is only one leader at a time. - -It is important to know that being a leader is not the only requirement for a node to be writable. -The leader should also satisfy the following requirements: - -* The :ref:`read_only ` option is set to ``false``. - -* The leader shouldn't be in the orphan state. - -Nothing prevents you from setting the ``read_only`` option to ``true``, -but the leader just won't be writable then. The option doesn't affect the -election process itself, so a read-only instance can still vote and become -a leader. - -.. 
_repl_leader_elect_monitoring: - -Monitoring ----------- - -To monitor the current state of a node regarding the leader election, you can -use the ``box.info.election`` function. -For details, -refer to the :doc:`function description `. - -**Example:** - -.. code-block:: console - - tarantool> box.info.election - --- - - state: follower - vote: 0 - leader: 0 - term: 1 - ... - -The Raft-based election implementation logs all its actions -with the ``RAFT:`` prefix. The actions are new Raft message handling, -node state changing, voting, and term bumping. - -.. _repl_leader_elect_important: - -Important notes ---------------- - -Leader election doesn't work correctly if the election quorum is set to less or equal -than `` / 2`` because in that case, a split vote can lead to -a state when two leaders are elected at once. - -For example, suppose there are five nodes. When the quorum is set to ``2``, ``node1`` -and ``node2`` can both vote for ``node1``. ``node3`` and ``node4`` can both vote -for ``node5``. In this case, ``node1`` and ``node5`` both win the election. -When the quorum is set to the cluster majority, that is -``( / 2) + 1`` or greater, the split vote is impossible. - -That should be considered when adding new nodes. -If the majority value is changing, it's better to update the quorum on all the existing nodes -before adding a new one. - -Also, the automated leader election doesn't bring many benefits in terms of data -safety when used *without* :ref:`synchronous replication `. -If the replication is asynchronous and a new leader gets elected, -the old leader is still active and considers itself the leader. -In such case, nothing stops -it from accepting requests from clients and making transactions. -Non-synchronous transactions are successfully committed because -they are not checked against the quorum of replicas. -Synchronous transactions fail because they are not able -to collect the quorum -- most of the replicas reject -these old leader's transactions since it is not a leader anymore. diff --git a/doc/how-to/replication/repl_remove_instances.rst b/doc/how-to/replication/repl_remove_instances.rst deleted file mode 100644 index 4ebd93cc25..0000000000 --- a/doc/how-to/replication/repl_remove_instances.rst +++ /dev/null @@ -1,225 +0,0 @@ -.. _replication-remove_instances: - -Removing instances -================== - -Let's assume that we have the following configured replica set with 3 instances -(*instance1*, *instance2* and *intance3*) and we want to remove *instance2*. - -.. image:: /concepts/replication/images/replication.svg - :align: left - -To remove it politely, follow these steps: - -1. Disconnect *instance2* from the cluster. -2. Disconnect the cluster from *instance2*. -3. Remove *instance2* from the ``_cluster`` space. - -.. image:: replicationX.svg - :align: left - -**Step 1: disconnecting an instance from the cluster** - -On the disconnecting instance *instance2*, run ``box.cfg{}`` -with a blank replication source: - -.. code-block:: tarantoolsession - - tarantool> box.cfg{replication=''} - -Then check that it was disconnected. Take a look at ``box.info.replication`` -on *instance2* (notice that ``replication.{1,3}.upstream`` is absent): - -.. 
code-block:: tarantoolsession - - tarantool> box.info.replication - --- - - 1: - id: 1 - uuid: db89978f-7115-4537-8417-9982bb5a256f - lsn: 9 - -- upstream is absent - downstream: - status: follow - idle: 0.93983899999876 - vclock: {1: 9} - 2: - id: 2 - uuid: 0a756d14-e437-4296-85b0-d27a0621613e - lsn: 0 - 3: - id: 3 - uuid: bb362584-c265-4e53-aeb6-450ae818bf59 - lsn: 0 - -- upstream is absent - downstream: - status: follow - idle: 0.26624799999991 - vclock: {1: 9} - ... - -Check *instance1* and *instance3* as well -(notice that the status of ``replication.2.downstream`` is ``stopped``): - -.. code-block:: tarantoolsession - - -- instance1 - tarantool> box.info.replication - --- - - 1: - id: 1 - uuid: db89978f-7115-4537-8417-9982bb5a256f - lsn: 9 - 2: - id: 2 - uuid: 0a756d14-e437-4296-85b0-d27a0621613e - lsn: 0 - upstream: - status: follow - idle: 0.3533439999992 - peer: replicator@localhost:3302 - lag: 0.0001220703125 - downstream: - status: stopped -- status has changed: - message: unexpected EOF when reading from socket, called on fd 13, aka [::1]:3301, - peer of [::1]:53776 - system_message: Broken pipe - 3: - id: 3 - uuid: bb362584-c265-4e53-aeb6-450ae818bf59 - lsn: 0 - upstream: - status: follow - idle: 0.35327999999936 - peer: replicator@localhost:3303 - lag: 0.00018095970153809 - downstream: - status: follow - idle: 0.68685100000221 - vclock: {1: 9} - ... - -**Step 2: disconnecting the cluster from the decommissioned instance** - -On every other instance in the cluster remove *instance2* from -the ``box.cfg{ replication }`` list and call an appropriate -``box.cfg{ replication = {instance1, instance3} }``: - -.. code-block:: tarantoolsession - - tarantool> box.cfg{ replication = { 'instance1-uri', 'instance3-uri' } } - -Take a look at ``box.info.replication`` on *instance2* to check that *instance1* and *instance3* were -disconnected -(notice that the status of ``replication.{1,3}.downstream`` is ``stopped``): - -.. code-block:: tarantoolsession - - tarantool> box.info.replication - --- - - 1: - id: 1 - uuid: db89978f-7115-4537-8417-9982bb5a256f - lsn: 9 - downstream: - status: stopped -- status has changed - message: unexpected EOF when reading from socket, called on fd 16, aka [::1]:3302, - peer of [::1]:53832 - system_message: Broken pipe - 2: - id: 2 - uuid: 0a756d14-e437-4296-85b0-d27a0621613e - lsn: 0 - 3: - id: 3 - uuid: bb362584-c265-4e53-aeb6-450ae818bf59 - lsn: 0 - downstream: - status: stopped -- status has changed - message: unexpected EOF when reading from socket, called on fd 18, aka [::1]:3302, - peer of [::1]:53825 - system_message: Broken pipe - ... - -Check *instance1* and *instance3* as well -(notice that the status of ``replication.2.upstream`` is ``stopped``): - -.. code-block:: tarantoolsession - - -- instance1 - tarantool> box.info.replication - --- - - 1: - id: 1 - uuid: db89978f-7115-4537-8417-9982bb5a256f - lsn: 9 - 2: - id: 2 - uuid: 0a756d14-e437-4296-85b0-d27a0621613e - lsn: 0 - downstream: - status: stopped -- status has changed - message: unexpected EOF when reading from socket, called on fd 13, aka [::1]:3301, - peer of [::1]:53776 - system_message: Broken pipe - 3: - id: 3 - uuid: bb362584-c265-4e53-aeb6-450ae818bf59 - lsn: 0 - upstream: - status: follow - idle: 0.50240100000156 - peer: replicator@localhost:3303 - lag: 0.00015711784362793 - downstream: - status: follow - idle: 0.14237199999843 - vclock: {1: 9} - ... 
- -**Step 3: persistent removal** - -If a removed instance rejoins later, it will receive all the updates made -by the other instances while it was disconnected. - -If an instance is decommissioned forever, we should clean up the ``_cluster`` space. -First, discover the ``id`` and ``uuid`` of the instance. -On *instance2*, call ``return box.info.id, box.info.uuid``: - -.. code-block:: tarantoolsession - - tarantool> return box.info.id, box.info.uuid - --- - - 2 - - '0a756d14-e437-4296-85b0-d27a0621613e' - ... - -Take a note of ``id`` and ``uuid``. - -Now choose any master from the remaining cluster and perform the following actions on it -(let's assume that we chose *instance1*): - -1. Select all records from the ``_cluster`` space: - -.. code-block:: tarantoolsession - - tarantool> box.space._cluster:select{} - --- - - - [1, 'db89978f-7115-4537-8417-9982bb5a256f'] - - [2, '0a756d14-e437-4296-85b0-d27a0621613e'] - - [3, 'bb362584-c265-4e53-aeb6-450ae818bf59'] - ... - -2. Check if the ``id`` and ``uuid`` of *instance2* are correct and remove them - from the cluster: - -.. code-block:: tarantoolsession - - tarantool> box.space._cluster:delete(2) - --- - - [2, '0a756d14-e437-4296-85b0-d27a0621613e'] - ... - -**Final checks** - -After all modifications, say ``box.info.replication`` to check the health status. diff --git a/doc/how-to/replication/repl_sync.rst b/doc/how-to/replication/repl_sync.rst deleted file mode 100644 index 9c89429eff..0000000000 --- a/doc/how-to/replication/repl_sync.rst +++ /dev/null @@ -1,108 +0,0 @@ -.. _how-to-repl_sync: - -Configuring synchronous replication -=================================== - -Since version :doc:`2.5.1 `, -:ref:`synchronous replication ` -can be enabled per-space by using the ``is_sync`` option: - -.. code-block:: lua - - box.schema.create_space('test1', {is_sync = true}) - -Any transaction doing a DML request on this space becomes synchronous. -Notice that DDL on this space (including truncation) is **not** synchronous. - -To control the behavior of synchronous transactions, there exist global -``box.cfg`` :ref:`options `: - -.. code-block:: lua - - box.cfg{replication_synchro_quorum = } - -.. code-block:: lua - - box.cfg{replication_synchro_quorum = "N / 2 + 1"} - -This option tells how many replicas should confirm the receipt of a synchronous transaction before it is committed. -Since version :doc:`2.5.3 `, the parameter supports dynamic evaluation of the quorum number -(see :ref:`reference for the replication_synchro_quorum parameter ` for details). -Since version :doc:`2.10.0 `, -this option does not account for anonymous replicas. -As a usage example, consider this: - -.. code-block:: lua - - -- Instance 1 - box.cfg{ - listen = 3313, - replication_synchro_quorum = 2, - } - box.schema.user.grant('guest', 'super') - _ = box.schema.space.create('sync', {is_sync=true}) - _ = _:create_index('pk') - -.. code-block:: lua - - -- Instance 2 - box.cfg{ - listen = 3314, - replication = 'localhost:3313' - } - -.. code-block:: lua - - -- Instance 1 - box.space.sync:replace{1} - -When the first instance makes ``replace()``, it won't finish until the second -instance confirms its receipt and successful appliance. Note that the quorum is -set to 2, but the transaction was still committed even though there is only one -replica. This is because the master instance itself also participates in the quorum. - -Now, if the second instance is down, the first one won't be able to commit any -synchronous change. - -.. code-block:: lua - - -- Instance 2 - Ctrl+D - -.. 
code-block:: tarantoolsession - - -- Instance 1 - tarantool> box.space.sync:replace{2} - --- - - error: Quorum collection for a synchronous transaction is timed out - ... - -The transaction wasn't committed because it failed to achieve the quorum in the -given time. The time is a second configuration option: - -.. code-block:: lua - - box.cfg{replication_synchro_timeout = } - -It tells how many seconds to wait for a synchronous transaction quorum -replication until it is declared failed and is rolled back. - -A successful synchronous transaction commit is persisted in the WAL as a special -CONFIRM record. The rollbacks are similarly persisted with a ROLLBACK record. - -The ``timeout`` and ``quorum`` options are not used on replicas. It means if -the master dies, the pending synchronous transactions will be kept waiting on -the replicas until a new master is elected. - - -Tips and tricks ---------------- - -If a transaction is rolled back, it does not mean the ROLLBACK message reached -the replicas. It still can happen that the master node suddenly dies, so the -transaction will be committed by the new master. Your application logic should be -ready for that. - -Synchronous transactions are better to use with full mesh. Then the replicas can -talk to each other in case of the master node's death and still confirm some -pending transactions. diff --git a/doc/reference/configuration/configuration_reference.rst b/doc/reference/configuration/configuration_reference.rst index e5e04f1eb2..cc0ab681fe 100644 --- a/doc/reference/configuration/configuration_reference.rst +++ b/doc/reference/configuration/configuration_reference.rst @@ -1081,6 +1081,9 @@ The ``replication`` section defines configuration parameters related to :ref:`re .. TODO: https://github.com/tarantool/enterprise_doc/issues/253 + + See also: :ref:`Replication tutorials `. + .. NOTE:: ``replication.failover`` can be defined in the global, group, and replica set :ref:`scope `. diff --git a/doc/reference/reference_lua/box_ctl/promote.rst b/doc/reference/reference_lua/box_ctl/promote.rst index 6e871b17aa..b6d75de2d0 100644 --- a/doc/reference/reference_lua/box_ctl/promote.rst +++ b/doc/reference/reference_lua/box_ctl/promote.rst @@ -10,7 +10,7 @@ box.ctl.promote() Wait, then choose new replication leader. - For :ref:`synchronous transactions ` it is + For :ref:`synchronous transactions ` it is possible that a new leader will be chosen but the transactions of the old leader have not been completed. Therefore to finalize the transaction, the function ``box.ctl.promote()`` diff --git a/doc/reference/reference_lua/box_info/election.rst b/doc/reference/reference_lua/box_info/election.rst index 39b99a5e46..68f418ba98 100644 --- a/doc/reference/reference_lua/box_info/election.rst +++ b/doc/reference/reference_lua/box_info/election.rst @@ -4,41 +4,45 @@ box.info.election ================================================================================ -.. module:: box.info +.. module:: box.info -.. data:: election +.. data:: election - Since version :doc:`2.6.1 `. - Show the current state of a replica set node in regards to - :ref:`leader election `. + Since version :doc:`2.6.1 `. - The following information is provided: + Show the current state of a replica set node in regards to :ref:`leader election `. + The following information is provided: - * ``state`` -- election state (mode) of the node. Possible values are ``leader``, ``follower``, or ``candidate``. + * ``state`` -- the election state (mode) of the node. 
Possible values are ``leader``, ``follower``, or ``candidate``. For more details, refer to description of the :ref:`leader election process `. - When election is enabled, the node is writable only in the ``leader`` state. + When :ref:`replication.failover ` is set to ``election``, the node is writable only in the ``leader`` state. - * ``term`` -- current election term. + * ``term`` -- the current election term. - * ``vote`` -- ID of a node the current node votes for. If the value is ``0``, it means the node hasn't voted in the current term yet. + * ``vote`` -- the ID of a node the current node votes for. If the value is ``0``, it means the node hasn't voted in the current term yet. - * ``leader`` -- leader node ID in the current term. If the value is ``0``, it means the node doesn't know which node is the leader in the current term. + * ``leader`` -- a leader node ID in the current term. If the value is ``0``, it means the node doesn't know which node is the leader in the current term. - * ``leader_idle`` -- time in seconds since the last interaction with the known leader. Since version :doc:`2.10.0 `. + * ``leader_name`` -- a leader name. Returns ``nil`` if there is no leader in a cluster or :ref:`box.NULL ` if a leader does not have a name. Since version :doc:`3.0.0 `. - .. note:: + * ``leader_idle`` -- time in seconds since the last interaction with the known leader. Since version :doc:`2.10.0 `. + + .. note:: IDs in the ``box.info.election`` output are the replica IDs visible in the ``box.info.id`` output on each node and in the ``_cluster`` space. - **Example:** + **Example:** + + .. code-block:: tarantoolsession - .. code-block:: tarantoolsession + auto_leader:instance001> box.info.election + --- + - leader_idle: 0 + leader_name: instance001 + state: leader + vote: 2 + term: 3 + leader: 2 + ... - tarantool> box.info.election - --- - - state: follower - term: 2 - vote: 0 - leader: 0 - leader_idle: 0.45112800000061 - ... + See also: :ref:`Master-replica: automated failover `. \ No newline at end of file diff --git a/doc/reference/reference_lua/box_schema/space_create.rst b/doc/reference/reference_lua/box_schema/space_create.rst index 9c98cd22d4..8e51c31767 100644 --- a/doc/reference/reference_lua/box_schema/space_create.rst +++ b/doc/reference/reference_lua/box_schema/space_create.rst @@ -136,10 +136,19 @@ space_opts | Default: ``false`` .. _space_opts_is_sync: + .. _how-to-repl_sync: .. data:: is_sync - Any transaction doing a DML request on this space becomes synchronous. + Any transaction doing a DML request on this space becomes :ref:`synchronous `. + + **Example:** + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/auto_leader/data.lua + :language: lua + :start-at: is_sync = true + :end-at: is_sync = true + :dedent: | Type: boolean | Default: ``false`` diff --git a/locale/ru/LC_MESSAGES/concepts/replication/repl_architecture.po b/locale/ru/LC_MESSAGES/concepts/replication/repl_architecture.po index cf6053508c..7617489e78 100644 --- a/locale/ru/LC_MESSAGES/concepts/replication/repl_architecture.po +++ b/locale/ru/LC_MESSAGES/concepts/replication/repl_architecture.po @@ -234,7 +234,7 @@ msgstr "" "системой значений -- см. описание конфигурационного параметра " ":ref:`replicaset_uuid `." -msgid "To learn how to enable replication, check the :ref:`how-to guide `." +msgid "To learn how to enable replication, check the :ref:`how-to guide `." msgstr "Чтобы узнать, как включить репликацию, перейдите на страницу " ":ref:`практическое руководство `." 
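The ``is_sync`` example referenced above is included from the ``data.lua`` snippet of the ``auto_leader`` sample, which is not part of this patch. As an illustration only — the space name and format are assumptions borrowed from the other samples in this series — a synchronous space declaration looks like this:

.. code-block:: lua

    -- Creating a space with is_sync = true: any DML transaction on this space
    -- becomes synchronous, that is, it waits for a quorum of replicas to
    -- confirm it before it is committed.
    box.schema.space.create('bands', { is_sync = true })
    box.space.bands:format({
        { name = 'id', type = 'unsigned' },
        { name = 'band_name', type = 'string' },
        { name = 'year', type = 'unsigned' }
    })
    box.space.bands:create_index('primary', { parts = { 'id' } })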
From db417ee7c4af3b1923e42e663bafdd43a86ebb5e Mon Sep 17 00:00:00 2001 From: andreyaksenov Date: Wed, 29 Nov 2023 16:30:17 +0300 Subject: [PATCH 2/6] 3.0 configuration: update master-master sample --- .../master_master/config.yaml | 9 ++++---- .../master_master/instances.yml | 3 +-- .../instances.enabled/master_master/myapp.lua | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 doc/code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua diff --git a/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml b/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml index 5b637cadfa..7eb2da12eb 100644 --- a/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml +++ b/doc/code_snippets/snippets/replication/instances.enabled/master_master/config.yaml @@ -26,8 +26,7 @@ groups: mode: rw iproto: listen: 127.0.0.1:3302 - instance003: - database: - mode: rw - iproto: - listen: 127.0.0.1:3303 \ No newline at end of file + +# Load sample data +app: + file: 'myapp.lua' \ No newline at end of file diff --git a/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml b/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml index 6c765b2e67..75e286d69c 100644 --- a/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml +++ b/doc/code_snippets/snippets/replication/instances.enabled/master_master/instances.yml @@ -1,3 +1,2 @@ instance001: -instance002: -instance003: \ No newline at end of file +instance002: \ No newline at end of file diff --git a/doc/code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua b/doc/code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua new file mode 100644 index 0000000000..321db0aab1 --- /dev/null +++ b/doc/code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua @@ -0,0 +1,22 @@ +function create_space() + box.schema.space.create('bands') + box.space.bands:format({ + { name = 'id', type = 'unsigned' }, + { name = 'band_name', type = 'string' }, + { name = 'year', type = 'unsigned' } + }) + box.space.bands:create_index('primary', { parts = { 'id' } }) +end + +function load_data() + box.space.bands:insert { 1, 'Roxette', 1986 } + box.space.bands:insert { 2, 'Scorpions', 1965 } + box.space.bands:insert { 3, 'Ace of Base', 1987 } + box.space.bands:insert { 4, 'The Beatles', 1960 } + box.space.bands:insert { 5, 'Pink Floyd', 1965 } + box.space.bands:insert { 6, 'The Rolling Stones', 1962 } + box.space.bands:insert { 7, 'The Doors', 1965 } + box.space.bands:insert { 8, 'Nirvana', 1987 } + box.space.bands:insert { 9, 'Led Zeppelin', 1968 } + box.space.bands:insert { 10, 'Queen', 1970 } +end From c35b0164c50e44357fd3df95ae642bb14fa4962a Mon Sep 17 00:00:00 2001 From: andreyaksenov Date: Thu, 30 Nov 2023 13:35:17 +0300 Subject: [PATCH 3/6] 3.0 configuration: update replication tutorials (review fixes 1) --- doc/how-to/replication/repl_bootstrap.rst | 10 +++++++++- doc/how-to/replication/repl_bootstrap_auto.rst | 3 +-- .../replication/repl_bootstrap_master_master.rst | 4 ++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/how-to/replication/repl_bootstrap.rst b/doc/how-to/replication/repl_bootstrap.rst index 9bfbb6a972..a7f1c2abf9 100644 --- a/doc/how-to/replication/repl_bootstrap.rst +++ b/doc/how-to/replication/repl_bootstrap.rst @@ -232,6 +232,10 @@ To check that a replica 
(``instance002``) gets all updates from the master, foll - {1: 21} ... + .. NOTE:: + + Note that a ``vclock`` value might include the 0-th component that is related to local space operations and might differ for different instances in a replica set. + .. _replication-add_instances: @@ -290,6 +294,8 @@ Starting an instance Reloading configuration ~~~~~~~~~~~~~~~~~~~~~~~ +After adding ``instance003`` to the configuration and starting it, configurations on all instances should be reloaded to allow ``instance001`` and ``instance002`` to get data from the new instance in case it becomes a master: + 1. Connect to ``instance003`` using ``tt connect``: .. code-block:: console @@ -598,7 +604,9 @@ Removing an instance from the configuration Removing an instance from the '_cluster' space ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -1. Select all the tuples in the :ref:`box.space._cluster ` system space: +To remove an instance from the replica set permanently, it should be removed from the :ref:`box.space._cluster ` system space: + +1. Select all the tuples in the ``box.space._cluster`` system space: .. code-block:: console diff --git a/doc/how-to/replication/repl_bootstrap_auto.rst b/doc/how-to/replication/repl_bootstrap_auto.rst index 43bdba552d..2cf69935f2 100644 --- a/doc/how-to/replication/repl_bootstrap_auto.rst +++ b/doc/how-to/replication/repl_bootstrap_auto.rst @@ -272,8 +272,7 @@ To check that replicas (``instance001`` and ``instance003``) get all updates fro .. NOTE:: - The 0-th component of ``vclock`` isn't related to replication. - + The 0-th component of ``vclock`` is related to local space operations and might differ for different instances in a replica set. diff --git a/doc/how-to/replication/repl_bootstrap_master_master.rst b/doc/how-to/replication/repl_bootstrap_master_master.rst index 93f1835464..071966d863 100644 --- a/doc/how-to/replication/repl_bootstrap_master_master.rst +++ b/doc/how-to/replication/repl_bootstrap_master_master.rst @@ -207,6 +207,10 @@ Checking a replica set status name: instance001 ... + .. NOTE:: + + Note that a ``vclock`` value might include the 0-th component that is related to local space operations and might differ for different instances in a replica set. + From 3d33b6d451a6b19bf86a90c294bbf6c4cb58cced Mon Sep 17 00:00:00 2001 From: andreyaksenov Date: Thu, 30 Nov 2023 15:56:57 +0300 Subject: [PATCH 4/6] 3.0 configuration: update replication tutorials (resolving conflicts) --- doc/how-to/replication/repl_bootstrap.rst | 4 + .../replication/repl_bootstrap_auto.rst | 6 +- .../repl_bootstrap_master_master.rst | 319 +++++++++++++++++- 3 files changed, 317 insertions(+), 12 deletions(-) diff --git a/doc/how-to/replication/repl_bootstrap.rst b/doc/how-to/replication/repl_bootstrap.rst index a7f1c2abf9..3ee0414a25 100644 --- a/doc/how-to/replication/repl_bootstrap.rst +++ b/doc/how-to/replication/repl_bootstrap.rst @@ -232,10 +232,14 @@ To check that a replica (``instance002``) gets all updates from the master, foll - {1: 21} ... + .. vclock_0th_component_note_start + .. NOTE:: Note that a ``vclock`` value might include the 0-th component that is related to local space operations and might differ for different instances in a replica set. + .. vclock_0th_component_note_end + .. 
_replication-add_instances: diff --git a/doc/how-to/replication/repl_bootstrap_auto.rst b/doc/how-to/replication/repl_bootstrap_auto.rst index 2cf69935f2..c78ac76f40 100644 --- a/doc/how-to/replication/repl_bootstrap_auto.rst +++ b/doc/how-to/replication/repl_bootstrap_auto.rst @@ -270,9 +270,9 @@ To check that replicas (``instance001`` and ``instance003``) get all updates fro - {0: 1, 1: 32} ... - .. NOTE:: - - The 0-th component of ``vclock`` is related to local space operations and might differ for different instances in a replica set. +.. include:: /how-to/replication/repl_bootstrap.rst + :start-after: vclock_0th_component_note_start + :end-before: vclock_0th_component_note_end diff --git a/doc/how-to/replication/repl_bootstrap_master_master.rst b/doc/how-to/replication/repl_bootstrap_master_master.rst index 071966d863..1fb8792985 100644 --- a/doc/how-to/replication/repl_bootstrap_master_master.rst +++ b/doc/how-to/replication/repl_bootstrap_master_master.rst @@ -207,9 +207,9 @@ Checking a replica set status name: instance001 ... - .. NOTE:: - - Note that a ``vclock`` value might include the 0-th component that is related to local space operations and might differ for different instances in a replica set. +.. include:: /how-to/replication/repl_bootstrap.rst + :start-after: vclock_0th_component_note_start + :end-before: vclock_0th_component_note_end @@ -221,13 +221,41 @@ Adding data To check that both instances get updates from each other, follow the steps below: -1. On ``instance001``, create a space and format it as described in :ref:`CRUD operation examples `. Add sample data to this space. +1. On ``instance001``, create a space, format it, and create a primary index: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua + :start-at: box.schema.space.create + :end-at: box.space.bands:create_index + :language: lua + :dedent: + + Then, add sample data to this space: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua + :start-at: Roxette + :end-at: Scorpions + :language: lua + :dedent: + +2. On ``instance002``, use the ``select`` operation to make sure data is replicated: + + .. code-block:: console + + master_master:instance002> box.space.bands:select() + --- + - - [1, 'Roxette', 1986] + - [2, 'Scorpions', 1965] + ... -2. On ``instance002``, use the ``select`` operation to make sure data is replicated. +3. Add more data to the created space on ``instance002``: -3. Add more data to the created space on ``instance002``. + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua + :start-at: Ace of Base + :end-at: The Beatles + :language: lua + :dedent: -4. Get back to ``instance001`` and use ``select`` to make sure new data is replicated. +4. Get back to ``instance001`` and use ``select`` to make sure new records are replicated. 5. Check that :ref:`box.info.vclock ` values are the same on both instances: @@ -237,7 +265,7 @@ To check that both instances get updates from each other, follow the steps below master_master:instance001> box.info.vclock --- - - {2: 8, 1: 12} + - {2: 5, 1: 9} ... - ``instance002``: @@ -246,9 +274,282 @@ To check that both instances get updates from each other, follow the steps below master_master:instance002> box.info.vclock --- - - {2: 8, 1: 12} + - {2: 5, 1: 9} + ... + + + +.. _replication-master-master-resolve-conflicts: + +Resolving replication conflicts +------------------------------- + +.. 
NOTE:: + + To learn how to fix and prevent replication conflicts using trigger functions, see :ref:`Resolving replication conflicts `. + +.. _replication-master-master_conflicting_records: + +Inserting conflicting records +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To insert conflicting records to ``instance001`` and ``instance002``, follow the steps below: + +1. Stop ``instance001`` using the ``tt stop`` command: + + .. code-block:: console + + $ tt stop master_master:instance001 + +2. On ``instance002``, insert a new record: + + .. code-block:: lua + + box.space.bands:insert { 5, 'incorrect data', 0 } + +3. Stop ``instance002`` using ``tt stop``: + + .. code-block:: console + + $ tt stop master_master:instance002 + +4. Start ``instance001`` back: + + .. code-block:: lua + + $ tt start master_master:instance001 + +5. Connect to ``instance001`` and insert a record that should conflict with a record already inserted on ``instance002``: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/myapp.lua + :start-at: Pink Floyd + :end-at: Pink Floyd + :language: lua + :dedent: + +6. Start ``instance002`` back: + + .. code-block:: console + + $ tt start master_master:instance002 + + Then, check ``box.info.replication`` on ``instance001``. + ``upstream.status`` should be ``stopped`` because of the ``Duplicate key exists`` error: + + .. code-block:: console + + master_master:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 9 + upstream: + peer: replicator@127.0.0.1:3302 + lag: 143.52251672745 + status: stopped + idle: 3.9462469999999 + message: Duplicate key exists in unique index "primary" in space "bands" with + old tuple - [5, "Pink Floyd", 1965] and new tuple - [5, "incorrect data", 0] + name: instance002 + downstream: + status: stopped + message: 'unexpected EOF when reading from socket, called on fd 12, aka 127.0.0.1:3301, + peer of 127.0.0.1:59258: Broken pipe' + system_message: Broken pipe + 2: + id: 2 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 6 + name: instance001 + ... + + +.. _replication-master-master-reseed-replica: + +Reseeding a replica +~~~~~~~~~~~~~~~~~~~ + +To resolve a replication conflict, ``instance002`` should get the correct data from ``instance001`` first. +To achieve this, ``instance002`` should be rebootstrapped: + +1. In the ``config.yaml`` file, change ``database.mode`` of ``instance002`` to ``ro``: + + .. code-block:: yaml + + instance002: + database: + mode: ro + +2. Reload configurations on both instances using the ``reload()`` function provided by the :ref:`config ` module: + + - ``instance001``: + + .. code-block:: console + + master_master:instance001> require('config'):reload() + --- + ... + + - ``instance002``: + + .. code-block:: console + + master_master:instance002> require('config'):reload() + --- + ... + +3. Delete write-ahead logs and snapshots stored in the ``var/lib/instance002`` directory. + + .. NOTE:: + + ``var/lib`` is the default directory used by tt to store write-ahead logs and snapshots. + Learn more from :ref:`Configuration `. + +4. Restart ``instance002`` using the :ref:`tt restart ` command: + + .. code-block:: console + + $ tt restart master_master:instance002 + +5. Connect to ``instance002`` and make sure it received the correct data from ``instance001``: + + .. 
code-block:: console + + master_master:instance002> box.space.bands:select() + --- + - - [1, 'Roxette', 1986] + - [2, 'Scorpions', 1965] + - [3, 'Ace of Base', 1987] + - [4, 'The Beatles', 1960] + - [5, 'Pink Floyd', 1965] + ... + + +.. _replication-master-master-resolve-conflict: + +Restarting replication +~~~~~~~~~~~~~~~~~~~~~~ + +After :ref:`reseeding a replica `, you need to resolve a replication conflict that keeps replication stopped: + +1. Execute ``box.info.replication`` on ``instance001``. + ``upstream.status`` is still stopped: + + .. code-block:: console + + master_master:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 9 + upstream: + peer: replicator@127.0.0.1:3302 + lag: 143.52251672745 + status: stopped + idle: 1309.943383 + message: Duplicate key exists in unique index "primary" in space "bands" with + old tuple - [5, "Pink Floyd", 1965] and new tuple - [5, "incorrect data", + 0] + name: instance002 + downstream: + status: follow + idle: 0.47881799999959 + vclock: {2: 6, 1: 9} + lag: 0 + 2: + id: 2 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 6 + name: instance001 + ... + + +2. In the ``config.yaml`` file, clear the ``iproto`` option for ``instance001`` by setting its value to ``{}`` to disconnect this instance from ``instance002``. + Set ``database.mode`` to ``ro``: + + .. code-block:: yaml + + instance001: + database: + mode: ro + iproto: {} + +3. Reload configuration on ``instance001`` only: + + .. code-block:: console + + master_master:instance001> require('config'):reload() + --- + ... + +4. Change ``database.mode`` values back to ``rw`` for both instances and restore ``iproto.listen`` for ``instance001``: + + .. literalinclude:: /code_snippets/snippets/replication/instances.enabled/master_master/config.yaml + :language: yaml + :start-at: instance001 + :end-at: listen: 127.0.0.1:3302 + :dedent: + +5. Reload configurations on both instances one more time: + + - ``instance001``: + + .. code-block:: console + + master_master:instance001> require('config'):reload() + --- ... + - ``instance002``: + + .. code-block:: console + + master_master:instance002> require('config'):reload() + --- + ... + +6. Check ``box.info.replication``. + ``upstream.status`` be ``follow`` now. + + .. code-block:: console + + master_master:instance001> box.info.replication + --- + - 1: + id: 1 + uuid: 4cfa6e3c-625e-b027-00a7-29b2f2182f23 + lsn: 9 + upstream: + status: follow + idle: 0.21281300000192 + peer: replicator@127.0.0.1:3302 + lag: 0.00031113624572754 + name: instance002 + downstream: + status: follow + idle: 0.035179000002245 + vclock: {2: 6, 1: 9} + lag: 0 + 2: + id: 2 + uuid: 9bb111c2-3ff5-36a7-00f4-2b9a573ea660 + lsn: 6 + name: instance001 + ... + + + +.. _replication-master-master-add-remove-instances: + +Adding and removing instances +----------------------------- +The process of adding instances to a replica set and removing them is similar for all failover modes. +Learn how to do this from the :ref:`Master-replica: manual failover ` tutorial: +- :ref:`Adding instances ` +- :ref:`Removing instances ` +Before removing an instance from a replica set with :ref:`replication.failover ` set to ``off``, make sure this instance is in read-only mode. 
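The walkthrough above repairs the conflict after the fact by reseeding ``instance002``. As the note at the top of the conflict section mentions, trigger functions can also make such conflicts deterministic. The sketch below is an illustration only and is not part of the sample application: it assumes the ``bands`` space already exists on the instance where the trigger is registered, and it simply keeps the locally stored tuple when a row arriving via replication collides with it.

.. code-block:: lua

    -- Illustrative conflict-resolution trigger, not part of the sample app.
    -- For rows applied by replication (the 'applier' session) that collide
    -- with an existing tuple, keep the old tuple instead of letting the
    -- applier stop with a 'Duplicate key exists' error.
    box.space.bands:before_replace(function(old, new)
        if old ~= nil and new ~= nil and box.session.type() == 'applier' then
            return old
        end
    end)

A real deployment would usually pick the winner by comparing the tuples (for example, by a timestamp field) instead of always preferring the local copy; see the Resolving replication conflicts topic referenced above for the recommended approach.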

From 33dd5afee90d48819a341e95a2de4ed4f9a24899 Mon Sep 17 00:00:00 2001
From: andreyaksenov
Date: Tue, 5 Dec 2023 14:46:50 +0300
Subject: [PATCH 5/6] 3.0 configuration: update per TW review

---
 doc/how-to/replication/repl_bootstrap.rst               | 7 +++++--
 doc/how-to/replication/repl_bootstrap_auto.rst          | 8 +++++---
 doc/how-to/replication/repl_bootstrap_master_master.rst | 2 ++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/doc/how-to/replication/repl_bootstrap.rst b/doc/how-to/replication/repl_bootstrap.rst
index 3ee0414a25..95af3c9cac 100644
--- a/doc/how-to/replication/repl_bootstrap.rst
+++ b/doc/how-to/replication/repl_bootstrap.rst
@@ -6,6 +6,8 @@ Master-replica: manual failover
 
 **Example on GitHub**: `manual_leader `_
 
+This tutorial shows how to configure and work with a replica set with manual failover.
+
 
 .. _replication-tt-env:
 
@@ -236,7 +238,7 @@ To check that a replica (``instance002``) gets all updates from the master, foll
 
    .. NOTE::
 
-      Note that a ``vclock`` value might include the 0-th component that is related to local space operations and might differ for different instances in a replica set.
+      Note that a ``vclock`` value might include the ``0`` component that is related to local space operations and might differ for different instances in a replica set.
 
      .. vclock_0th_component_note_end
 
@@ -298,7 +300,8 @@ Starting an instance
 Reloading configuration
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-After adding ``instance003`` to the configuration and starting it, configurations on all instances should be reloaded to allow ``instance001`` and ``instance002`` to get data from the new instance in case it becomes a master:
+After you have added ``instance003`` to the configuration and started it, you need to reload configurations on all instances.
+This is required to allow ``instance001`` and ``instance002`` to get data from the new instance in case it becomes a master.
 
 1. Connect to ``instance003`` using ``tt connect``:
 
diff --git a/doc/how-to/replication/repl_bootstrap_auto.rst b/doc/how-to/replication/repl_bootstrap_auto.rst
index c78ac76f40..ca80287eb6 100644
--- a/doc/how-to/replication/repl_bootstrap_auto.rst
+++ b/doc/how-to/replication/repl_bootstrap_auto.rst
@@ -5,6 +5,8 @@ Master-replica: automated failover
 
 **Example on GitHub**: `auto_leader `_
 
+This tutorial shows how to configure and work with a replica set with automated failover.
+
 
 .. _replication-automated-failover-tt-env:
 
@@ -227,7 +229,7 @@ Checking a replica set status
 Adding data
 ~~~~~~~~~~~
 
-To check that replicas (``instance001`` and ``instance003``) get all updates from the master(``instance002``), follow the steps below:
+To check that replicas (``instance001`` and ``instance003``) get all updates from the master (``instance002``), follow the steps below:
 
 1. Connect to ``instance002`` using ``tt connect``:
 
@@ -241,7 +243,7 @@ To check that replicas (``instance001`` and ``instance003``) get all updates fro
 
 3. Use the ``select`` operation on ``instance001`` and ``instance003`` to make sure data is replicated.
 
-4. Check that the 1-st component of :ref:`box.info.vclock ` values are the same on all instances:
+4. Check that the ``1`` component of :ref:`box.info.vclock ` values is the same on all instances:
 
    - ``instance001``:
 
@@ -368,7 +370,7 @@ To test how automated failover works if the current master is stopped, follow th
 Choosing a leader manually
 --------------------------
 
-1. Make sure that :ref:`box.info.vclock ` values (excluding the 0-th components) are the same on all instances:
+1. Make sure that :ref:`box.info.vclock ` values (except the ``0`` components) are the same on all instances:
 
    - ``instance001``:
 
diff --git a/doc/how-to/replication/repl_bootstrap_master_master.rst b/doc/how-to/replication/repl_bootstrap_master_master.rst
index 1fb8792985..677ae52cb7 100644
--- a/doc/how-to/replication/repl_bootstrap_master_master.rst
+++ b/doc/how-to/replication/repl_bootstrap_master_master.rst
@@ -5,6 +5,8 @@ Master-master
 
 **Example on GitHub**: `master_master `_
 
+This tutorial shows how to configure and work with a master-master replica set.
+
 
 .. _replication-master-master-tt-env:
 

From 1a0aa6f5d8c698e859b25f46ba84ab2a7581bb5e Mon Sep 17 00:00:00 2001
From: andreyaksenov
Date: Thu, 7 Dec 2023 10:08:19 +0300
Subject: [PATCH 6/6] 3.0 configuration: update per TW review 2

---
 doc/concepts/replication/repl_leader_elect.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/concepts/replication/repl_leader_elect.rst b/doc/concepts/replication/repl_leader_elect.rst
index 8805029498..b6bc31d08c 100644
--- a/doc/concepts/replication/repl_leader_elect.rst
+++ b/doc/concepts/replication/repl_leader_elect.rst
@@ -226,7 +226,7 @@ Important notes
 ~~~~~~~~~~~~~~~
 
 Leader election doesn't work correctly if the election quorum is set to less or equal
-than `` / 2`` because in that case, a split vote can lead to
+than `` / 2``. In that case, a split vote can lead to
 a state when two leaders are elected at once.
 
 For example, suppose there are five nodes. When the quorum is set to ``2``, ``node1``