Skip to content

Commit

Permalink
fixes update propagation bug when citus_set_coordinator_host is cal…
Browse files Browse the repository at this point in the history
…led more than once (#6837)

DESCRIPTION: Fixes update propagation bug when
`citus_set_coordinator_host` is called more than once.

Fixes #6731.

(cherry picked from commit a20f7e1)
  • Loading branch information
aykut-bozkurt committed Apr 13, 2023
1 parent e14f4c3 commit 1a9066c
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5 deletions.
26 changes: 21 additions & 5 deletions src/backend/distributed/metadata/node_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ static void BlockDistributedQueriesOnMetadataNodes(void);
static WorkerNode * TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple);
static bool NodeIsLocal(WorkerNode *worker);
static void SetLockTimeoutLocally(int32 lock_cooldown);
static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort);
static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort,
bool localOnly);
static bool UnsetMetadataSyncedForAllWorkers(void);
static char * GetMetadataSyncCommandToSetNodeColumn(WorkerNode *workerNode,
int columnIndex,
Expand Down Expand Up @@ -231,8 +232,8 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS)
* do not need to worry about concurrent changes (e.g. deletion) and
* can proceed to update immediately.
*/

UpdateNodeLocation(coordinatorNode->nodeId, nodeNameString, nodePort);
bool localOnly = false;
UpdateNodeLocation(coordinatorNode->nodeId, nodeNameString, nodePort, localOnly);

/* clear cached plans that have the old host/port */
ResetPlanCache();
Expand Down Expand Up @@ -1290,7 +1291,8 @@ citus_update_node(PG_FUNCTION_ARGS)
*/
ResetPlanCache();

UpdateNodeLocation(nodeId, newNodeNameString, newNodePort);
bool localOnly = true;
UpdateNodeLocation(nodeId, newNodeNameString, newNodePort, localOnly);

/* we should be able to find the new node from the metadata */
workerNode = FindWorkerNodeAnyCluster(newNodeNameString, newNodePort);
Expand Down Expand Up @@ -1352,7 +1354,7 @@ SetLockTimeoutLocally(int32 lockCooldown)


static void
UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort)
UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool localOnly)
{
const bool indexOK = true;

Expand Down Expand Up @@ -1396,6 +1398,20 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort)

CommandCounterIncrement();

if (!localOnly && EnableMetadataSync)
{
WorkerNode *updatedNode = FindWorkerNodeAnyCluster(newNodeName, newNodePort);
Assert(updatedNode->nodeId == nodeId);

/* send the delete command to all primary nodes with metadata */
char *nodeDeleteCommand = NodeDeleteCommand(updatedNode->nodeId);
SendCommandToWorkersWithMetadata(nodeDeleteCommand);

/* send the insert command to all primary nodes with metadata */
char *nodeInsertCommand = NodeListInsertCommand(list_make1(updatedNode));
SendCommandToWorkersWithMetadata(nodeInsertCommand);
}

systable_endscan(scanDescriptor);
table_close(pgDistNode, NoLock);
}
Expand Down
16 changes: 16 additions & 0 deletions src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,28 @@ CREATE TABLE loc1 (id int PRIMARY KEY);
INSERT INTO loc1 SELECT i FROM generate_series(1,100) i;
CREATE TABLE loc2 (id int REFERENCES loc1(id));
INSERT INTO loc2 SELECT i FROM generate_series(1,100) i;
-- citus_set_coordinator_host with wrong port
SELECT citus_set_coordinator_host('localhost', 9999);
citus_set_coordinator_host
---------------------------------------------------------------------

(1 row)

-- citus_set_coordinator_host with correct port
SELECT citus_set_coordinator_host('localhost', :master_port);
citus_set_coordinator_host
---------------------------------------------------------------------

(1 row)

-- show coordinator port is correct on all workers
SELECT * FROM run_command_on_workers($$SELECT row(nodename,nodeport) FROM pg_dist_node WHERE groupid = 0$$);
nodename | nodeport | success | result
---------------------------------------------------------------------
localhost | 9060 | t | (localhost,57636)
localhost | 57637 | t | (localhost,57636)
(2 rows)

SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true);
citus_add_local_table_to_metadata
---------------------------------------------------------------------
Expand Down
5 changes: 5 additions & 0 deletions src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@ INSERT INTO loc1 SELECT i FROM generate_series(1,100) i;
CREATE TABLE loc2 (id int REFERENCES loc1(id));
INSERT INTO loc2 SELECT i FROM generate_series(1,100) i;

-- citus_set_coordinator_host with wrong port
SELECT citus_set_coordinator_host('localhost', 9999);
-- citus_set_coordinator_host with correct port
SELECT citus_set_coordinator_host('localhost', :master_port);
-- show coordinator port is correct on all workers
SELECT * FROM run_command_on_workers($$SELECT row(nodename,nodeport) FROM pg_dist_node WHERE groupid = 0$$);
SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true);

-- Create partitioned distributed table
Expand Down

0 comments on commit 1a9066c

Please sign in to comment.