fix 3 flaky tests in failure schedule #6846

Merged 1 commit on Apr 13, 2023
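
All three fixes apply the same pattern: the expected output of these failure tests contains concrete node, group, shard, and placement ids, which apparently drifted depending on what ran earlier in the failure schedule, so the tests now pin the relevant catalog sequences with ALTER SEQUENCE ... RESTART just before the ids are observed. A minimal sketch of that pattern, using the restart values from the diffs below (the ids a test then prints also depend on how many it consumes itself, which is why the shard ids come out as 222247/222248 rather than exactly 222222):

-- Pin the Citus catalog sequences so the ids that show up in the expected
-- .out files are stable across runs (restart values taken from this PR).
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 2;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 2;
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 222222;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 333333;
-- With the node-id sequence reset, re-adding the proxied worker now returns
-- nodeid 2 instead of whatever the sequence had advanced to:
SELECT master_add_node('localhost', :worker_2_proxy_port);
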
4 changes: 3 additions & 1 deletion src/test/regress/expected/failure_add_disable_node.out
@@ -187,6 +187,8 @@ ORDER BY placementid;
(1 row)

-- reset cluster to original state
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 2;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 2;
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
@@ -196,7 +198,7 @@ SELECT citus.mitmproxy('conn.allow()');
SELECT master_add_node('localhost', :worker_2_proxy_port);
master_add_node
---------------------------------------------------------------------
4
2
(1 row)

-- verify node is added
@@ -12,6 +12,8 @@ SET citus.shard_count TO 2;
SET citus.shard_replication_factor TO 1;
SET citus.max_adaptive_executor_pool_size TO 1;
SELECT pg_backend_pid() as pid \gset
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 222222;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 333333;
-- make sure coordinator is in the metadata
SELECT citus_set_coordinator_host('localhost', 57636);
citus_set_coordinator_host
@@ -189,8 +191,8 @@ SELECT create_distributed_table_concurrently('table_1', 'id');
SELECT * FROM pg_dist_shard WHERE logicalrelid = 'table_1'::regclass;
logicalrelid | shardid | shardstorage | shardminvalue | shardmaxvalue
---------------------------------------------------------------------
table_1 | 1880080 | t | -2147483648 | -1
table_1 | 1880081 | t | 0 | 2147483647
table_1 | 222247 | t | -2147483648 | -1
table_1 | 222248 | t | 0 | 2147483647
(2 rows)

DROP SCHEMA create_dist_tbl_con CASCADE;
@@ -201,3 +203,5 @@ SELECT citus_remove_node('localhost', 57636);

(1 row)

ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;
42 changes: 17 additions & 25 deletions src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out
@@ -597,8 +597,8 @@ ERROR: connection not open
SELECT * FROM pg_dist_node ORDER BY nodeport;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
4 | 4 | localhost | 9060 | default | f | t | primary | default | f | t
6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
2 | 2 | localhost | 9060 | default | f | t | primary | default | f | t
3 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t
(3 rows)

@@ -626,24 +626,14 @@ UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val;
-- Show that we can still delete from a shard at the node from coordinator
DELETE FROM dist1 WHERE id = :failed_node_val;
-- Show that DDL would still propagate to the node
SET client_min_messages TO NOTICE;
SET citus.log_remote_commands TO 1;
CREATE SCHEMA dummy;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
NOTICE: issuing CREATE SCHEMA dummy
NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
NOTICE: issuing CREATE SCHEMA dummy
NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
NOTICE: issuing WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['dummy']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data;
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
SET citus.log_remote_commands TO 0;
SET client_min_messages TO ERROR;
SELECT * FROM run_command_on_workers($$SELECT nspname FROM pg_namespace WHERE nspname = 'dummy'$$);
nodename | nodeport | success | result
---------------------------------------------------------------------
localhost | 9060 | t | dummy
localhost | 57637 | t | dummy
(2 rows)

-- Successfully activate the node after many failures
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
@@ -654,32 +644,32 @@ SELECT citus.mitmproxy('conn.allow()');
SELECT citus_activate_node('localhost', :worker_2_proxy_port);
citus_activate_node
---------------------------------------------------------------------
4
2
(1 row)

-- Activate the node once more to verify it works again with already synced metadata
SELECT citus_activate_node('localhost', :worker_2_proxy_port);
citus_activate_node
---------------------------------------------------------------------
4
2
(1 row)

-- Show node metadata info on worker2 and coordinator after success
\c - - - :worker_2_port
SELECT * FROM pg_dist_node ORDER BY nodeport;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t
6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
2 | 2 | localhost | 9060 | default | t | t | primary | default | t | t
3 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t
(3 rows)

\c - - - :master_port
SELECT * FROM pg_dist_node ORDER BY nodeport;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t
6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
2 | 2 | localhost | 9060 | default | t | t | primary | default | t | t
3 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t
(3 rows)

@@ -701,3 +691,5 @@ SELECT citus_remove_node('localhost', :master_port);

(1 row)

ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;
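
Besides the id changes above, failure_mx_metadata_sync_multi_trans no longer asserts on the citus.log_remote_commands NOTICE stream for the DDL-propagation check; that output interleaves per-connection messages and distributed transaction ids, which is presumably where the flakiness came from. The test now checks the end state instead: the schema must exist on every worker. A small sketch of that check, reusing the statement from the diff above:

-- Verify the CREATE SCHEMA propagated to the workers without depending on
-- the order or wording of remote-command NOTICE messages:
SELECT * FROM run_command_on_workers(
    $$SELECT nspname FROM pg_namespace WHERE nspname = 'dummy'$$);
-- Expected: one row per worker (localhost:9060 and localhost:57637 here),
-- each with success = t and result = dummy.
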
2 changes: 2 additions & 0 deletions src/test/regress/sql/failure_add_disable_node.sql
@@ -97,6 +97,8 @@ WHERE s.logicalrelid = 'user_table'::regclass AND n.isactive
ORDER BY placementid;

-- reset cluster to original state
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 2;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 2;
SELECT citus.mitmproxy('conn.allow()');
SELECT master_add_node('localhost', :worker_2_proxy_port);

@@ -15,6 +15,9 @@ SET citus.shard_replication_factor TO 1;
SET citus.max_adaptive_executor_pool_size TO 1;
SELECT pg_backend_pid() as pid \gset

ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 222222;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 333333;

-- make sure coordinator is in the metadata
SELECT citus_set_coordinator_host('localhost', 57636);

@@ -108,3 +111,5 @@ SELECT * FROM pg_dist_shard WHERE logicalrelid = 'table_1'::regclass;
DROP SCHEMA create_dist_tbl_con CASCADE;
SET search_path TO default;
SELECT citus_remove_node('localhost', 57636);
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;
7 changes: 3 additions & 4 deletions src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql
@@ -260,11 +260,8 @@ UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val;
DELETE FROM dist1 WHERE id = :failed_node_val;

-- Show that DDL would still propagate to the node
SET client_min_messages TO NOTICE;
SET citus.log_remote_commands TO 1;
CREATE SCHEMA dummy;
SET citus.log_remote_commands TO 0;
SET client_min_messages TO ERROR;
SELECT * FROM run_command_on_workers($$SELECT nspname FROM pg_namespace WHERE nspname = 'dummy'$$);

-- Successfully activate the node after many failures
SELECT citus.mitmproxy('conn.allow()');
@@ -285,3 +282,5 @@ DROP SCHEMA mx_metadata_sync_multi_trans CASCADE;
DROP ROLE foo1;
DROP ROLE foo2;
SELECT citus_remove_node('localhost', :master_port);
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;