PBM. Add test to check user restoration during PITR restore from selective / full backup #197

GitHub Actions / JUnit Test Report failed Dec 11, 2024 in 0s

49 tests run, 40 passed, 8 skipped, 1 failed.

Annotations

Check failure on line 143 in pbm-functional/pytest/test_user_roles.py

github-actions / JUnit Test Report

test_user_roles.test_logical_PBM_T216[full_bck]

AssertionError: Starting restore 2024-12-11T10:01:24.445295729Z to point-in-time 2024-12-11T09:59:50 from '2024-12-11T09:58:40Z'..Error: waiting for start: cluster failed: no chunk with the target time, the last chunk ends on {1733911185 21}
- Restore on replicaset "rscfg" in state: running
- Restore on replicaset "rs1" in state: error: no chunk with the target time, the last chunk ends on {1733911185 21}

Raw output


            start_cluster = True, cluster = <cluster.Cluster object at 0x7faa69903c90>
newcluster = <cluster.Cluster object at 0x7faa69902510>
restore_type = 'full_bck'

    @pytest.mark.parametrize('restore_type',['part_bck','full_bck_part_rst_wo_user','full_bck_part_rst_user','full_bck'])
    @pytest.mark.timeout(600, func_only=True)
    def test_logical_PBM_T216(start_cluster, cluster, newcluster, restore_type):
        cluster.check_pbm_status()
        client = pymongo.MongoClient(cluster.connection)
        client_shard = pymongo.MongoClient("mongodb://root:root@rs101:27017/")
        client.admin.command({"enableSharding": "test_db1", "primaryShard": "rs1"})
        client.admin.command({"enableSharding": "test_db2", "primaryShard": "rs2"})
        client.admin.command("shardCollection", "test_db1.test_coll11", key={"_id": "hashed"})
        client.admin.command('updateUser', 'pbm_test', pwd='pbmpass_test2')
        client.admin.command('createUser', 'admin_random_user1', pwd='test123', roles=[{'role':'readWrite','db':'admin'}, 'userAdminAnyDatabase', 'clusterAdmin'])
        client_shard.admin.command('createUser', 'admin_random_user2', pwd='test123', roles=[{'role':'readWrite','db':'admin'}, 'userAdminAnyDatabase', 'clusterAdmin'])
        client.test_db1.command('createUser', 'test_random_user1', pwd='test123', roles=[{'role':'readWrite','db':'test_db1'}, {'role':'clusterManager','db':'admin'}])
        client_shard.test_db1.command('createUser', 'test_random_user2', pwd='test123', roles=[{'role':'readWrite','db':'test_db1'}, {'role':'clusterManager','db':'admin'}])
        for i in range(10):
            client["test_db1"]["test_coll11"].insert_one({"key": i, "data": i})
            client["test_db2"]["test_coll21"].insert_one({"key": i, "data": i})
        backup_full = cluster.make_backup("logical")
        backup_partial = cluster.make_backup("logical --ns=test_db1.*,test_db2.*")
        cluster.enable_pitr(pitr_extra_args="--set pitr.oplogSpanMin=0.5")
        client.admin.command('createUser', 'admin_random_user3', pwd='test123', roles=[{'role':'readWrite','db':'admin'}, 'userAdminAnyDatabase', 'clusterAdmin'])
        client_shard.admin.command('createUser', 'admin_random_user4', pwd='test123', roles=[{'role':'readWrite','db':'admin'}, 'userAdminAnyDatabase', 'clusterAdmin'])
        client.test_db1.command('createUser', 'test_random_user3', pwd='test123', roles=[{'role':'readWrite','db':'test_db1'}, {'role':'clusterManager','db':'admin'}])
        client_shard.test_db1.command('createUser', 'test_random_user4', pwd='test123', roles=[{'role':'readWrite','db':'test_db1'}, {'role':'clusterManager','db':'admin'}])
        for i in range(10):
            client["test_db1"]["test_coll11"].insert_one({"key": i+10, "data": i+10})
            client["test_db2"]["test_coll21"].insert_one({"key": i+10, "data": i+10})
        time.sleep(5)
        pitr = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
        pitr = " --time=" + pitr
        Cluster.log("Time for PITR is: " + pitr)
        time.sleep(10)
        cluster.disable_pitr()
        time.sleep(10)
        client.drop_database("test_db1")
        client.drop_database("test_db2")
        client.admin.command("dropUser", "admin_random_user1")
        client_shard.admin.command("dropUser", "admin_random_user2")
        client.admin.command("dropUser", "admin_random_user3")
        client_shard.admin.command("dropUser", "admin_random_user4")
        client.test_db1.command("dropUser", "test_random_user1")
        client_shard.test_db1.command("dropUser", "test_random_user2")
        client.test_db1.command("dropUser", "test_random_user3")
        client_shard.test_db1.command("dropUser", "test_random_user4")
    
        # restoring users and roles from selective backup is not supported
        restore_commands = {
            'part_bck': " --base-snapshot=" + backup_partial + pitr,
            'full_bck_part_rst_wo_user': " --base-snapshot=" + backup_full + pitr + " --ns=test_db1.*,test_db2.*",
            'full_bck_part_rst_user': " --base-snapshot=" + backup_full + pitr + " --ns=test_db1.*,test_db2.* --with-users-and-roles",
            'full_bck': " --base-snapshot=" + backup_full + pitr
        }
    
        # re-create cluster with new PBM user for connection to check that restore and connection to DB are OK
        # despite the same user with different password is present in backup
        if restore_type == 'full_bck':
            cluster.destroy()
            newcluster.create()
            newcluster.setup_pbm()
            newcluster.check_pbm_status()
>           newcluster.make_restore(restore_commands.get(restore_type), check_pbm_status=True)

test_user_roles.py:143: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <cluster.Cluster object at 0x7faa69902510>
name = ' --base-snapshot=2024-12-11T09:58:40Z --time=2024-12-11T09:59:50'
kwargs = {'check_pbm_status': True}
client = MongoClient(host=['mongos:27017'], document_class=dict, tz_aware=False, connect=True)
result = CommandResult(backend=<testinfra.backend.docker.DockerBackend object at 0x7faa69889910>, exit_status=1, command=b'time... Restore on replicaset "rs1" in state: error: no chunk with the target time, the last chunk ends on {1733911185 21}\n')
n = <testinfra.host.Host docker://rscfg01>, timeout = 240, error = ''
host = 'rscfg03', container = <Container: f49a51305399>

    def make_restore(self, name, **kwargs):
        if self.layout == "sharded":
            client = pymongo.MongoClient(self.connection)
            result = client.admin.command("balancerStop")
            client.close()
            Cluster.log("Stopping balancer: " + str(result))
            self.stop_mongos()
        self.stop_arbiters()
        n = testinfra.get_host("docker://" + self.pbm_cli)
        timeout = time.time() + 60
    
        while True:
            if not self.get_status()['running']:
                break
            if time.time() > timeout:
                assert False, "Cannot start restore, another operation running"
            time.sleep(1)
        Cluster.log("Restore started")
        timeout=kwargs.get('timeout', 240)
        result = n.run('timeout ' + str(timeout) + ' pbm restore ' + name + ' --wait')
    
        if result.rc == 0:
            Cluster.log(result.stdout)
        else:
            # try to catch possible failures if timeout exceeded
            error=''
            for host in self.mongod_hosts:
                try:
                    container = docker.from_env().containers.get(host)
                    get_logs = container.exec_run(
                        'cat /var/lib/mongo/pbm.restore.log', stderr=False)
                    if get_logs.exit_code == 0:
                        Cluster.log(
                            "!!!!Possible failure on {}, file pbm.restore.log was found:".format(host))
                        logs = get_logs.output.decode('utf-8')
                        Cluster.log(logs)
                        if '"s":"F"' in logs:
                            error = logs
                except docker.errors.APIError:
                    pass
            if error:
                assert False, result.stdout + result.stderr + "\n" + error
            else:
>               assert False, result.stdout + result.stderr
E               AssertionError: Starting restore 2024-12-11T10:01:24.445295729Z to point-in-time 2024-12-11T09:59:50 from '2024-12-11T09:58:40Z'..Error: waiting for start: cluster failed: no chunk with the target time, the last chunk ends on {1733911185 21}
E               - Restore on replicaset "rscfg" in state: running
E               - Restore on replicaset "rs1" in state: error: no chunk with the target time, the last chunk ends on {1733911185 21}

cluster.py:464: AssertionError

View more details on GitHub Actions

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

PBM. Add test to check user restoration during PITR restore from selective / full backup #197

JUnit Test Report

PBM. Add test to check user restoration during PITR restore from selective / full backup #197

Jobs

Run details

49 tests run, 40 passed, 8 skipped, 1 failed.

Annotations

github-actions / JUnit Test Report

Re-running jobs...