From 72a03c7d93c662b1cad1e55fdd0003f9aef1660b Mon Sep 17 00:00:00 2001
From: Sergei Politov
Date: Wed, 5 May 2021 09:41:27 +0300
Subject: [PATCH] [Backport 2.6] [#7126] PITR: Restore deleted table

Summary:
Adds the ability to restore a table that was previously deleted.

When a tablet that participates in a snapshot schedule is deleted, it is marked as
hidden instead of being actually deleted. Such tablets reject reads and writes, but
can be restored to some point in time. Cleanup of such tables will be implemented in
follow-up diffs.

Original commit: D11389 / 9fd73c7219fa01963660addd21b8dac9fb9c8530

Test Plan: ybd --gtest_filter YbAdminSnapshotScheduleTest.SnapshotScheduleUndeleteTable

Jenkins: rebase: 2.6

Reviewers: bogdan

Reviewed By: bogdan

Subscribers: ybase, mbautin, skedia, rahuldesirazu

Differential Revision: https://phabricator.dev.yugabyte.com/D11594
---
 ent/src/yb/master/async_snapshot_tasks.cc     |   8 +
 ent/src/yb/master/async_snapshot_tasks.h      |   9 +-
 ent/src/yb/master/catalog_manager.h           |   3 +-
 ent/src/yb/master/catalog_manager_ent.cc      |   9 +-
 src/yb/common/snapshot.h                      |   4 +-
 .../cassandra_cpp_driver-test.cc              |   7 +-
 src/yb/integration-tests/cql_test_util.h      |   9 +
 src/yb/master/async_rpc_tasks.cc              |   7 +-
 src/yb/master/async_rpc_tasks.h               |   8 +-
 src/yb/master/catalog_entity_info.cc          |   3 +-
 src/yb/master/catalog_manager.cc              | 155 +++++----
 src/yb/master/catalog_manager.h               |  33 +-
 src/yb/master/master.proto                    |   2 +
 src/yb/master/master_snapshot_coordinator.cc  |  49 ++-
 src/yb/master/master_snapshot_coordinator.h   |   3 +-
 src/yb/master/state_with_tablets.h            |  10 +
 src/yb/master/system_tablet.h                 |   4 +
 src/yb/tablet/abstract_tablet.h               |   2 +
 src/yb/tablet/metadata.proto                  |   3 +
 .../tablet/operations/snapshot_operation.cc   |   6 +
 src/yb/tablet/tablet.cc                       |   2 +-
 src/yb/tablet/tablet.h                        |   4 +
 src/yb/tablet/tablet_component.cc             |   5 +-
 src/yb/tablet/tablet_component.h              |   2 +-
 src/yb/tablet/tablet_metadata.cc              |  17 +-
 src/yb/tablet/tablet_metadata.h               |   7 +-
 src/yb/tablet/tablet_snapshots.cc             |  46 ++-
 src/yb/tools/CMakeLists.txt                   |   8 +
 src/yb/tools/admin-test-base.cc               |  55 +++
 src/yb/tools/admin-test-base.h                |  51 +++
 .../tools/yb-admin-snapshot-schedule-test.cc  | 325 ++++++++++++++++++
 src/yb/tools/yb-admin-test.cc                 | 198 +---------
 src/yb/tserver/backup.proto                   |   2 +
 src/yb/tserver/tablet_service.cc              |  16 +
 src/yb/tserver/ts_tablet_manager-test.cc      |   2 +
 src/yb/tserver/ts_tablet_manager.cc           |   4 +
 src/yb/tserver/ts_tablet_manager.h            |  11 +-
 src/yb/tserver/tserver_admin.proto            |   3 +
 38 files changed, 758 insertions(+), 334 deletions(-)
 create mode 100644 src/yb/tools/admin-test-base.cc
 create mode 100644 src/yb/tools/admin-test-base.h
 create mode 100644 src/yb/tools/yb-admin-snapshot-schedule-test.cc

diff --git a/ent/src/yb/master/async_snapshot_tasks.cc b/ent/src/yb/master/async_snapshot_tasks.cc
index 104beebae028..f753ab0028a9 100644
--- a/ent/src/yb/master/async_snapshot_tasks.cc
+++ b/ent/src/yb/master/async_snapshot_tasks.cc
@@ -157,6 +157,7 @@ bool AsyncTabletSnapshotOp::SendRequest(int attempt) {
     req.set_schema_version(schema_version_);
     *req.mutable_schema() = schema_;
     *req.mutable_indexes() = indexes_;
+    req.set_hide(hide_);
   }
   req.set_propagated_hybrid_time(master_->clock()->Now().ToUint64());
@@ -186,5 +187,12 @@ void AsyncTabletSnapshotOp::Finished(const Status& status) {
   }
 }

+void AsyncTabletSnapshotOp::SetMetadata(const SysTablesEntryPB& pb) {
+  has_metadata_ = true;
+  schema_version_ = pb.version();
+  schema_ = pb.schema();
+  indexes_ = pb.indexes();
+}
+
 } // namespace master
 } // namespace yb
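For reference, the end-to-end flow exercised by the new yb-admin-snapshot-schedule-test
below, sketched as yb-admin invocations (master addresses, schedule id, and restore
timestamp are placeholders; the interval and retention arguments are in minutes, as in
the test's CreateSnapshotSchedule helper):

  yb-admin -master_addresses $MASTER_ADDRESSES create_snapshot_schedule 1 10 ycql.ybtest
  yb-admin -master_addresses $MASTER_ADDRESSES list_snapshot_schedules
  yb-admin -master_addresses $MASTER_ADDRESSES restore_snapshot_schedule SCHEDULE_ID RESTORE_AT

diff --git 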
a/ent/src/yb/master/async_snapshot_tasks.h b/ent/src/yb/master/async_snapshot_tasks.h index e1cc75987b58..a7d99c8d827f 100644 --- a/ent/src/yb/master/async_snapshot_tasks.h +++ b/ent/src/yb/master/async_snapshot_tasks.h @@ -48,13 +48,7 @@ class AsyncTabletSnapshotOp : public enterprise::RetryingTSRpcTask { snapshot_hybrid_time_ = value; } - void SetMetadata(uint32_t schema_version, const SchemaPB& schema, - const google::protobuf::RepeatedPtrField& indexes) { - has_metadata_ = true; - schema_version_ = schema_version; - schema_ = schema; - indexes_ = indexes; - } + void SetMetadata(const SysTablesEntryPB& pb); void SetCallback(TabletSnapshotOperationCallback callback) { callback_ = std::move(callback); @@ -79,6 +73,7 @@ class AsyncTabletSnapshotOp : public enterprise::RetryingTSRpcTask { uint32_t schema_version_; SchemaPB schema_; google::protobuf::RepeatedPtrField indexes_; + bool hide_ = false; }; } // namespace master diff --git a/ent/src/yb/master/catalog_manager.h b/ent/src/yb/master/catalog_manager.h index 6734ed5b8cd7..c71a8b8a173d 100644 --- a/ent/src/yb/master/catalog_manager.h +++ b/ent/src/yb/master/catalog_manager.h @@ -282,7 +282,8 @@ class CatalogManager : public yb::master::CatalogManager, SnapshotCoordinatorCon int64_t LeaderTerm() override; - Result MakeSnapshotSchedulesToTabletsMap() override; + Result MakeSnapshotSchedulesToObjectIdsMap( + SysRowEntry::Type type) override; static void SetTabletSnapshotsState(SysSnapshotEntryPB::State state, SysSnapshotEntryPB* snapshot_pb); diff --git a/ent/src/yb/master/catalog_manager_ent.cc b/ent/src/yb/master/catalog_manager_ent.cc index 211ecbc23830..6c6f3ad1efa9 100644 --- a/ent/src/yb/master/catalog_manager_ent.cc +++ b/ent/src/yb/master/catalog_manager_ent.cc @@ -1400,9 +1400,7 @@ void CatalogManager::SendRestoreTabletSnapshotRequest( call->SetSnapshotHybridTime(restore_at); } if (send_metadata) { - auto lock = tablet->table()->LockForRead(); - const auto& pb = lock->pb; - call->SetMetadata(pb.version(), pb.schema(), pb.indexes()); + call->SetMetadata(tablet->table()->LockForRead()->pb); } call->SetCallback(std::move(callback)); tablet->table()->AddTask(call); @@ -3435,8 +3433,9 @@ void CatalogManager::Started() { snapshot_coordinator_.Start(); } -Result CatalogManager::MakeSnapshotSchedulesToTabletsMap() { - return snapshot_coordinator_.MakeSnapshotSchedulesToTabletsMap(); +Result CatalogManager::MakeSnapshotSchedulesToObjectIdsMap( + SysRowEntry::Type type) { + return snapshot_coordinator_.MakeSnapshotSchedulesToObjectIdsMap(type); } void CatalogManager::SysCatalogLoaded(int64_t term) { diff --git a/src/yb/common/snapshot.h b/src/yb/common/snapshot.h index 0901a0915ee4..311a72917c55 100644 --- a/src/yb/common/snapshot.h +++ b/src/yb/common/snapshot.h @@ -26,8 +26,8 @@ YB_STRONGLY_TYPED_UUID(TxnSnapshotId); YB_STRONGLY_TYPED_UUID(TxnSnapshotRestorationId); YB_STRONGLY_TYPED_UUID(SnapshotScheduleId); -using SnapshotSchedulesToTabletsMap = - std::unordered_map, SnapshotScheduleIdHash>; +using SnapshotSchedulesToObjectIdsMap = + std::unordered_map, SnapshotScheduleIdHash>; } // namespace yb diff --git a/src/yb/integration-tests/cassandra_cpp_driver-test.cc b/src/yb/integration-tests/cassandra_cpp_driver-test.cc index 3e800655bc39..bf70fc7d61bd 100644 --- a/src/yb/integration-tests/cassandra_cpp_driver-test.cc +++ b/src/yb/integration-tests/cassandra_cpp_driver-test.cc @@ -2733,15 +2733,12 @@ TEST_F(CppCassandraDriverTest, BigQueryExpr) { } auto start = MonoTime::Now(); - auto result = 
ASSERT_RESULT(session_.ExecuteWithResult(Format( + auto result = ASSERT_RESULT(session_.FetchValue(Format( "SELECT MAX(key) FROM $0", kTableName))); auto finish = MonoTime::Now(); LOG(INFO) << "Time: " << finish - start; - auto iterator = result.CreateIterator(); - ASSERT_TRUE(iterator.Next()); - LOG(INFO) << "Result: " << iterator.Row().Value(0).ToString(); - ASSERT_FALSE(iterator.Next()); + LOG(INFO) << "Result: " << result; } class CppCassandraDriverSmallSoftLimitTest : public CppCassandraDriverTest { diff --git a/src/yb/integration-tests/cql_test_util.h b/src/yb/integration-tests/cql_test_util.h index 90654b636155..f93d26198fb7 100644 --- a/src/yb/integration-tests/cql_test_util.h +++ b/src/yb/integration-tests/cql_test_util.h @@ -257,6 +257,15 @@ class CassandraSession { return ExecuteAndProcessOneRow(CassandraStatement(query), action); } + template + Result FetchValue(const std::string& query) { + T result = T(); + RETURN_NOT_OK(ExecuteAndProcessOneRow(query, [&result](const CassandraRow& row) { + result = row.Value(0).As(); + })); + return result; + } + CHECKED_STATUS ExecuteBatch(const CassandraBatch& batch); CassandraFuture SubmitBatch(const CassandraBatch& batch); diff --git a/src/yb/master/async_rpc_tasks.cc b/src/yb/master/async_rpc_tasks.cc index 4abe240db0c4..f3a81ead6b95 100644 --- a/src/yb/master/async_rpc_tasks.cc +++ b/src/yb/master/async_rpc_tasks.cc @@ -618,14 +618,14 @@ void AsyncDeleteReplica::HandleResponse(int attempt) { if (table_) { LOG_WITH_PREFIX(INFO) << "TS " << permanent_uuid_ << ": tablet " << tablet_id_ - << " (table " << table_->ToString() << ") successfully deleted"; + << " (table " << table_->ToString() << ") successfully done"; } else { LOG_WITH_PREFIX(WARNING) << "TS " << permanent_uuid_ << ": tablet " << tablet_id_ << " did not belong to a known table, but was successfully deleted"; } TransitionToCompleteState(); - VLOG_WITH_PREFIX(1) << "TS " << permanent_uuid_ << ": delete complete on tablet " << tablet_id_; + VLOG_WITH_PREFIX(1) << "TS " << permanent_uuid_ << ": complete on tablet " << tablet_id_; } } @@ -635,6 +635,9 @@ bool AsyncDeleteReplica::SendRequest(int attempt) { req.set_tablet_id(tablet_id_); req.set_reason(reason_); req.set_delete_type(delete_type_); + if (hide_only_) { + req.set_hide_only(hide_only_); + } if (cas_config_opid_index_less_or_equal_) { req.set_cas_config_opid_index_less_or_equal(*cas_config_opid_index_less_or_equal_); } diff --git a/src/yb/master/async_rpc_tasks.h b/src/yb/master/async_rpc_tasks.h index f54bf5940fec..7619f720e08d 100644 --- a/src/yb/master/async_rpc_tasks.h +++ b/src/yb/master/async_rpc_tasks.h @@ -368,7 +368,12 @@ class AsyncDeleteReplica : public RetrySpecificTSRpcTask { std::string type_name() const override { return "Delete Tablet"; } std::string description() const override { - return "Delete Tablet RPC for " + tablet_id_ + " on TS=" + permanent_uuid_; + return Format("$0 Tablet RPC for $1 on TS=$2", + hide_only_ ? 
"Hide" : "Delete", tablet_id_, permanent_uuid_); + } + + void set_hide_only(bool value) { + hide_only_ = value; } protected: @@ -383,6 +388,7 @@ class AsyncDeleteReplica : public RetrySpecificTSRpcTask { const boost::optional cas_config_opid_index_less_or_equal_; const std::string reason_; tserver::DeleteTabletResponsePB resp_; + bool hide_only_ = false; }; // Send the "Alter Table" with the latest table schema to the leader replica diff --git a/src/yb/master/catalog_entity_info.cc b/src/yb/master/catalog_entity_info.cc index 6e79f4a4cc1c..f8e0d46e6c25 100644 --- a/src/yb/master/catalog_entity_info.cc +++ b/src/yb/master/catalog_entity_info.cc @@ -414,7 +414,8 @@ bool TableInfo::IsAlterInProgress(uint32_t version) const { bool TableInfo::AreAllTabletsDeleted() const { shared_lock l(lock_); for (const TableInfo::TabletInfoMap::value_type& e : tablet_map_) { - if (!e.second->LockForRead()->is_deleted()) { + auto tablet_lock = e.second->LockForRead(); + if (!tablet_lock->is_deleted() && !tablet_lock->pb.hidden()) { return false; } } diff --git a/src/yb/master/catalog_manager.cc b/src/yb/master/catalog_manager.cc index b5ea793f8916..e2d3abf4df34 100644 --- a/src/yb/master/catalog_manager.cc +++ b/src/yb/master/catalog_manager.cc @@ -445,11 +445,14 @@ namespace { (tabpb.has_index_info() ? tabpb.index_info().is_unique() \ : tabpb.is_unique_index()) -#define PROTO_IS_INDEX(tabpb) \ - (tabpb.has_index_info() || !tabpb.indexed_table_id().empty()) +template +bool IsIndex(const PB& pb) { + return pb.has_index_info() || !pb.indexed_table_id().empty(); +} -#define PROTO_IS_TABLE(tabpb) \ - (!tabpb.has_index_info() && tabpb.indexed_table_id().empty()) +bool IsTable(const SysTablesEntryPB& pb) { + return !IsIndex(pb); +} #define PROTO_PTR_IS_INDEX(tabpb) \ (tabpb->has_index_info() || !tabpb->indexed_table_id().empty()) @@ -2168,7 +2171,7 @@ Status CatalogManager::DeleteTablets(const std::vector& tablet_ids) { } return DeleteTabletListAndSendRequests( - tablet_infos, "Tablet deleted upon request at " + LocalTimeAsString()); + tablet_infos, "Tablet deleted upon request at " + LocalTimeAsString(), HideOnly::kFalse); } Status CatalogManager::DeleteTablet( @@ -2390,7 +2393,7 @@ Status CatalogManager::CopyPgsqlSysTables(const NamespaceId& namespace_id, table_req.set_is_pg_catalog_table(true); table_req.set_table_id(table_id); - if (PROTO_IS_INDEX(l->pb)) { + if (IsIndex(l->pb)) { const uint32_t indexed_table_oid = VERIFY_RESULT(GetPgsqlTableOid(PROTO_GET_INDEXED_TABLE_ID(l->pb))); const TableId indexed_table_id = GetPgsqlTableId(database_oid, indexed_table_oid); @@ -2478,7 +2481,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, // For index table, find the table info scoped_refptr indexed_table; - if (PROTO_IS_INDEX(req)) { + if (IsIndex(req)) { TRACE("Looking up indexed table"); indexed_table = GetTableInfo(req.indexed_table_id()); if (indexed_table == nullptr) { @@ -2505,7 +2508,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, // columns into range partition columns. This is because postgres does not know that this index // table is in a colocated database. When we get to the "tablespaces" step where we store this // into PG metadata, then PG will know if db/table is colocated and do the work there. 
- if ((colocated || req.has_tablegroup_id()) && PROTO_IS_INDEX(req)) { + if ((colocated || req.has_tablegroup_id()) && IsIndex(req)) { for (auto& col_pb : *req.mutable_schema()->mutable_columns()) { col_pb.set_is_hash_key(false); } @@ -3236,7 +3239,7 @@ Status CatalogManager::IsCreateTableDone(const IsCreateTableDoneRequestPB* req, // 4. If this is an index, we are not done until the index is in the indexed table's schema. An // exception is YSQL system table indexes, which don't get added to their indexed tables' schemas. - if (resp->done() && PROTO_IS_INDEX(pb)) { + if (resp->done() && IsIndex(pb)) { auto& indexed_table_id = PROTO_GET_INDEXED_TABLE_ID(pb); // For user indexes (which add index info to indexed table's schema), // - if this index is created without backfill, @@ -3671,7 +3674,7 @@ Status CatalogManager::TruncateTable(const TableId& table_id, // Truncate indexes also. // Note: PG table does not have references to indexes in the base table, so associated indexes // must be truncated from the PG code separately. - const bool is_index = PROTO_IS_INDEX(l->pb); + const bool is_index = IsIndex(l->pb); DCHECK(!is_index || l->pb.indexes().empty()) << "indexes should be empty for index table"; for (const auto& index_info : l->pb.indexes()) { RETURN_NOT_OK(TruncateTable(index_info.table_id(), resp, rpc)); @@ -3897,8 +3900,11 @@ Status CatalogManager::DeleteTableInternal( vector> tables; vector table_locks; + auto schedules_to_tables_map = VERIFY_RESULT( + MakeSnapshotSchedulesToObjectIdsMap(SysRowEntry::TABLE)); + RETURN_NOT_OK(DeleteTableInMemory(req->table(), req->is_index_table(), - true /* update_indexed_table */, + true /* update_indexed_table */, schedules_to_tables_map, &tables, &table_locks, resp, rpc)); // Delete any CDC streams that are set up on this table. @@ -3917,10 +3923,19 @@ Status CatalogManager::DeleteTableInternal( SleepFor(MonoDelta::FromMilliseconds(FLAGS_catalog_manager_inject_latency_in_delete_table_ms)); } - for (const scoped_refptr &table : tables) { + for (const auto& table : tables) { + HideOnly hide_only = HideOnly::kFalse; + for (const auto& entry : schedules_to_tables_map) { + if (std::binary_search(entry.second.begin(), entry.second.end(), table->id())) { + hide_only = HideOnly::kTrue; + break; + } + } + // Send a DeleteTablet() request to each tablet replica in the table. - RETURN_NOT_OK(DeleteTabletsAndSendRequests(table)); + RETURN_NOT_OK(DeleteTabletsAndSendRequests(table, hide_only)); // Send a RemoveTableFromTablet() request to each colocated parent tablet replica in the table. + // TODO(pitr) handle YSQL colocated tables. if (IsColocatedUserTable(*table)) { auto call = std::make_shared( master_, AsyncTaskPool(), table->GetColocatedTablet(), table); @@ -3947,13 +3962,15 @@ Status CatalogManager::DeleteTableInternal( return Status::OK(); } -Status CatalogManager::DeleteTableInMemory(const TableIdentifierPB& table_identifier, - const bool is_index_table, - const bool update_indexed_table, - vector>* tables, - vector* table_lcks, - DeleteTableResponsePB* resp, - rpc::RpcContext* rpc) { +Status CatalogManager::DeleteTableInMemory( + const TableIdentifierPB& table_identifier, + const bool is_index_table, + const bool update_indexed_table, + const SnapshotSchedulesToObjectIdsMap& schedules_to_tables_map, + vector>* tables, + vector* table_lcks, + DeleteTableResponsePB* resp, + rpc::RpcContext* rpc) { // TODO(NIC): How to handle a DeleteTable request when the namespace is being deleted? const char* const object_type = is_index_table ? 
"index" : "table"; const bool cascade_delete_index = is_index_table && !update_indexed_table; @@ -3975,7 +3992,7 @@ Status CatalogManager::DeleteTableInMemory(const TableIdentifierPB& table_identi auto l = table->LockForWrite(); resp->set_table_id(table->id()); - if (is_index_table == PROTO_IS_TABLE(l.data().pb)) { + if (is_index_table == IsTable(l.data().pb)) { Status s = STATUS(NotFound, "The object does not exist"); return SetupError(resp->mutable_error(), MasterErrorPB::OBJECT_NOT_FOUND, s); } @@ -3993,7 +4010,7 @@ Status CatalogManager::DeleteTableInMemory(const TableIdentifierPB& table_identi TRACE("Updating metadata on disk"); // Update the metadata for the on-disk state. l.mutable_data()->set_state(SysTablesEntryPB::DELETING, - Substitute("Started deleting at $0", LocalTimeAsString())); + Substitute("Started deleting at $0", LocalTimeAsString())); // Update sys-catalog with the removed table state. Status s = sys_catalog_->UpdateItem(table.get(), leader_ready_term()); @@ -4028,8 +4045,8 @@ Status CatalogManager::DeleteTableInMemory(const TableIdentifierPB& table_identi for (auto index : l->pb.indexes()) { index_identifier.set_table_id(index.table_id()); RETURN_NOT_OK(DeleteTableInMemory(index_identifier, true /* is_index_table */, - false /* update_indexed_table */, tables, - table_lcks, resp, rpc)); + false /* update_indexed_table */, schedules_to_tables_map, + tables, table_lcks, resp, rpc)); } } else if (update_indexed_table) { s = MarkIndexInfoFromTableForDeletion( @@ -5009,8 +5026,7 @@ void CatalogManager::NotifyTabletDeleteFinished(const TabletServerId& tserver_uu if (!lock.locked()) { return; } - vector tables_to_delete({table.get()}); - Status s = sys_catalog_->UpdateItems(tables_to_delete, leader_ready_term()); + Status s = sys_catalog_->UpdateItem(table.get(), leader_ready_term()); if (!s.ok()) { LOG(WARNING) << "Error marking table as DELETED: " << s.ToString(); return; @@ -6107,7 +6123,7 @@ Status CatalogManager::DeleteNamespace(const DeleteNamespaceRequestPB* req, if (!ltm->started_deleting() && ltm->namespace_id() == ns->id()) { Status s = STATUS(InvalidArgument, Substitute("Cannot delete keyspace which has $0: $1 [id=$2]", - PROTO_IS_TABLE(ltm->pb) ? "table" : "index", + IsTable(ltm->pb) ? "table" : "index", ltm->name(), entry.second->id()), req->DebugString()); return SetupError(resp->mutable_error(), MasterErrorPB::NAMESPACE_IS_NOT_EMPTY, s); } @@ -6292,7 +6308,7 @@ Status CatalogManager::DeleteYsqlDBTables(const scoped_refptr& da // For regular (indexed) table, insert table info and lock in the front of the list. Else for // index table, append them to the end. We do so so that we will commit and delete the indexed // table first before its indexes. - if (PROTO_IS_TABLE(l->pb)) { + if (IsTable(l->pb)) { tables.insert(tables.begin(), {table, std::move(l)}); } else { tables.push_back({table, std::move(l)}); @@ -6347,7 +6363,8 @@ Status CatalogManager::DeleteYsqlDBTables(const scoped_refptr& da // Send a DeleteTablet() RPC request to each tablet replica in the table. for (auto &table_and_lock : tables) { auto &table = table_and_lock.first; - RETURN_NOT_OK(DeleteTabletsAndSendRequests(table)); + // TODO(pitr) undelete for YSQL tables + RETURN_NOT_OK(DeleteTabletsAndSendRequests(table, HideOnly::kFalse)); } // Invoke any background tasks and return (notably, table cleanup). 
@@ -7289,13 +7306,14 @@ void CatalogManager::SendSplitTabletRequest( Format("Failed to send split tablet request for tablet $0", tablet->tablet_id())); } -void CatalogManager::DeleteTabletReplicas(TabletInfo* tablet, const std::string& msg) { +void CatalogManager::DeleteTabletReplicas( + TabletInfo* tablet, const std::string& msg, bool hide_only) { auto locations = tablet->GetReplicaLocations(); LOG(INFO) << "Sending DeleteTablet for " << locations->size() << " replicas of tablet " << tablet->tablet_id(); for (const TabletInfo::ReplicaMap::value_type& r : *locations) { - SendDeleteTabletRequest(tablet->tablet_id(), TABLET_DATA_DELETED, - boost::none, tablet->table(), r.second.ts_desc, msg); + SendDeleteTabletRequest(tablet->tablet_id(), TABLET_DATA_DELETED, boost::none, tablet->table(), + r.second.ts_desc, msg, hide_only); } } @@ -7311,7 +7329,7 @@ Status CatalogManager::CheckIfForbiddenToDeleteTabletOf(const scoped_refptr& table) { +Status CatalogManager::DeleteTabletsAndSendRequests(const TableInfoPtr& table, HideOnly hide_only) { // Silently fail if tablet deletion is forbidden so table deletion can continue executing. if (!CheckIfForbiddenToDeleteTabletOf(table).ok()) { return Status::OK(); @@ -7319,12 +7337,13 @@ Status CatalogManager::DeleteTabletsAndSendRequests(const scoped_refptr> tablets; table->GetAllTablets(&tablets); + std::sort(tablets.begin(), tablets.end(), [](const auto& lhs, const auto& rhs) { return lhs->tablet_id() < rhs->tablet_id(); }); string deletion_msg = "Table deleted at " + LocalTimeAsString(); - RETURN_NOT_OK(DeleteTabletListAndSendRequests(tablets, deletion_msg)); + RETURN_NOT_OK(DeleteTabletListAndSendRequests(tablets, deletion_msg, hide_only)); if (IsColocatedParentTable(*table)) { SharedLock catalog_lock(lock_); @@ -7341,41 +7360,51 @@ Status CatalogManager::DeleteTabletsAndSendRequests(const scoped_refptr>& tablets, const std::string& deletion_msg) { - vector, TabletInfo::WriteLock>> tablets_and_locks; + const std::vector>& tablets, const std::string& deletion_msg, + HideOnly hide_only) { + struct TabletAndLock { + TabletInfoPtr tablet; + TabletInfo::WriteLock lock; + }; + std::vector tablets_and_locks; + tablets_and_locks.reserve(tablets.size()); + std::vector tablet_infos; + tablet_infos.reserve(tablets_and_locks.size()); // Grab tablets and tablet write locks. The list should already be in tablet_id sorted order. for (const auto& tablet : tablets) { - auto tablet_lock = tablet->LockForWrite(); - tablets_and_locks.emplace_back(tablet, std::move(tablet_lock)); + tablets_and_locks.push_back(TabletAndLock { + .tablet = tablet, + .lock = tablet->LockForWrite(), + }); + tablet_infos.emplace_back(tablet.get()); } // Mark the tablets as deleted. 
for (auto& tablet_and_lock : tablets_and_locks) { - auto& tablet = tablet_and_lock.first; - auto& tablet_lock = tablet_and_lock.second; - - LOG(INFO) << "Deleting tablet " << tablet->tablet_id() << " ..."; - DeleteTabletReplicas(tablet.get(), deletion_msg); + auto& tablet = tablet_and_lock.tablet; + auto& tablet_lock = tablet_and_lock.lock; - tablet_lock.mutable_data()->set_state(SysTabletsEntryPB::DELETED, deletion_msg); + if (hide_only) { + LOG(INFO) << "Hiding tablet " << tablet->tablet_id() << " ..."; + tablet_lock.mutable_data()->pb.set_hidden(true); + } else { + LOG(INFO) << "Deleting tablet " << tablet->tablet_id() << " ..."; + tablet_lock.mutable_data()->set_state(SysTabletsEntryPB::DELETED, deletion_msg); + } + DeleteTabletReplicas(tablet.get(), deletion_msg, hide_only); } // Update all the tablet states in raft in bulk. - vector tablet_infos; - tablet_infos.reserve(tablets_and_locks.size()); - for (auto& tab : tablets_and_locks) { - tablet_infos.push_back(tab.first.get()); - } RETURN_NOT_OK(sys_catalog_->UpdateItems(tablet_infos, leader_ready_term())); // Commit the change. for (auto& tablet_and_lock : tablets_and_locks) { - auto& tablet = tablet_and_lock.first; - auto& tablet_lock = tablet_and_lock.second; + auto& tablet = tablet_and_lock.tablet; + auto& tablet_lock = tablet_and_lock.lock; tablet_lock.Commit(); - LOG(INFO) << "Deleted tablet " << tablet->tablet_id(); + LOG(INFO) << (hide_only ? "Hid tablet " : "Deleted tablet ") << tablet->tablet_id(); } return Status::OK(); } @@ -7386,16 +7415,21 @@ void CatalogManager::SendDeleteTabletRequest( const boost::optional& cas_config_opid_index_less_or_equal, const scoped_refptr& table, TSDescriptor* ts_desc, - const string& reason) { + const string& reason, + bool hide_only) { if (PREDICT_FALSE(GetAtomicFlag(&FLAGS_TEST_disable_tablet_deletion))) { return; } - LOG_WITH_PREFIX(INFO) << "Deleting tablet " << tablet_id << " on peer " - << ts_desc->permanent_uuid() << " with delete type " - << TabletDataState_Name(delete_type) << " (" << reason << ")"; + LOG_WITH_PREFIX(INFO) + << (hide_only ? "Hiding" : "Deleting") << " tablet " << tablet_id << " on peer " + << ts_desc->permanent_uuid() << " with delete type " + << TabletDataState_Name(delete_type) << " (" << reason << ")"; auto call = std::make_shared(master_, AsyncTaskPool(), ts_desc->permanent_uuid(), table, tablet_id, delete_type, cas_config_opid_index_less_or_equal, reason); + if (hide_only) { + call->set_hide_only(hide_only); + } if (table != nullptr) { table->AddTask(call); } @@ -7808,7 +7842,8 @@ Status CatalogManager::ProcessPendingAssignments(const TabletInfos& tablets) { // This is asynchronous / non-blocking. for (auto* tablet : deferred.tablets_to_update) { if (tablet->metadata().dirty().is_deleted()) { - DeleteTabletReplicas(tablet, tablet->metadata().dirty().pb.state_msg()); + // Actual delete, because we delete tablet replica. + DeleteTabletReplicas(tablet, tablet->metadata().dirty().pb.state_msg(), HideOnly::kFalse); } } // Send the CreateTablet() requests to the servers. This is asynchronous / non-blocking. 
@@ -7948,7 +7983,8 @@ Status CatalogManager::HandlePlacementUsingPlacementInfo(const PlacementInfoPB& } Status CatalogManager::SendCreateTabletRequests(const vector& tablets) { - auto schedules_to_tablets_map = VERIFY_RESULT(MakeSnapshotSchedulesToTabletsMap()); + auto schedules_to_tablets_map = VERIFY_RESULT(MakeSnapshotSchedulesToObjectIdsMap( + SysRowEntry::TABLET)); for (TabletInfo *tablet : tablets) { const consensus::RaftConfigPB& config = tablet->metadata().dirty().pb.committed_consensus_state().config(); @@ -8194,6 +8230,9 @@ Status CatalogManager::BuildLocationsForTablet(const scoped_refptr& TabletLocationsPB* locs_pb) { { auto l_tablet = tablet->LockForRead(); + if (l_tablet->pb.hidden()) { + return STATUS_FORMAT(NotFound, "Tablet hidden", tablet->id()); + } locs_pb->set_table_id(l_tablet->pb.table_id()); *locs_pb->mutable_table_ids() = l_tablet->pb.table_ids(); } diff --git a/src/yb/master/catalog_manager.h b/src/yb/master/catalog_manager.h index 2125cdf6d14e..fd8f25903308 100644 --- a/src/yb/master/catalog_manager.h +++ b/src/yb/master/catalog_manager.h @@ -153,6 +153,8 @@ class BlacklistState { int64_t initial_load_; }; +YB_STRONGLY_TYPED_BOOL(HideOnly); + // The component of the master which tracks the state and location // of tables/tablets in the cluster. // @@ -1086,16 +1088,18 @@ class CatalogManager : public tserver::TabletPeerLookupIf { // Delete the specified table in memory. The TableInfo, DeletedTableInfo and lock of the deleted // table are appended to the lists. The caller will be responsible for committing the change and // deleting the actual table and tablets. - CHECKED_STATUS DeleteTableInMemory(const TableIdentifierPB& table_identifier, - bool is_index_table, - bool update_indexed_table, - std::vector>* tables, - std::vector* table_lcks, - DeleteTableResponsePB* resp, - rpc::RpcContext* rpc); + CHECKED_STATUS DeleteTableInMemory( + const TableIdentifierPB& table_identifier, + bool is_index_table, + bool update_indexed_table, + const SnapshotSchedulesToObjectIdsMap& schedules_to_tables_map, + std::vector>* tables, + std::vector* table_lcks, + DeleteTableResponsePB* resp, + rpc::RpcContext* rpc); // Request tablet servers to delete all replicas of the tablet. - void DeleteTabletReplicas(TabletInfo* tablet, const std::string& msg); + void DeleteTabletReplicas(TabletInfo* tablet, const std::string& msg, bool hide_only); // Returns error if and only if it is forbidden to both: // 1) Delete single tablet from table. @@ -1105,11 +1109,12 @@ class CatalogManager : public tserver::TabletPeerLookupIf { // Marks each of the tablets in the given table as deleted and triggers requests to the tablet // servers to delete them. The table parameter is expected to be given "write locked". - CHECKED_STATUS DeleteTabletsAndSendRequests(const scoped_refptr& table); + CHECKED_STATUS DeleteTabletsAndSendRequests(const TableInfoPtr& table, HideOnly hide_only); // Marks each tablet as deleted and triggers requests to the tablet servers to delete them. CHECKED_STATUS DeleteTabletListAndSendRequests( - const std::vector>& tablets, const std::string& deletion_msg); + const std::vector>& tablets, const std::string& deletion_msg, + HideOnly hide_only); // Send the "delete tablet request" to the specified TS/tablet. // The specified 'reason' will be logged on the TS. 
@@ -1118,7 +1123,8 @@ class CatalogManager : public tserver::TabletPeerLookupIf { const boost::optional& cas_config_opid_index_less_or_equal, const scoped_refptr& table, TSDescriptor* ts_desc, - const std::string& reason); + const std::string& reason, + bool hide_only = false); // Start a task to request the specified tablet leader to step down and optionally to remove // the server that is over-replicated. A new tablet server can be specified to start an election @@ -1262,8 +1268,9 @@ class CatalogManager : public tserver::TabletPeerLookupIf { // the cluster config affinity specification. CHECKED_STATUS SysCatalogRespectLeaderAffinity(); - virtual Result MakeSnapshotSchedulesToTabletsMap() { - return SnapshotSchedulesToTabletsMap(); + virtual Result MakeSnapshotSchedulesToObjectIdsMap( + SysRowEntry::Type type) { + return SnapshotSchedulesToObjectIdsMap(); } // ---------------------------------------------------------------------------------------------- diff --git a/src/yb/master/master.proto b/src/yb/master/master.proto index 434c876b44a9..9156454388fd 100644 --- a/src/yb/master/master.proto +++ b/src/yb/master/master.proto @@ -332,6 +332,8 @@ message SysTabletsEntryPB { // Tablet IDs for this tablet split if they have been registered in master. repeated bytes split_tablet_ids = 14; + + optional bool hidden = 15; } // The on-disk entry in the sys.catalog table ("metadata" column) for diff --git a/src/yb/master/master_snapshot_coordinator.cc b/src/yb/master/master_snapshot_coordinator.cc index d6c21753e9a6..aa4996ba9c5e 100644 --- a/src/yb/master/master_snapshot_coordinator.cc +++ b/src/yb/master/master_snapshot_coordinator.cc @@ -468,8 +468,14 @@ class MasterSnapshotCoordinator::Impl { // TODO(pitr) Notify user about failures. auto status = context_.VerifyRestoredObjects(*restoration); LOG_IF(DFATAL, !status.ok()) << "Verify restoration failed: " << status; + std::vector restore_tablets; + for (const auto& id_and_type : restoration->objects_to_restore) { + if (id_and_type.second == SysRowEntry::TABLET) { + restore_tablets.push_back(id_and_type.first); + } + } status = DoRestore(restoration->snapshot_id, restoration->restore_at, - restoration->restoration_id, restoration->obsolete_tablets, + restoration->restoration_id, restore_tablets, RestorePhase::kPostSysCatalogLoad, term); LOG_IF(DFATAL, !status.ok()) << "Failed to restore tablets for restoration " @@ -477,7 +483,8 @@ class MasterSnapshotCoordinator::Impl { } } - Result MakeSnapshotSchedulesToTabletsMap() { + Result MakeSnapshotSchedulesToObjectIdsMap( + SysRowEntry::Type type) { std::vector> schedules; { std::lock_guard lock(mutex_); @@ -485,16 +492,16 @@ class MasterSnapshotCoordinator::Impl { schedules.emplace_back(schedule->id(), schedule->options().filter()); } } - SnapshotSchedulesToTabletsMap result; + SnapshotSchedulesToObjectIdsMap result; for (const auto& id_and_filter : schedules) { auto entries = VERIFY_RESULT(CollectEntries(id_and_filter.second)); - auto& tablets = result[id_and_filter.first]; + auto& ids = result[id_and_filter.first]; for (const auto& entry : entries.entries()) { - if (entry.type() == SysRowEntry::TABLET) { - tablets.push_back(entry.id()); + if (entry.type() == type) { + ids.push_back(entry.id()); } } - std::sort(tablets.begin(), tablets.end()); + std::sort(ids.begin(), ids.end()); } return result; } @@ -913,10 +920,11 @@ class MasterSnapshotCoordinator::Impl { CHECKED_STATUS DoRestore( const TxnSnapshotId& snapshot_id, HybridTime restore_at, - const TxnSnapshotRestorationId& restoration_id, const 
std::vector<TabletId>& obsolete_tablets,
+      const TxnSnapshotRestorationId& restoration_id, const std::vector<TabletId>& restore_tablets,
       RestorePhase phase, int64_t leader_term) {
     TabletInfos tablet_infos;
     bool restore_sys_catalog;
+    std::unordered_set<TabletId> snapshot_tablets;
     {
       std::lock_guard<std::mutex> lock(mutex_);
       SnapshotState& snapshot = VERIFY_RESULT(FindSnapshot(snapshot_id));
@@ -933,13 +941,16 @@ class MasterSnapshotCoordinator::Impl {
         restoration_ptr = &VERIFY_RESULT(FindRestoration(restoration_id)).get();
       }
       if (!restore_sys_catalog) {
-        LOG(INFO) << "PITR: Obsolete tablets: " << AsString(obsolete_tablets);
-        // New tablets could be created between restoration point and snapshot time.
-        // In this case we should remove those tablets from restoration, since they will be deleted
-        // and we should NOT restore data on them.
-        restoration_ptr->RemoveTablets(obsolete_tablets);
+        if (phase == RestorePhase::kPostSysCatalogLoad) {
+          LOG(INFO) << "PITR: Restore tablets: " << AsString(restore_tablets);
+          // The set of tablets could have changed between the restoration point and the snapshot
+          // time, so we take the tablet list from the actual catalog state.
+          restoration_ptr->InitTabletIds(restore_tablets);
+        }
         tablet_infos = restoration_ptr->PrepareOperations();
       }
+      auto tablet_ids = snapshot.tablet_ids();
+      snapshot_tablets.insert(tablet_ids.begin(), tablet_ids.end());
     }

     // If sys catalog is restored, then tablets data will be restored after that using postponed
@@ -949,9 +960,13 @@
     } else {
       auto snapshot_id_str = snapshot_id.AsSlice().ToBuffer();
       SendMetadata send_metadata(phase == RestorePhase::kPostSysCatalogLoad);
+      LOG(INFO) << "Restore tablets: " << AsString(tablet_infos);
       for (const auto& tablet : tablet_infos) {
+        // If this tablet did not participate in the snapshot (i.e. it was deleted),
+        // we just change the hybrid time limit and clear the hide state.
         context_.SendRestoreTabletSnapshotRequest(
-            tablet, snapshot_id_str, restore_at, send_metadata,
+            tablet, snapshot_tablets.count(tablet->id()) ? snapshot_id_str : std::string(),
+            restore_at, send_metadata,
             MakeDoneCallback(&mutex_, restorations_, restoration_id, tablet->tablet_id()));
       }
     }
@@ -1091,9 +1106,9 @@ Status MasterSnapshotCoordinator::FillHeartbeatResponse(TSHeartbeatResponsePB* r
   return impl_->FillHeartbeatResponse(resp);
 }

-Result<SnapshotSchedulesToTabletsMap>
-    MasterSnapshotCoordinator::MakeSnapshotSchedulesToTabletsMap() {
-  return impl_->MakeSnapshotSchedulesToTabletsMap();
+Result<SnapshotSchedulesToObjectIdsMap>
+    MasterSnapshotCoordinator::MakeSnapshotSchedulesToObjectIdsMap(SysRowEntry::Type type) {
+  return impl_->MakeSnapshotSchedulesToObjectIdsMap(type);
 }

 void MasterSnapshotCoordinator::SysCatalogLoaded(int64_t term) {
diff --git a/src/yb/master/master_snapshot_coordinator.h b/src/yb/master/master_snapshot_coordinator.h
index fabf4b1bae6f..8fa0831a8943 100644
--- a/src/yb/master/master_snapshot_coordinator.h
+++ b/src/yb/master/master_snapshot_coordinator.h
@@ -102,7 +102,8 @@ class MasterSnapshotCoordinator : public tablet::SnapshotCoordinator {
   void SysCatalogLoaded(int64_t term);

   // For each returns map from schedule id to sorted vectors of tablets id in this schedule.
-  Result<SnapshotSchedulesToTabletsMap> MakeSnapshotSchedulesToTabletsMap();
+  Result<SnapshotSchedulesToObjectIdsMap> MakeSnapshotSchedulesToObjectIdsMap(
+      SysRowEntry::Type type);

   void Start();

diff --git a/src/yb/master/state_with_tablets.h b/src/yb/master/state_with_tablets.h
index c85b788df94f..5d35b42c80a5 100644
--- a/src/yb/master/state_with_tablets.h
+++ b/src/yb/master/state_with_tablets.h
@@ -14,6 +14,8 @@
 #ifndef YB_MASTER_STATE_WITH_TABLETS_H
 #define YB_MASTER_STATE_WITH_TABLETS_H

+#include <boost/iterator/transform_iterator.hpp>
+
 #include
 #include
 #include
@@ -69,6 +71,7 @@ class StateWithTablets {
   template <class TabletIds>
   void InitTabletIds(const TabletIds& tablet_ids, SysSnapshotEntryPB::State state) {
+    tablets_.clear();
     for (const auto& id : tablet_ids) {
       tablets_.emplace(id, state);
     }
@@ -123,6 +126,13 @@ class StateWithTablets {
   virtual bool IsTerminalFailure(const Status& status) = 0;

+  auto tablet_ids() const {
+    auto lambda = [](const TabletData& data) { return data.id; };
+    return boost::make_iterator_range(
+        boost::make_transform_iterator(tablets_.begin(), lambda),
+        boost::make_transform_iterator(tablets_.end(), lambda));
+  }
+
 protected:
   struct TabletData {
     TabletId id;
diff --git a/src/yb/master/system_tablet.h b/src/yb/master/system_tablet.h
index 84749cc0b85b..9af6ee968016 100644
--- a/src/yb/master/system_tablet.h
+++ b/src/yb/master/system_tablet.h
@@ -37,6 +37,10 @@ class SystemTablet : public tablet::AbstractTablet {
   const TabletId& tablet_id() const override;

+  bool system() const override {
+    return true;
+  }
+
   tablet::TabletRetentionPolicy* RetentionPolicy() override {
     return nullptr;
   }
diff --git a/src/yb/tablet/abstract_tablet.h b/src/yb/tablet/abstract_tablet.h
index 95d603f9714a..364a4383bc6a 100644
--- a/src/yb/tablet/abstract_tablet.h
+++ b/src/yb/tablet/abstract_tablet.h
@@ -51,6 +51,8 @@ class AbstractTablet {
   virtual const std::string& tablet_id() const = 0;

+  virtual bool system() const = 0;
+
   //------------------------------------------------------------------------------------------------
   // Redis support.
   virtual CHECKED_STATUS HandleRedisReadRequest(
diff --git a/src/yb/tablet/metadata.proto b/src/yb/tablet/metadata.proto
index 0e7c11156d25..85201d1df020 100644
--- a/src/yb/tablet/metadata.proto
+++ b/src/yb/tablet/metadata.proto
@@ -217,6 +217,9 @@ message RaftGroupReplicaSuperBlockPB {
   // Is this tablet currently a consumer tablet for 2dc replication.
   optional bool is_under_twodc_replication = 27;
+
+  // Reject client queries (reads and writes) to this tablet as if it did not exist.
+ optional bool hidden = 28; } message FilePB { diff --git a/src/yb/tablet/operations/snapshot_operation.cc b/src/yb/tablet/operations/snapshot_operation.cc index 072c020703d3..ca44ff472a78 100644 --- a/src/yb/tablet/operations/snapshot_operation.cc +++ b/src/yb/tablet/operations/snapshot_operation.cc @@ -45,6 +45,9 @@ Result SnapshotOperationState::GetSnapshotDir() const { if (!request.snapshot_dir_override().empty()) { return request.snapshot_dir_override(); } + if (request.snapshot_id().empty()) { + return std::string(); + } std::string snapshot_id_str; auto txn_snapshot_id = TryFullyDecodeTxnSnapshotId(request.snapshot_id()); if (txn_snapshot_id) { @@ -62,6 +65,9 @@ Status SnapshotOperationState::DoCheckOperationRequirements() { } const string snapshot_dir = VERIFY_RESULT(GetSnapshotDir()); + if (snapshot_dir.empty()) { + return Status::OK(); + } Status s = tablet()->rocksdb_env().FileExists(snapshot_dir); if (!s.ok()) { diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index b5a6a9ff3a54..13b0bd911728 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -1065,7 +1065,7 @@ Status Tablet::ApplyKeyValueRowOperations( } if (intents_write_batch.Count() != 0) { if (!metadata_->is_under_twodc_replication()) { - RETURN_NOT_OK(metadata_->set_is_under_twodc_replication(true)); + RETURN_NOT_OK(metadata_->SetIsUnderTwodcReplicationAndFlush(true)); } WriteToRocksDB(frontiers, &intents_write_batch, StorageDbType::kIntents); } diff --git a/src/yb/tablet/tablet.h b/src/yb/tablet/tablet.h index 954f22ac58fe..8ec8e85d147d 100644 --- a/src/yb/tablet/tablet.h +++ b/src/yb/tablet/tablet.h @@ -436,6 +436,10 @@ class Tablet : public AbstractTablet, public TransactionIntentApplier { const std::string& tablet_id() const override { return metadata_->raft_group_id(); } + bool system() const override { + return false; + } + // Return the metrics for this tablet. // May be nullptr in unit tests, etc. 
TabletMetrics* metrics() { return metrics_.get(); } diff --git a/src/yb/tablet/tablet_component.cc b/src/yb/tablet/tablet_component.cc index 3092e4beb169..b158c88b4371 100644 --- a/src/yb/tablet/tablet_component.cc +++ b/src/yb/tablet/tablet_component.cc @@ -65,8 +65,11 @@ rocksdb::Env& TabletComponent::rocksdb_env() const { return tablet_.rocksdb_env(); } -void TabletComponent::ResetYBMetaDataCache() { +void TabletComponent::RefreshYBMetaDataCache() { tablet_.ResetYBMetaDataCache(); + if (!metadata().index_map()->empty()) { + tablet_.CreateNewYBMetaDataCache(); + } } } // namespace tablet diff --git a/src/yb/tablet/tablet_component.h b/src/yb/tablet/tablet_component.h index 30655e804012..0518b260c9c2 100644 --- a/src/yb/tablet/tablet_component.h +++ b/src/yb/tablet/tablet_component.h @@ -54,7 +54,7 @@ class TabletComponent { rocksdb::Env& rocksdb_env() const; - void ResetYBMetaDataCache(); + void RefreshYBMetaDataCache(); private: Tablet& tablet_; diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index 85909d568136..dc87829d3614 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -534,6 +534,7 @@ Status RaftGroupMetadata::LoadFromSuperBlock(const RaftGroupReplicaSuperBlockPB& } cdc_min_replicated_index_ = superblock.cdc_min_replicated_index(); is_under_twodc_replication_ = superblock.is_under_twodc_replication(); + hidden_ = superblock.hidden(); } return Status::OK(); @@ -618,6 +619,7 @@ void RaftGroupMetadata::ToSuperBlockUnlocked(RaftGroupReplicaSuperBlockPB* super pb.set_colocated(colocated_); pb.set_cdc_min_replicated_index(cdc_min_replicated_index_); pb.set_is_under_twodc_replication(is_under_twodc_replication_); + pb.set_hidden(hidden_); superblock->Swap(&pb); } @@ -786,7 +788,7 @@ int64_t RaftGroupMetadata::cdc_min_replicated_index() const { return cdc_min_replicated_index_; } -Status RaftGroupMetadata::set_is_under_twodc_replication(bool is_under_twodc_replication) { +Status RaftGroupMetadata::SetIsUnderTwodcReplicationAndFlush(bool is_under_twodc_replication) { { std::lock_guard lock(data_mutex_); is_under_twodc_replication_ = is_under_twodc_replication; @@ -799,6 +801,19 @@ bool RaftGroupMetadata::is_under_twodc_replication() const { return is_under_twodc_replication_; } +Status RaftGroupMetadata::SetHiddenAndFlush(bool value) { + { + std::lock_guard lock(data_mutex_); + hidden_ = value; + } + return Flush(); +} + +bool RaftGroupMetadata::hidden() const { + std::lock_guard lock(data_mutex_); + return hidden_; +} + void RaftGroupMetadata::set_tablet_data_state(TabletDataState state) { std::lock_guard lock(data_mutex_); tablet_data_state_ = state; diff --git a/src/yb/tablet/tablet_metadata.h b/src/yb/tablet/tablet_metadata.h index 00777e98cf3e..873bcb3cea75 100644 --- a/src/yb/tablet/tablet_metadata.h +++ b/src/yb/tablet/tablet_metadata.h @@ -341,7 +341,7 @@ class RaftGroupMetadata : public RefCountedThreadSafe { int64_t cdc_min_replicated_index() const; - CHECKED_STATUS set_is_under_twodc_replication(bool is_under_twodc_replication); + CHECKED_STATUS SetIsUnderTwodcReplicationAndFlush(bool is_under_twodc_replication); bool is_under_twodc_replication() const; @@ -404,6 +404,9 @@ class RaftGroupMetadata : public RefCountedThreadSafe { void set_tablet_data_state(TabletDataState state); TabletDataState tablet_data_state() const; + CHECKED_STATUS SetHiddenAndFlush(bool value); + bool hidden() const; + CHECKED_STATUS Flush(); // Mark the superblock to be in state 'delete_type', sync it to disk, and @@ -559,6 +562,8 @@ 
class RaftGroupMetadata : public RefCountedThreadSafe { bool is_under_twodc_replication_ = false; + bool hidden_ = false; + DISALLOW_COPY_AND_ASSIGN(RaftGroupMetadata); }; diff --git a/src/yb/tablet/tablet_snapshots.cc b/src/yb/tablet/tablet_snapshots.cc index 38fbf93fd82d..e52a9b3f3ae2 100644 --- a/src/yb/tablet/tablet_snapshots.cc +++ b/src/yb/tablet/tablet_snapshots.cc @@ -46,6 +46,7 @@ struct TabletSnapshots::RestoreMetadata { boost::optional schema; boost::optional index_map; uint32_t schema_version; + bool hide; }; TabletSnapshots::TabletSnapshots(Tablet* tablet) : TabletComponent(tablet) {} @@ -202,9 +203,11 @@ Status TabletSnapshots::Restore(SnapshotOperationState* tx_state) { const std::string snapshot_dir = VERIFY_RESULT(tx_state->GetSnapshotDir()); auto restore_at = HybridTime::FromPB(tx_state->request()->snapshot_hybrid_time()); - RETURN_NOT_OK_PREPEND( - FileExists(&rocksdb_env(), snapshot_dir), - Format("Snapshot directory does not exist: $0", snapshot_dir)); + if (!snapshot_dir.empty()) { + RETURN_NOT_OK_PREPEND( + FileExists(&rocksdb_env(), snapshot_dir), + Format("Snapshot directory does not exist: $0", snapshot_dir)); + } docdb::ConsensusFrontier frontier; frontier.set_op_id(tx_state->op_id()); @@ -212,9 +215,11 @@ Status TabletSnapshots::Restore(SnapshotOperationState* tx_state) { RestoreMetadata restore_metadata; if (tx_state->request()->has_schema()) { restore_metadata.schema.emplace(); - RETURN_NOT_OK(SchemaFromPB(tx_state->request()->schema(), restore_metadata.schema.get_ptr())); - restore_metadata.index_map.emplace(tx_state->request()->indexes()); - restore_metadata.schema_version = tx_state->request()->schema_version(); + const auto& request = *tx_state->request(); + RETURN_NOT_OK(SchemaFromPB(request.schema(), restore_metadata.schema.get_ptr())); + restore_metadata.index_map.emplace(request.indexes()); + restore_metadata.schema_version = request.schema_version(); + restore_metadata.hide = request.hide(); } const Status s = RestoreCheckpoint(snapshot_dir, restore_at, restore_metadata, frontier); VLOG_WITH_PREFIX(1) << "Complete checkpoint restoring with result " << s << " in folder: " @@ -241,14 +246,21 @@ Status TabletSnapshots::RestoreCheckpoint( const string db_dir = regular_db().GetName(); const std::string intents_db_dir = has_intents_db() ? intents_db().GetName() : std::string(); - // Destroy DB object. - // TODO: snapshot current DB and try to restore it in case of failure. - RETURN_NOT_OK(ResetRocksDBs(Destroy::kTrue, DisableFlushOnShutdown::kTrue)); - - auto s = CopyDirectory(&rocksdb_env(), dir, db_dir, UseHardLinks::kTrue, CreateIfMissing::kTrue); - if (PREDICT_FALSE(!s.ok())) { - LOG_WITH_PREFIX(WARNING) << "Copy checkpoint files status: " << s; - return STATUS(IllegalState, "Unable to copy checkpoint files", s.ToString()); + if (dir.empty()) { + // Just change rocksdb hybrid time limit, because it should be in retention interval. + // TODO(pitr) apply transactions and reset intents. + RETURN_NOT_OK(ResetRocksDBs(Destroy::kFalse, DisableFlushOnShutdown::kFalse)); + } else { + // Destroy DB object. + // TODO: snapshot current DB and try to restore it in case of failure. 
+ RETURN_NOT_OK(ResetRocksDBs(Destroy::kTrue, DisableFlushOnShutdown::kTrue)); + + auto s = CopyDirectory( + &rocksdb_env(), dir, db_dir, UseHardLinks::kTrue, CreateIfMissing::kTrue); + if (PREDICT_FALSE(!s.ok())) { + LOG_WITH_PREFIX(WARNING) << "Copy checkpoint files status: " << s; + return STATUS(IllegalState, "Unable to copy checkpoint files", s.ToString()); + } } { @@ -268,13 +280,13 @@ Status TabletSnapshots::RestoreCheckpoint( tablet().metadata()->SetSchema( *restore_metadata.schema, *restore_metadata.index_map, {} /* deleted_columns */, restore_metadata.schema_version); - RETURN_NOT_OK(tablet().metadata()->Flush()); - ResetYBMetaDataCache(); + RETURN_NOT_OK(tablet().metadata()->SetHiddenAndFlush(restore_metadata.hide)); + RefreshYBMetaDataCache(); } // Reopen database from copied checkpoint. // Note: db_dir == metadata()->rocksdb_dir() is still valid db dir. - s = OpenRocksDBs(); + auto s = OpenRocksDBs(); if (PREDICT_FALSE(!s.ok())) { LOG_WITH_PREFIX(WARNING) << "Failed tablet db opening from checkpoint: " << s; return s; diff --git a/src/yb/tools/CMakeLists.txt b/src/yb/tools/CMakeLists.txt index 51bc4895ca99..764ec5f7e803 100644 --- a/src/yb/tools/CMakeLists.txt +++ b/src/yb/tools/CMakeLists.txt @@ -155,7 +155,11 @@ target_link_libraries(yb-pbc-dump ${LINK_LIBS} ) +add_library(admin-test-base admin-test-base.cc) +target_link_libraries(admin-test-base integration-tests yb_test_util yb_util) + set(YB_TEST_LINK_LIBS + admin-test-base ysck yb-admin_lib yb_client @@ -172,6 +176,10 @@ ADD_YB_TEST_DEPENDENCIES(yb-bulk_load-test yb-generate_partitions_main yb-bulk_load) ADD_YB_TEST(ysck_remote-test) +ADD_YB_TEST(yb-admin-snapshot-schedule-test) +ADD_YB_TEST_DEPENDENCIES(yb-admin-snapshot-schedule-test + yb-admin) +target_link_libraries(yb-admin-snapshot-schedule-test cql_test_util) ADD_YB_TEST(yb-admin-test) ADD_YB_TEST_DEPENDENCIES(yb-admin-test yb-admin) diff --git a/src/yb/tools/admin-test-base.cc b/src/yb/tools/admin-test-base.cc new file mode 100644 index 000000000000..52c8c02cf991 --- /dev/null +++ b/src/yb/tools/admin-test-base.cc @@ -0,0 +1,55 @@ +// Copyright (c) YugaByte, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. +// + +#include "yb/tools/admin-test-base.h" + +#include "yb/integration-tests/external_mini_cluster.h" + +#include "yb/util/subprocess.h" + +namespace yb { +namespace tools { + +namespace { + +const char* const kAdminToolName = "yb-admin"; + +} + +// Figure out where the admin tool is. 
+std::string AdminTestBase::GetAdminToolPath() const {
+  return GetToolPath(kAdminToolName);
+}
+
+HostPort AdminTestBase::GetMasterAddresses() const {
+  return cluster_->master()->bound_rpc_addr();
+}
+
+Result<std::string> AdminTestBase::CallAdminVec(const std::vector<std::string>& args) {
+  std::string result;
+  LOG(INFO) << "Execute: " << AsString(args);
+  RETURN_NOT_OK(Subprocess::Call(args, &result));
+  return result;
+}
+
+Result<rapidjson::Document> AdminTestBase::ParseJson(const std::string& raw) {
+  rapidjson::Document result;
+  if (result.Parse(raw.c_str(), raw.length()).HasParseError()) {
+    return STATUS_FORMAT(
+        InvalidArgument, "Failed to parse json output $0: $1", result.GetParseError(), raw);
+  }
+  return result;
+}
+
+} // namespace tools
+} // namespace yb
diff --git a/src/yb/tools/admin-test-base.h b/src/yb/tools/admin-test-base.h
new file mode 100644
index 000000000000..414de0af13bb
--- /dev/null
+++ b/src/yb/tools/admin-test-base.h
@@ -0,0 +1,51 @@
+// Copyright (c) YugaByte, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations
+// under the License.
+//
+
+#ifndef YB_TOOLS_ADMIN_TEST_BASE_H
+#define YB_TOOLS_ADMIN_TEST_BASE_H
+
+#include "yb/integration-tests/ts_itest-base.h"
+
+#include "yb/util/string_util.h"
+
+namespace yb {
+namespace tools {
+
+class AdminTestBase : public tserver::TabletServerIntegrationTestBase {
+ public:
+  // Figure out where the admin tool is.
+  std::string GetAdminToolPath() const;
+
+  HostPort GetMasterAddresses() const;
+
+  template <class... Args>
+  Result<std::string> CallAdmin(Args&&... args) {
+    return CallAdminVec(ToStringVector(
+        GetAdminToolPath(), "-master_addresses", GetMasterAddresses(),
+        std::forward<Args>(args)...));
+  }
+
+  Result<std::string> CallAdminVec(const std::vector<std::string>& args);
+
+  template <class... Args>
+  Result<rapidjson::Document> CallJsonAdmin(Args&&... args) {
+    return ParseJson(VERIFY_RESULT(CallAdmin(std::forward<Args>(args)...)));
+  }
+
+  Result<rapidjson::Document> ParseJson(const std::string& raw);
+};
+
+} // namespace tools
+} // namespace yb
+
+#endif // YB_TOOLS_ADMIN_TEST_BASE_H
diff --git a/src/yb/tools/yb-admin-snapshot-schedule-test.cc b/src/yb/tools/yb-admin-snapshot-schedule-test.cc
new file mode 100644
index 000000000000..60be67407546
--- /dev/null
+++ b/src/yb/tools/yb-admin-snapshot-schedule-test.cc
@@ -0,0 +1,325 @@
+// Copyright (c) YugaByte, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations
+// under the License.
+//
+
+#include "yb/tools/admin-test-base.h"
+
+#include "yb/client/ql-dml-test-base.h"
+
+#include "yb/integration-tests/cql_test_util.h"
+#include "yb/integration-tests/external_mini_cluster.h"
+
+#include "yb/master/master_backup.pb.h"
+
+#include "yb/util/date_time.h"
+#include "yb/util/random_util.h"
+
+#include "yb/yql/pgwrapper/libpq_utils.h"
+
+namespace yb {
+namespace tools {
+
+namespace {
+
+Result<const rapidjson::Value&> Get(const rapidjson::Value& value, const char* name) {
+  auto it = value.FindMember(name);
+  if (it == value.MemberEnd()) {
+    return STATUS_FORMAT(InvalidArgument, "Missing $0 field", name);
+  }
+  return it->value;
+}
+
+Result<rapidjson::Value&> Get(rapidjson::Value* value, const char* name) {
+  auto it = value->FindMember(name);
+  if (it == value->MemberEnd()) {
+    return STATUS_FORMAT(InvalidArgument, "Missing $0 field", name);
+  }
+  return it->value;
+}
+
+const std::string kDbName = "ybtest";
+const std::string kClusterName = "yugacluster";
+
+} // namespace
+
+class YbAdminSnapshotScheduleTest : public AdminTestBase {
+ public:
+  Result<rapidjson::Document> GetSnapshotSchedule(const std::string& id = std::string()) {
+    auto out = VERIFY_RESULT(id.empty() ? CallJsonAdmin("list_snapshot_schedules")
+                                        : CallJsonAdmin("list_snapshot_schedules", id));
+    auto schedules = VERIFY_RESULT(Get(&out, "schedules")).get().GetArray();
+    SCHECK_EQ(schedules.Size(), 1, IllegalState, "Wrong schedules number");
+    rapidjson::Document result;
+    result.CopyFrom(schedules[0], result.GetAllocator());
+    return result;
+  }
+
+  Result<rapidjson::Document> WaitScheduleSnapshot(
+      MonoDelta duration, const std::string& id = std::string(), int num_snapshots = 1) {
+    rapidjson::Document result;
+    RETURN_NOT_OK(WaitFor([this, id, num_snapshots, &result]() -> Result<bool> {
+      auto schedule = VERIFY_RESULT(GetSnapshotSchedule(id));
+      auto snapshots = VERIFY_RESULT(Get(&schedule, "snapshots")).get().GetArray();
+      if (snapshots.Size() < num_snapshots) {
+        return false;
+      }
+      result.CopyFrom(snapshots[snapshots.Size() - 1], result.GetAllocator());
+      return true;
+    }, duration, "Wait schedule snapshot"));
+    return result;
+  }
+
+  CHECKED_STATUS RestoreSnapshotSchedule(const std::string& schedule_id, Timestamp restore_at) {
+    auto out = VERIFY_RESULT(CallJsonAdmin(
+        "restore_snapshot_schedule", schedule_id, restore_at.ToFormattedString()));
+    std::string restoration_id = VERIFY_RESULT(Get(out, "restoration_id")).get().GetString();
+    LOG(INFO) << "Restoration id: " << restoration_id;
+
+    return WaitRestorationDone(restoration_id, 20s);
+  }
+
+  CHECKED_STATUS WaitRestorationDone(const std::string& restoration_id, MonoDelta timeout) {
+    return WaitFor([this, restoration_id]() -> Result<bool> {
+      auto out = VERIFY_RESULT(CallJsonAdmin("list_snapshot_restorations", restoration_id));
+      const auto& restorations = VERIFY_RESULT(Get(out, "restorations")).get().GetArray();
+      SCHECK_EQ(restorations.Size(), 1, IllegalState, "Wrong restorations number");
+      auto id = VERIFY_RESULT(Get(restorations[0], "id")).get().GetString();
+      SCHECK_EQ(id, restoration_id, IllegalState, "Wrong restoration id");
+      std::string state_str = VERIFY_RESULT(Get(restorations[0], "state")).get().GetString();
+      master::SysSnapshotEntryPB::State state;
+      if (!master::SysSnapshotEntryPB_State_Parse(state_str, &state)) {
+        return STATUS_FORMAT(IllegalState, "Failed to parse restoration state: $0", state_str);
+      }
+      if (state == master::SysSnapshotEntryPB::RESTORING) {
+        return false;
+      }
+      if (state == master::SysSnapshotEntryPB::RESTORED) {
+        return true;
+      }
+      return STATUS_FORMAT(IllegalState, "Unexpected restoration state: $0",
+  Result<std::string> PreparePg() {
+    CreateCluster(kClusterName);
+    client_ = VERIFY_RESULT(CreateClient());
+
+    auto conn = VERIFY_RESULT(PgConnect());
+    RETURN_NOT_OK(conn.ExecuteFormat("CREATE DATABASE $0", kDbName));
+
+    auto schedule_id = VERIFY_RESULT(CreateSnapshotSchedule(6s, 10min, "ysql." + kDbName));
+    RETURN_NOT_OK(WaitScheduleSnapshot(30s, schedule_id));
+    return schedule_id;
+  }
+
+  Result<pgwrapper::PGConn> PgConnect(const std::string& db_name = std::string()) {
+    auto* ts = cluster_->tablet_server(RandomUniformInt(0, cluster_->num_tablet_servers() - 1));
+    return pgwrapper::PGConn::Connect(HostPort(ts->bind_host(), ts->pgsql_rpc_port()), db_name);
+  }
+
+  Result<std::string> PrepareCql() {
+    CreateCluster(kClusterName);
+    client_ = VERIFY_RESULT(CreateClient());
+
+    auto conn = VERIFY_RESULT(CqlConnect());
+    RETURN_NOT_OK(conn.ExecuteQuery(Format("CREATE KEYSPACE IF NOT EXISTS $0", kDbName)));
+
+    auto schedule_id = VERIFY_RESULT(CreateSnapshotSchedule(6s, 10min, "ycql." + kDbName));
+    RETURN_NOT_OK(WaitScheduleSnapshot(30s, schedule_id));
+    return schedule_id;
+  }
+
+  Result<CassandraSession> CqlConnect(const std::string& db_name = std::string()) {
+    if (!cql_driver_) {
+      std::vector<std::string> hosts;
+      for (int i = 0; i < cluster_->num_tablet_servers(); ++i) {
+        hosts.push_back(cluster_->tablet_server(i)->bind_host());
+      }
+      cql_driver_ = std::make_unique<CppCassandraDriver>(
+          hosts, cluster_->tablet_server(0)->cql_rpc_port(), true);
+    }
+    auto result = VERIFY_RESULT(cql_driver_->CreateSession());
+    if (!db_name.empty()) {
+      RETURN_NOT_OK(result.ExecuteQuery(Format("USE $0", kDbName)));
+    }
+    return result;
+  }
+
+  template <class... Args>
+  Result<std::string> CreateSnapshotSchedule(
+      MonoDelta interval, MonoDelta retention, Args&&... args) {
+    auto out = VERIFY_RESULT(CallJsonAdmin(
+        "create_snapshot_schedule", interval.ToMinutes(), retention.ToMinutes(),
+        std::forward<Args>(args)...));
+
+    std::string schedule_id = VERIFY_RESULT(Get(out, "schedule_id")).get().GetString();
+    LOG(INFO) << "Schedule id: " << schedule_id;
+    return schedule_id;
+  }
+
+  std::unique_ptr<CppCassandraDriver> cql_driver_;
+};
+
+class YbAdminSnapshotScheduleTestWithYsql : public YbAdminSnapshotScheduleTest {
+ public:
+  void UpdateMiniClusterOptions(ExternalMiniClusterOptions* opts) override {
+    opts->enable_ysql = true;
+    opts->extra_tserver_flags.emplace_back("--ysql_num_shards_per_tserver=1");
+  }
+};
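+
+// Sanity check for the schedule itself: snapshots must appear periodically and form a
+// chain, i.e. each snapshot's previous_snapshot_time_utc has to match the snapshot
+// taken before it. The test then restores to just past the last snapshot time.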
previous_snapshot_hybrid_time"); + } + last_snapshot_time_str = snapshot_time; + } + LOG(INFO) << "Last snapshot time: " << last_snapshot_time_str; + last_snapshot_time = VERIFY_RESULT(DateTime::TimestampFromString(last_snapshot_time_str)); + return true; + }, 20s, "At least 2 snapshots")); + + last_snapshot_time.set_value(last_snapshot_time.value() + 1); + LOG(INFO) << "Restore at: " << last_snapshot_time.ToFormattedString(); + + ASSERT_OK(RestoreSnapshotSchedule(schedule_id, last_snapshot_time)); +} + +TEST_F(YbAdminSnapshotScheduleTest, UndeleteTable) { + LOG(INFO) << "Create cluster"; + CreateCluster("test-cluster"); + + LOG(INFO) << "Create client"; + client_ = ASSERT_RESULT(CreateClient()); + + LOG(INFO) << "Create namespace"; + ASSERT_OK(client_->CreateNamespaceIfNotExists( + client::kTableName.namespace_name(), client::kTableName.namespace_type())); + + LOG(INFO) << "Create snapshot schedule"; + std::string schedule_id = ASSERT_RESULT(CreateSnapshotSchedule( + 6s, 10min, client::kTableName.namespace_name())); + + LOG(INFO) << "Wait snapshot schedule"; + ASSERT_OK(WaitScheduleSnapshot(30s, schedule_id)); + + auto session = client_->NewSession(); + LOG(INFO) << "Create table"; + ASSERT_NO_FATALS(client::kv_table_test::CreateTable( + client::Transactional::kTrue, 3, client_.get(), &table_)); + + LOG(INFO) << "Write values"; + constexpr int kMinKey = 1; + constexpr int kMaxKey = 100; + for (int i = kMinKey; i <= kMaxKey; ++i) { + ASSERT_OK(client::kv_table_test::WriteRow(&table_, session, i, -i)); + } + + Timestamp time(ASSERT_RESULT(WallClock()->Now()).time_point); + + LOG(INFO) << "Delete table"; + ASSERT_OK(client_->DeleteTable(client::kTableName)); + + ASSERT_NOK(client::kv_table_test::WriteRow(&table_, session, kMinKey, 0)); + + LOG(INFO) << "Restore schedule"; + ASSERT_OK(RestoreSnapshotSchedule(schedule_id, time)); + + LOG(INFO) << "Reading rows"; + auto rows = ASSERT_RESULT(client::kv_table_test::SelectAllRows(&table_, session)); + LOG(INFO) << "Rows: " << AsString(rows); + ASSERT_EQ(rows.size(), kMaxKey - kMinKey + 1); + for (int i = kMinKey; i <= kMaxKey; ++i) { + ASSERT_EQ(rows[i], -i); + } + + constexpr int kExtraKey = kMaxKey + 1; + ASSERT_OK(client::kv_table_test::WriteRow(&table_, session, kExtraKey, -kExtraKey)); + auto extra_value = ASSERT_RESULT(client::kv_table_test::SelectRow(&table_, session, kExtraKey)); + ASSERT_EQ(extra_value, -kExtraKey); +} + +TEST_F_EX(YbAdminSnapshotScheduleTest, YB_DISABLE_TEST_IN_TSAN(Pgsql), + YbAdminSnapshotScheduleTestWithYsql) { + auto schedule_id = ASSERT_RESULT(PreparePg()); + + auto conn = ASSERT_RESULT(PgConnect(kDbName)); + + ASSERT_OK(conn.Execute("CREATE TABLE test_table (key INT PRIMARY KEY, value TEXT)")); + + ASSERT_OK(conn.Execute("INSERT INTO test_table VALUES (1, 'before')")); + + Timestamp time(ASSERT_RESULT(WallClock()->Now()).time_point); + + ASSERT_OK(conn.Execute("UPDATE test_table SET value = 'after'")); + + ASSERT_OK(RestoreSnapshotSchedule(schedule_id, time)); + + auto res = ASSERT_RESULT(conn.FetchValue("SELECT value FROM test_table")); + + ASSERT_EQ(res, "before"); +} + +TEST_F(YbAdminSnapshotScheduleTest, UndeleteIndex) { + auto schedule_id = ASSERT_RESULT(PrepareCql()); + + auto conn = ASSERT_RESULT(CqlConnect(kDbName)); + + ASSERT_OK(conn.ExecuteQuery( + "CREATE TABLE test_table (key INT PRIMARY KEY, value TEXT) " + "WITH transactions = { 'enabled' : true }")); + ASSERT_OK(conn.ExecuteQuery("CREATE UNIQUE INDEX test_table_idx ON test_table (value)")); + + ASSERT_OK(conn.ExecuteQuery("INSERT INTO test_table 
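+
+// Same restore flow through YSQL: the UPDATE executed after the captured timestamp
+// is undone by the restore, so the original value becomes visible again.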
+TEST_F_EX(YbAdminSnapshotScheduleTest, YB_DISABLE_TEST_IN_TSAN(Pgsql),
+          YbAdminSnapshotScheduleTestWithYsql) {
+  auto schedule_id = ASSERT_RESULT(PreparePg());
+
+  auto conn = ASSERT_RESULT(PgConnect(kDbName));
+
+  ASSERT_OK(conn.Execute("CREATE TABLE test_table (key INT PRIMARY KEY, value TEXT)"));
+
+  ASSERT_OK(conn.Execute("INSERT INTO test_table VALUES (1, 'before')"));
+
+  Timestamp time(ASSERT_RESULT(WallClock()->Now()).time_point);
+
+  ASSERT_OK(conn.Execute("UPDATE test_table SET value = 'after'"));
+
+  ASSERT_OK(RestoreSnapshotSchedule(schedule_id, time));
+
+  auto res = ASSERT_RESULT(conn.FetchValue<std::string>("SELECT value FROM test_table"));
+
+  ASSERT_EQ(res, "before");
+}
+
+TEST_F(YbAdminSnapshotScheduleTest, UndeleteIndex) {
+  auto schedule_id = ASSERT_RESULT(PrepareCql());
+
+  auto conn = ASSERT_RESULT(CqlConnect(kDbName));
+
+  ASSERT_OK(conn.ExecuteQuery(
+      "CREATE TABLE test_table (key INT PRIMARY KEY, value TEXT) "
+      "WITH transactions = { 'enabled' : true }"));
+  ASSERT_OK(conn.ExecuteQuery("CREATE UNIQUE INDEX test_table_idx ON test_table (value)"));
+
+  ASSERT_OK(conn.ExecuteQuery("INSERT INTO test_table (key, value) VALUES (1, 'value')"));
+
+  Timestamp time(ASSERT_RESULT(WallClock()->Now()).time_point);
+
+  ASSERT_OK(conn.ExecuteQuery("DROP INDEX test_table_idx"));
+
+  ASSERT_OK(conn.ExecuteQuery("INSERT INTO test_table (key, value) VALUES (3, 'value')"));
+
+  ASSERT_OK(RestoreSnapshotSchedule(schedule_id, time));
+
+  ASSERT_NOK(conn.ExecuteQuery("INSERT INTO test_table (key, value) VALUES (5, 'value')"));
+
+  auto res = ASSERT_RESULT(conn.FetchValue<int32_t>(
+      "SELECT key FROM test_table WHERE value = 'value'"));
+
+  ASSERT_EQ(res, 1);
+}
+
+} // namespace tools
+} // namespace yb
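
The next diff removes the pieces that moved into the shared AdminTestBase fixture (src/yb/tools/admin-test-base.{h,cc}), so yb-admin-test.cc keeps only the CLI tests themselves. As a rough usage sketch (not part of the patch; the test name and assertions are illustrative only), a fixture derived from AdminTestBase drives the yb-admin binary like this, assuming CallAdmin/CallJsonAdmin keep the signatures visible in the removed code below:

    // Hypothetical example, not included in this change.
    class AdminToolSmokeTest : public AdminTestBase {};

    TEST_F(AdminToolSmokeTest, ListSnapshotSchedules) {
      BuildAndStart();  // cluster setup inherited via the fixture hierarchy
      // Plain-text form: returns yb-admin's stdout as a string.
      auto raw = ASSERT_RESULT(CallAdmin("list_snapshot_schedules"));
      LOG(INFO) << "yb-admin output: " << raw;
      // JSON form: parses stdout into a rapidjson::Document.
      auto json = ASSERT_RESULT(CallJsonAdmin("list_snapshot_schedules"));
      ASSERT_TRUE(json.HasMember("schedules"));
    }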
CallJsonAdmin("list_snapshot_schedules") - : CallJsonAdmin("list_snapshot_schedules", id)); - auto schedules = VERIFY_RESULT(Get(&out, "schedules")).get().GetArray(); - SCHECK_EQ(schedules.Size(), 1, IllegalState, "Wrong schedules number"); - rapidjson::Document result; - result.CopyFrom(schedules[0], result.GetAllocator()); - return result; - } - - Result WaitScheduleSnapshot( - MonoDelta duration, const std::string& id = std::string(), int num_snashots = 1) { - rapidjson::Document result; - RETURN_NOT_OK(WaitFor([this, id, num_snashots, &result]() -> Result { - auto schedule = VERIFY_RESULT(GetSnapshotSchedule(id)); - auto snapshots = VERIFY_RESULT(Get(&schedule, "snapshots")).get().GetArray(); - if (snapshots.Size() < num_snashots) { - return false; - } - result.CopyFrom(snapshots[snapshots.Size() - 1], result.GetAllocator()); - return true; - }, duration, "Wait schedule snapshot")); - return result; - } - - CHECKED_STATUS RestoreSnapshotSchedule(const std::string& schedule_id, Timestamp restore_at) { - auto out = VERIFY_RESULT(CallJsonAdmin( - "restore_snapshot_schedule", schedule_id, restore_at.ToFormattedString())); - std::string restoration_id = VERIFY_RESULT(Get(out, "restoration_id")).get().GetString(); - LOG(INFO) << "Restoration id: " << restoration_id; - - return WaitRestorationDone(restoration_id, 20s); - } - - CHECKED_STATUS WaitRestorationDone(const std::string& restoration_id, MonoDelta timeout) { - return WaitFor([this, restoration_id]() -> Result { - auto out = VERIFY_RESULT(CallJsonAdmin("list_snapshot_restorations", restoration_id)); - const auto& restorations = VERIFY_RESULT(Get(out, "restorations")).get().GetArray(); - SCHECK_EQ(restorations.Size(), 1, IllegalState, "Wrong restorations number"); - auto id = VERIFY_RESULT(Get(restorations[0], "id")).get().GetString(); - SCHECK_EQ(id, restoration_id, IllegalState, "Wrong restoration id"); - std::string state_str = VERIFY_RESULT(Get(restorations[0], "state")).get().GetString(); - master::SysSnapshotEntryPB::State state; - if (!master::SysSnapshotEntryPB_State_Parse(state_str, &state)) { - return STATUS_FORMAT(IllegalState, "Failed to parse restoration state: $0", state_str); - } - if (state == master::SysSnapshotEntryPB::RESTORING) { - return false; - } - if (state == master::SysSnapshotEntryPB::RESTORED) { - return true; - } - return STATUS_FORMAT(IllegalState, "Unexpected restoration state: $0", - master::SysSnapshotEntryPB_State_Name(state)); - }, timeout, "Wait restoration complete"); - } - - Result PgConnect(const std::string& db_name = std::string()) { - auto* ts = cluster_->tablet_server(RandomUniformInt(0, cluster_->num_tablet_servers() - 1)); - return PGConn::Connect(HostPort(ts->bind_host(), ts->pgsql_rpc_port()), db_name); - } +class AdminCliTest : public AdminTestBase { }; -string AdminCliTest::GetAdminToolPath() const { - return GetToolPath(kAdminToolName); -} - // Test yb-admin config change while running a workload. // 1. Instantiate external mini cluster with 3 TS. // 2. Create table with 2 replicas. 
@@ -550,49 +439,6 @@ TEST_F(AdminCliTest, TestSnapshotCreation) {
   ASSERT_NE(output.find(kTableName.table_name()), string::npos);
 }
 
-TEST_F(AdminCliTest, SnapshotSchedule) {
-  BuildAndStart();
-
-  auto out = ASSERT_RESULT(CallJsonAdmin(
-      "create_snapshot_schedule", 0.1, 10, kTableName.namespace_name(), kTableName.table_name()));
-
-  std::string schedule_id = ASSERT_RESULT(Get(out, "schedule_id")).get().GetString();
-  LOG(INFO) << "Schedule id: " << schedule_id;
-  std::this_thread::sleep_for(20s);
-
-  Timestamp last_snapshot_time;
-  ASSERT_OK(WaitFor([this, schedule_id, &last_snapshot_time]() -> Result<bool> {
-    auto schedule = VERIFY_RESULT(GetSnapshotSchedule());
-    auto received_schedule_id = VERIFY_RESULT(Get(schedule, "id")).get().GetString();
-    SCHECK_EQ(schedule_id, received_schedule_id, IllegalState, "Wrong schedule id");
-    const auto& snapshots = VERIFY_RESULT(Get(schedule, "snapshots")).get().GetArray();
-
-    if (snapshots.Size() < 2) {
-      return false;
-    }
-    std::string last_snapshot_time_str;
-    for (const auto& snapshot : snapshots) {
-      std::string snapshot_time = VERIFY_RESULT(
-          Get(snapshot, "snapshot_time_utc")).get().GetString();
-      if (!last_snapshot_time_str.empty()) {
-        std::string previous_snapshot_time = VERIFY_RESULT(
-            Get(snapshot, "previous_snapshot_time_utc")).get().GetString();
-        SCHECK_EQ(previous_snapshot_time, last_snapshot_time_str, IllegalState,
-                  "Wrong previous_snapshot_hybrid_time");
-      }
-      last_snapshot_time_str = snapshot_time;
-    }
-    LOG(INFO) << "Last snapshot time: " << last_snapshot_time_str;
-    last_snapshot_time = VERIFY_RESULT(DateTime::TimestampFromString(last_snapshot_time_str));
-    return true;
-  }, 20s, "At least 2 snapshots"));
-
-  last_snapshot_time.set_value(last_snapshot_time.value() + 1);
-  LOG(INFO) << "Restore at: " << last_snapshot_time.ToFormattedString();
-
-  ASSERT_OK(RestoreSnapshotSchedule(schedule_id, last_snapshot_time));
-}
-
 TEST_F(AdminCliTest, GetIsLoadBalancerIdle) {
   const MonoDelta kWaitTime = 20s;
   std::string output;
@@ -820,43 +666,5 @@ TEST_F(AdminCliTest, TestClearPlacementPolicy) {
   ASSERT_TRUE(output.find("replicationInfo") == std::string::npos);
 }
 
-class AdminCliTestWithYsql : public AdminCliTest {
- public:
-  void UpdateMiniClusterOptions(ExternalMiniClusterOptions* opts) override {
-    opts->enable_ysql = true;
-    opts->extra_tserver_flags.emplace_back("--ysql_num_shards_per_tserver=1");
-  }
-};
-
-TEST_F_EX(AdminCliTest, YB_DISABLE_TEST_IN_TSAN(SnapshotSchedulePgsql), AdminCliTestWithYsql) {
-  const std::string kDbName = "ybtest";
-
-  CreateCluster("raft_consensus-itest-cluster");
-  client_ = ASSERT_RESULT(CreateClient());
-
-  auto conn = ASSERT_RESULT(PgConnect());
-  ASSERT_OK(conn.ExecuteFormat("CREATE DATABASE $0", kDbName));
-
-  auto out = ASSERT_RESULT(CallJsonAdmin("create_snapshot_schedule", 0.1, 10, "ysql." + kDbName));
-  std::string schedule_id = ASSERT_RESULT(Get(out, "schedule_id")).get().GetString();
-  ASSERT_OK(WaitScheduleSnapshot(30s, schedule_id));
-
-  conn = ASSERT_RESULT(PgConnect(kDbName));
-
-  ASSERT_OK(conn.Execute("CREATE TABLE test_table (key INT PRIMARY KEY, value TEXT)"));
-
-  ASSERT_OK(conn.Execute("INSERT INTO test_table VALUES (1, 'before')"));
-
-  Timestamp time(ASSERT_RESULT(WallClock()->Now()).time_point);
-
-  ASSERT_OK(conn.Execute("UPDATE test_table SET value = 'after'"));
-
-  ASSERT_OK(RestoreSnapshotSchedule(schedule_id, time));
-
-  auto res = ASSERT_RESULT(conn.FetchValue<std::string>("SELECT value FROM test_table"));
-
-  ASSERT_EQ(res, "before");
-}
-
 } // namespace tools
 } // namespace yb
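
The tserver-side hunks below make hidden tablets behave like deleted ones for client traffic: backup.proto gains a hide flag on TabletSnapshotOpRequestPB, and both the Write and Read paths in tablet_service.cc start answering TABLET_NOT_FOUND for hidden tablets. Reduced to a sketch (the helper name here is invented; the real handlers respond via SetupErrorAndRespond, as the hunks show):

    // Simplified sketch of the gate added to the read and write paths.
    Status CheckNotHidden(const tablet::Tablet& tablet, const TabletId& tablet_id) {
      if (tablet.metadata()->hidden()) {
        // A hidden tablet still has its data, but must look deleted to clients.
        return STATUS(NotFound, "Tablet not found", tablet_id);
      }
      return Status::OK();
    }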
diff --git a/src/yb/tserver/backup.proto b/src/yb/tserver/backup.proto
index 37a7a4b6b724..7ecac70bf84a 100644
--- a/src/yb/tserver/backup.proto
+++ b/src/yb/tserver/backup.proto
@@ -70,6 +70,8 @@ message TabletSnapshotOpRequestPB {
   repeated IndexInfoPB indexes = 14;
 
   fixed64 previous_snapshot_hybrid_time = 15;
+
+  bool hide = 16;
 }
 
 message TabletSnapshotOpResponsePB {
diff --git a/src/yb/tserver/tablet_service.cc b/src/yb/tserver/tablet_service.cc
index 3d26cbd5876d..07457a8ceeff 100644
--- a/src/yb/tserver/tablet_service.cc
+++ b/src/yb/tserver/tablet_service.cc
@@ -1213,6 +1213,7 @@ void TabletServiceAdminImpl::DeleteTablet(const DeleteTabletRequestPB* req,
   Status s = server_->tablet_manager()->DeleteTablet(req->tablet_id(),
                                                      delete_type,
                                                      cas_config_opid_index_less_or_equal,
+                                                     req->hide_only(),
                                                      &error_code);
   if (PREDICT_FALSE(!s.ok())) {
     HandleErrorResponse(resp, &context, s, error_code);
@@ -1411,6 +1412,13 @@ void TabletServiceImpl::Write(const WriteRequestPB* req,
     return;
   }
 
+  if (tablet.peer->tablet()->metadata()->hidden()) {
+    auto status = STATUS(NotFound, "Tablet not found", req->tablet_id());
+    SetupErrorAndRespond(
+        resp->mutable_error(), status, TabletServerErrorPB::TABLET_NOT_FOUND, &context);
+    return;
+  }
+
 #if defined(DUMP_WRITE)
   if (req->has_write_batch() && req->write_batch().has_transaction()) {
     VLOG(1) << "Write with transaction: " << req->write_batch().transaction().ShortDebugString();
@@ -1909,6 +1917,14 @@ void TabletServiceImpl::Read(const ReadRequestPB* req,
     leader_peer.leader_term = yb::OpId::kUnknownTerm;
   }
 
+  if (!read_context.tablet->system() &&
+      down_cast<tablet::Tablet*>(read_context.tablet.get())->metadata()->hidden()) {
+    auto status = STATUS(NotFound, "Tablet not found", req->tablet_id());
+    SetupErrorAndRespond(
+        resp->mutable_error(), status, TabletServerErrorPB::TABLET_NOT_FOUND, &context);
+    return;
+  }
+
   if (FLAGS_TEST_simulate_time_out_failures_msecs > 0 && RandomUniformInt(0, 10) < 2) {
     LOG(INFO) << "Marking request as timed out for test: " << req->ShortDebugString();
     SleepFor(MonoDelta::FromMilliseconds(FLAGS_TEST_simulate_time_out_failures_msecs));
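
The remaining diffs push the flag down to the tablet manager: DeleteTablet() gains a hide_only argument, and when it is set the manager only marks the Raft group metadata hidden and flushes it, skipping the tombstone/delete machinery. A hypothetical caller-side view, using the hide_only field that tserver_admin.proto adds at the end of this patch (the master fills this request internally; the literal tablet id is a placeholder):

    tserver::DeleteTabletRequestPB req;
    req.set_tablet_id("<tablet-id>");                  // placeholder id
    req.set_delete_type(tablet::TABLET_DATA_DELETED);  // nominal delete type
    req.set_hide_only(true);  // keep the data; only mark the tablet hidden
    // On the tserver this reaches TSTabletManager::DeleteTablet(), which
    // returns meta->SetHiddenAndFlush(true) instead of removing any data.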
&error_code)); assert_tablet_assignment_count(kTabletId1, 0); diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index 74ac4b99d8f7..372dccf32fdb 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -1136,6 +1136,7 @@ Status TSTabletManager::DeleteTablet( const string& tablet_id, TabletDataState delete_type, const boost::optional& cas_config_opid_index_less_or_equal, + bool hide_only, boost::optional* error_code) { if (delete_type != TABLET_DATA_DELETED && delete_type != TABLET_DATA_TOMBSTONED) { @@ -1200,6 +1201,9 @@ Status TSTabletManager::DeleteTablet( } RaftGroupMetadataPtr meta = tablet_peer->tablet_metadata(); + if (hide_only) { + return meta->SetHiddenAndFlush(true); + } // No matter if the tablet was deleted (drop table), or tombstoned (potentially moved to a // different TS), we do not need to flush rocksdb anymore, as this data is irrelevant. // diff --git a/src/yb/tserver/ts_tablet_manager.h b/src/yb/tserver/ts_tablet_manager.h index 46324a716b9f..c4fa5c5804fe 100644 --- a/src/yb/tserver/ts_tablet_manager.h +++ b/src/yb/tserver/ts_tablet_manager.h @@ -179,10 +179,13 @@ class TSTabletManager : public tserver::TabletPeerLookupIf, public tablet::Table // raft config change op has an opid_index equal to or less than the specified // value. If not, 'error_code' is set to CAS_FAILED and a non-OK Status is // returned. - CHECKED_STATUS DeleteTablet(const TabletId& tablet_id, - tablet::TabletDataState delete_type, - const boost::optional& cas_config_opid_index_less_or_equal, - boost::optional* error_code); + // If `hide_only` is true, then just hide tablet instead of deleting it. + CHECKED_STATUS DeleteTablet( + const TabletId& tablet_id, + tablet::TabletDataState delete_type, + const boost::optional& cas_config_opid_index_less_or_equal, + bool hide_only, + boost::optional* error_code); // Lookup the given tablet peer by its ID. // Returns true if the tablet is found successfully. diff --git a/src/yb/tserver/tserver_admin.proto b/src/yb/tserver/tserver_admin.proto index 835005d5b586..ad93ee810fcf 100644 --- a/src/yb/tserver/tserver_admin.proto +++ b/src/yb/tserver/tserver_admin.proto @@ -216,6 +216,9 @@ message DeleteTabletRequestPB { // the value of the committed config opid index and shutting down the tablet // for deletion. See comments in ts_tablet_manager.cc optional int64 cas_config_opid_index_less_or_equal = 5; + + // Hide tablet instead of deleting it. + optional bool hide_only = 6; } message DeleteTabletResponsePB {