@@ -4561,6 +4561,36 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
45614561
45624562 // Read index build
45634563 {
// Runs one init step (`fill`) against `buildInfo` and contains any std::exception it throws.
//   buildInfo - the index build record being populated; marked broken if `fill` throws.
//   stepName  - label of the step being loaded; used only in the log line and the issue text.
//   fill      - callable invoked as fill(buildInfo).
// On failure the error is logged to BUILD_INDEX (exception type, message, backtrace and a dump
// of buildInfo), then the record is flagged IsBroken and an issue is attached to it.
// Nothing is rethrown, so the caller continues after a failed step.
4564+ auto fillBuildInfoSafe = [&](TIndexBuildInfo& buildInfo, const TString& stepName, const auto & fill) {
4565+ try {
4566+ fill (buildInfo);
4567+ } catch (const std::exception& exc) { // exceptions not derived from std::exception are not handled here
4568+ LOG_ERROR_S (ctx, NKikimrServices::BUILD_INDEX,
4569+ " Init " << stepName << " unhandled exception, id#" << buildInfo.Id
4570+ << " " << TypeName (exc) << " : " << exc.what () << Endl
4571+ << TBackTrace::FromCurrentException ().PrintToString ()
4572+ << " , TIndexBuildInfo: " << buildInfo);
4573+
4574+ // in-memory volatile state:
4575+ buildInfo.IsBroken = true ; // fillBuildInfoByIdSafe checks this flag and skips later steps for this build
4576+ buildInfo.AddIssue (TStringBuilder () << " Init " << stepName << " unhandled exception " << exc.what ());
4577+ }
4578+ };
4579+
// Looks up the index build with the given `id` in Self->IndexBuilds and applies `fill` to it
// through fillBuildInfoSafe.
//   id       - identifier of the build the current row belongs to.
//   stepName - label of the step being loaded; used in log messages.
//   fill     - callable invoked as fill(buildInfo) for the found record.
// If no record exists for `id`, an error is logged and the row is skipped.
// If the record is already marked IsBroken, `fill` is not invoked.
4580+ auto fillBuildInfoByIdSafe = [&](TIndexBuildId id, const TString& stepName, const auto & fill) {
4581+ const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
// NOTE(review): presumably Y_ASSERT is a debug-only check, which is why the explicit
// null test below is still needed - confirm against the macro definition.
4582+ Y_ASSERT (buildInfoPtr);
4583+ if (!buildInfoPtr) {
4584+ LOG_ERROR_S (ctx, NKikimrServices::BUILD_INDEX,
4585+ " Init " << stepName << " BuildInfo not found: id#" << id);
4586+ return ;
4587+ }
4588+ auto & buildInfo = *buildInfoPtr->Get ();
4589+ if (!buildInfo.IsBroken ) { // skip builds already flagged as broken
4590+ fillBuildInfoSafe (buildInfo, stepName, fill);
4591+ }
4592+ };
4593+
45644594 // read main info
45654595 {
45664596 auto rowset = db.Table <Schema::IndexBuild>().Range ().Select ();
@@ -4569,17 +4599,21 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
45694599 }
45704600
45714601 while (!rowset.EndOfSet ()) {
4572- TIndexBuildInfo::TPtr indexInfo = TIndexBuildInfo::FromRow (rowset);
4573-
4574- auto [it, emplaced] = Self->IndexBuilds .emplace (indexInfo->Id , indexInfo);
4575- Y_ABORT_UNLESS (emplaced);
4576- if (indexInfo->Uid ) {
4577- // TODO(mbkkt) It also should be unique, but we're not sure.
4578- Y_ASSERT (!Self->IndexBuildsByUid .contains (indexInfo->Uid ));
4579- Self->IndexBuildsByUid [indexInfo->Uid ] = indexInfo;
4602+ TIndexBuildInfo::TPtr buildInfo = new TIndexBuildInfo ();
4603+ fillBuildInfoSafe (*buildInfo, " IndexBuild" , [&](TIndexBuildInfo& buildInfo) {
4604+ TIndexBuildInfo::FillFromRow (rowset, &buildInfo);
4605+ });
4606+
4607+ // Note: broken builds are also added to IndexBuilds
4608+ Y_ASSERT (!Self->IndexBuilds .contains (buildInfo->Id ));
4609+ Self->IndexBuilds [buildInfo->Id ] = buildInfo;
4610+
4611+ if (buildInfo->Uid ) {
4612+ Y_ASSERT (!Self->IndexBuildsByUid .contains (buildInfo->Uid ));
4613+ Self->IndexBuildsByUid [buildInfo->Uid ] = buildInfo;
45804614 }
45814615
4582- OnComplete.ToProgress (indexInfo ->Id );
4616+ OnComplete.ToProgress (buildInfo ->Id );
45834617
45844618 if (!rowset.Next ()) {
45854619 return false ;
@@ -4601,19 +4635,18 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
46014635
46024636 while (!rowset.EndOfSet ()) {
46034637 TIndexBuildId id = rowset.GetValue <Schema::KMeansTreeProgress::Id>();
4604- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4605- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found: id# " << id);
4606- auto & buildInfo = *buildInfoPtr->Get ();
4607- buildInfo.KMeans .Set (
4608- rowset.GetValue <Schema::KMeansTreeProgress::Level>(),
4609- rowset.GetValue <Schema::KMeansTreeProgress::ParentBegin>(),
4610- rowset.GetValue <Schema::KMeansTreeProgress::Parent>(),
4611- rowset.GetValue <Schema::KMeansTreeProgress::ChildBegin>(),
4612- rowset.GetValue <Schema::KMeansTreeProgress::Child>(),
4613- rowset.GetValue <Schema::KMeansTreeProgress::State>(),
4614- rowset.GetValue <Schema::KMeansTreeProgress::TableSize>()
4615- );
4616- buildInfo.Sample .Rows .reserve (buildInfo.KMeans .K * 2 );
4638+ fillBuildInfoByIdSafe (id, " KMeansTreeProgress" , [&](TIndexBuildInfo& buildInfo) {
4639+ buildInfo.KMeans .Set (
4640+ rowset.GetValue <Schema::KMeansTreeProgress::Level>(),
4641+ rowset.GetValue <Schema::KMeansTreeProgress::ParentBegin>(),
4642+ rowset.GetValue <Schema::KMeansTreeProgress::Parent>(),
4643+ rowset.GetValue <Schema::KMeansTreeProgress::ChildBegin>(),
4644+ rowset.GetValue <Schema::KMeansTreeProgress::Child>(),
4645+ rowset.GetValue <Schema::KMeansTreeProgress::State>(),
4646+ rowset.GetValue <Schema::KMeansTreeProgress::TableSize>()
4647+ );
4648+ buildInfo.Sample .Rows .reserve (buildInfo.KMeans .K * 2 );
4649+ });
46174650
46184651 if (!rowset.Next ()) {
46194652 return false ;
@@ -4632,13 +4665,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
46324665 size_t sampleCount = 0 ;
46334666 while (!rowset.EndOfSet ()) {
46344667 TIndexBuildId id = rowset.GetValue <Schema::KMeansTreeSample::Id>();
4635- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4636- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found: id# " << id);
4637- auto & buildInfo = *buildInfoPtr->Get ();
4638- buildInfo.Sample .Add (
4639- rowset.GetValue <Schema::KMeansTreeSample::Probability>(),
4640- rowset.GetValue <Schema::KMeansTreeSample::Data>()
4641- );
4668+ fillBuildInfoByIdSafe (id, " KMeansTreeSample" , [&](TIndexBuildInfo& buildInfo) {
4669+ buildInfo.Sample .Add (
4670+ rowset.GetValue <Schema::KMeansTreeSample::Probability>(),
4671+ rowset.GetValue <Schema::KMeansTreeSample::Data>()
4672+ );
4673+ });
46424674 sampleCount++;
46434675
46444676 if (!rowset.Next ()) {
@@ -4660,11 +4692,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
46604692
46614693 while (!rowset.EndOfSet ()) {
46624694 TIndexBuildId id = rowset.GetValue <Schema::IndexBuildColumns::Id>();
4663- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4664- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found"
4665- << " : id# " << id);
4666- auto & buildInfo = *buildInfoPtr->Get ();
4667- buildInfo.AddIndexColumnInfo (rowset);
4695+ fillBuildInfoByIdSafe (id, " IndexBuildColumns" , [&](TIndexBuildInfo& buildInfo) {
4696+ buildInfo.AddIndexColumnInfo (rowset);
4697+ });
46684698
46694699 if (!rowset.Next ()) {
46704700 return false ;
@@ -4680,11 +4710,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
46804710
46814711 while (!rowset.EndOfSet ()) {
46824712 TIndexBuildId id = rowset.GetValue <Schema::BuildColumnOperationSettings::Id>();
4683- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4684- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found"
4685- << " : id# " << id);
4686- auto & buildInfo = *buildInfoPtr->Get ();
4687- buildInfo.AddBuildColumnInfo (rowset);
4713+ fillBuildInfoByIdSafe (id, " BuildColumnOperationSettings" , [&](TIndexBuildInfo& buildInfo) {
4714+ buildInfo.AddBuildColumnInfo (rowset);
4715+ });
46884716
46894717 if (!rowset.Next ()) {
46904718 return false ;
@@ -4701,11 +4729,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
47014729
47024730 while (!rowset.EndOfSet ()) {
47034731 TIndexBuildId id = rowset.GetValue <Schema::IndexBuildShardStatus::Id>();
4704- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4705- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found"
4706- << " : id# " << id);
4707- auto & buildInfo = *buildInfoPtr->Get ();
4708- buildInfo.AddShardStatus (rowset);
4732+ fillBuildInfoByIdSafe (id, " IndexBuildShardStatus" , [&](TIndexBuildInfo& buildInfo) {
4733+ buildInfo.AddShardStatus (rowset);
4734+ });
47094735
47104736 if (!rowset.Next ()) {
47114737 return false ;
0 commit comments