diff --git a/ydb/core/engine/minikql/minikql_engine_host.h b/ydb/core/engine/minikql/minikql_engine_host.h index 8aaf692153fe..e11dae73fa6f 100644 --- a/ydb/core/engine/minikql/minikql_engine_host.h +++ b/ydb/core/engine/minikql/minikql_engine_host.h @@ -1,5 +1,6 @@ #pragma once +#include "minikql_engine_host_counters.h" #include "change_collector_iface.h" #include @@ -12,56 +13,6 @@ namespace NKikimr { namespace NMiniKQL { -struct TEngineHostCounters { - ui64 NSelectRow = 0; - ui64 NSelectRange = 0; - ui64 NUpdateRow = 0; - ui64 NEraseRow = 0; - - ui64 SelectRowRows = 0; - ui64 SelectRowBytes = 0; - ui64 SelectRangeRows = 0; - ui64 SelectRangeBytes = 0; - ui64 SelectRangeDeletedRowSkips = 0; - ui64 UpdateRowBytes = 0; - ui64 EraseRowBytes = 0; - - ui64 InvisibleRowSkips = 0; - - TEngineHostCounters& operator+=(const TEngineHostCounters& other) { - NSelectRow += other.NSelectRow; - NSelectRange += other.NSelectRange; - NUpdateRow += other.NUpdateRow; - NEraseRow += other.NEraseRow; - SelectRowRows += other.SelectRowRows; - SelectRowBytes += other.SelectRowBytes; - SelectRangeRows += other.SelectRangeRows; - SelectRangeBytes += other.SelectRangeBytes; - SelectRangeDeletedRowSkips += other.SelectRangeDeletedRowSkips; - UpdateRowBytes += other.UpdateRowBytes; - EraseRowBytes += other.EraseRowBytes; - InvisibleRowSkips += other.InvisibleRowSkips; - return *this; - } - - TString ToString() const { - return TStringBuilder() - << "{NSelectRow: " << NSelectRow - << ", NSelectRange: " << NSelectRange - << ", NUpdateRow: " << NUpdateRow - << ", NEraseRow: " << NEraseRow - << ", SelectRowRows: " << SelectRowRows - << ", SelectRowBytes: " << SelectRowBytes - << ", SelectRangeRows: " << SelectRangeRows - << ", SelectRangeBytes: " << SelectRangeBytes - << ", UpdateRowBytes: " << UpdateRowBytes - << ", EraseRowBytes: " << EraseRowBytes - << ", SelectRangeDeletedRowSkips: " << SelectRangeDeletedRowSkips - << ", InvisibleRowSkips: " << InvisibleRowSkips - << "}"; - } -}; - struct IKeyAccessSampler : public TThrRefBase { using TPtr = TIntrusivePtr; virtual void AddSample(const TTableId& tableId, const TArrayRef& key) = 0; diff --git a/ydb/core/engine/minikql/minikql_engine_host_counters.h b/ydb/core/engine/minikql/minikql_engine_host_counters.h new file mode 100644 index 000000000000..b4771276872a --- /dev/null +++ b/ydb/core/engine/minikql/minikql_engine_host_counters.h @@ -0,0 +1,58 @@ +#pragma once + +#include "util/string/builder.h" +#include "util/system/types.h" + +namespace NKikimr { +namespace NMiniKQL { + +struct TEngineHostCounters { + ui64 NSelectRow = 0; + ui64 NSelectRange = 0; + ui64 NUpdateRow = 0; + ui64 NEraseRow = 0; + + ui64 SelectRowRows = 0; + ui64 SelectRowBytes = 0; + ui64 SelectRangeRows = 0; + ui64 SelectRangeBytes = 0; + ui64 SelectRangeDeletedRowSkips = 0; + ui64 UpdateRowBytes = 0; + ui64 EraseRowBytes = 0; + + ui64 InvisibleRowSkips = 0; + + TEngineHostCounters& operator+=(const TEngineHostCounters& other) { + NSelectRow += other.NSelectRow; + NSelectRange += other.NSelectRange; + NUpdateRow += other.NUpdateRow; + NEraseRow += other.NEraseRow; + SelectRowRows += other.SelectRowRows; + SelectRowBytes += other.SelectRowBytes; + SelectRangeRows += other.SelectRangeRows; + SelectRangeBytes += other.SelectRangeBytes; + SelectRangeDeletedRowSkips += other.SelectRangeDeletedRowSkips; + UpdateRowBytes += other.UpdateRowBytes; + EraseRowBytes += other.EraseRowBytes; + InvisibleRowSkips += other.InvisibleRowSkips; + return *this; + } + + TString ToString() const { + return TStringBuilder() + << "{NSelectRow: " << NSelectRow + << ", NSelectRange: " << NSelectRange + << ", NUpdateRow: " << NUpdateRow + << ", NEraseRow: " << NEraseRow + << ", SelectRowRows: " << SelectRowRows + << ", SelectRowBytes: " << SelectRowBytes + << ", SelectRangeRows: " << SelectRangeRows + << ", SelectRangeBytes: " << SelectRangeBytes + << ", UpdateRowBytes: " << UpdateRowBytes + << ", EraseRowBytes: " << EraseRowBytes + << ", SelectRangeDeletedRowSkips: " << SelectRangeDeletedRowSkips + << ", InvisibleRowSkips: " << InvisibleRowSkips + << "}"; + } +}; +}} diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index 6a1313776694..8fca651736a1 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -1894,3 +1894,11 @@ message TEvOverloadReady { message TEvOverloadUnsubscribe { optional uint64 SeqNo = 1; } + +// Used for events serialization/deserialization +message TSerializedEvent { + // Serialized TEventPBBase event + optional bytes EventData = 1; + // TEventSerializationInfo::IsExtendedFormat flag + optional bool IsExtendedFormat = 2; +} \ No newline at end of file diff --git a/ydb/core/tx/datashard/check_write_unit.cpp b/ydb/core/tx/datashard/check_write_unit.cpp index cf5abac1f506..40ea98a74238 100644 --- a/ydb/core/tx/datashard/check_write_unit.cpp +++ b/ydb/core/tx/datashard/check_write_unit.cpp @@ -113,7 +113,15 @@ EExecutionStatus TCheckWriteUnit::Execute(TOperation::TPtr op, return EExecutionStatus::Executed; } - writeOp->SetWriteResult(NEvents::TDataEvents::TEvWriteResult::BuildPrepared(DataShard.TabletID(), op->GetTxId(), {op->GetMinStep(), op->GetMaxStep(), {}})); + writeOp->SetWriteResult(NEvents::TDataEvents::TEvWriteResult::BuildPrepared( + DataShard.TabletID(), + op->GetTxId(), + { + op->GetMinStep(), + op->GetMaxStep(), + DataShard.GetProcessingParams() ? DataShard.GetProcessingParams()->GetCoordinators() : google::protobuf::RepeatedField{} + } + )); LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Prepared " << *op << " at " << DataShard.TabletID()); } diff --git a/ydb/core/tx/datashard/complete_data_tx_unit.cpp b/ydb/core/tx/datashard/complete_data_tx_unit.cpp index 1055068cdfed..d11fd8308ebb 100644 --- a/ydb/core/tx/datashard/complete_data_tx_unit.cpp +++ b/ydb/core/tx/datashard/complete_data_tx_unit.cpp @@ -97,7 +97,7 @@ void TCompleteOperationUnit::CompleteOperation(TOperation::TPtr op, if (result) { result->Record.SetProposeLatency(duration.MilliSeconds()); - DataShard.FillExecutionStats(op->GetExecutionProfile(), *result); + DataShard.FillExecutionStats(op->GetExecutionProfile(), *result->Record.MutableTxStats()); if (!gSkipRepliesFailPoint.Check(DataShard.TabletID(), op->GetTxId())) { result->Orbit = std::move(op->Orbit); diff --git a/ydb/core/tx/datashard/complete_write_unit.cpp b/ydb/core/tx/datashard/complete_write_unit.cpp new file mode 100644 index 000000000000..0b453eec0d39 --- /dev/null +++ b/ydb/core/tx/datashard/complete_write_unit.cpp @@ -0,0 +1,114 @@ +#include "datashard_failpoints.h" +#include "datashard_impl.h" +#include "datashard_pipeline.h" +#include "execution_unit_ctors.h" +#include "probes.h" + +#include + +LWTRACE_USING(DATASHARD_PROVIDER) + +namespace NKikimr { +namespace NDataShard { + +using namespace NMiniKQL; + +class TCompleteWriteUnit : public TExecutionUnit { +public: + TCompleteWriteUnit(TDataShard &dataShard, TPipeline &pipeline); + ~TCompleteWriteUnit() override; + + bool IsReadyToExecute(TOperation::TPtr op) const override; + EExecutionStatus Execute(TOperation::TPtr op, TTransactionContext &txc,const TActorContext &ctx) override; + void Complete(TOperation::TPtr op, const TActorContext &ctx) override; + +private: + void CompleteWrite(TOperation::TPtr op, const TActorContext &ctx); +}; + +TCompleteWriteUnit::TCompleteWriteUnit(TDataShard &dataShard, TPipeline &pipeline) + : TExecutionUnit(EExecutionUnitKind::CompleteWrite, false, dataShard, pipeline) +{ +} + +TCompleteWriteUnit::~TCompleteWriteUnit() +{ +} + +bool TCompleteWriteUnit::IsReadyToExecute(TOperation::TPtr) const +{ + return true; +} + +EExecutionStatus TCompleteWriteUnit::Execute(TOperation::TPtr op, + TTransactionContext &txc, + const TActorContext &ctx) +{ + TWriteOperation* writeOp = TWriteOperation::CastWriteOperation(op); + + Pipeline.DeactivateOp(op, txc, ctx); + + if (writeOp->GetWriteResult()) { + Pipeline.AddCompletingOp(op); + } + + // TODO: release snapshot used by a planned tx (not currently used) + // TODO: prepared txs may be cancelled until planned, in which case we may + // end up with a dangling snapshot reference. Such references would have + // to be handled in a restart-safe manner too. + Y_DEBUG_ABORT_UNLESS(!op->HasAcquiredSnapshotKey()); + + return EExecutionStatus::DelayComplete; +} + +void TCompleteWriteUnit::CompleteWrite(TOperation::TPtr op, const TActorContext& ctx) +{ + auto duration = TAppData::TimeProvider->Now() - op->GetStartExecutionAt(); + + if (DataShard.GetDataTxProfileLogThresholdMs() + && duration.MilliSeconds() >= DataShard.GetDataTxProfileLogThresholdMs()) { + LOG_WARN_S(ctx, NKikimrServices::TX_DATASHARD, + op->ExecutionProfileLogString(DataShard.TabletID())); + } + + if (DataShard.GetDataTxProfileBufferThresholdMs() + && duration.MilliSeconds() >= DataShard.GetDataTxProfileBufferThresholdMs()) { + Pipeline.HoldExecutionProfile(op); + } + + TWriteOperation* writeOp = TWriteOperation::CastWriteOperation(op); + + auto result = writeOp->ReleaseWriteResult(); + if (result) { + DataShard.FillExecutionStats(op->GetExecutionProfile(), *result->Record.MutableTxStats()); + + if (!gSkipRepliesFailPoint.Check(DataShard.TabletID(), op->GetTxId())) { + result->SetOrbit(std::move(op->Orbit)); + DataShard.SendWriteResult(ctx, result, op->GetTarget(), op->GetStep(), op->GetTxId()); + } + } + + Pipeline.RemoveCompletingOp(op); +} + +void TCompleteWriteUnit::Complete(TOperation::TPtr op, const TActorContext &ctx) +{ + Pipeline.RemoveCommittingOp(op); + Pipeline.RemoveTx(op->GetStepOrder()); + DataShard.IncCounter(COUNTER_WRITE_SUCCESS); + + CompleteWrite(op, ctx); + + DataShard.SendDelayedAcks(ctx, op->DelayedAcks()); + + DataShard.EnqueueChangeRecords(std::move(op->ChangeRecords())); + DataShard.EmitHeartbeats(); +} + +THolder CreateCompleteWriteUnit(TDataShard &dataShard, TPipeline &pipeline) +{ + return THolder(new TCompleteWriteUnit(dataShard, pipeline)); +} + +} // namespace NDataShard +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard.cpp b/ydb/core/tx/datashard/datashard.cpp index 6539b5a4243e..3155d85a8459 100644 --- a/ydb/core/tx/datashard/datashard.cpp +++ b/ydb/core/tx/datashard/datashard.cpp @@ -637,14 +637,33 @@ void TDataShard::SendResult(const TActorContext &ctx, ctx.Send(target, res.Release(), flags); } -void TDataShard::FillExecutionStats(const TExecutionProfile& execProfile, TEvDataShard::TEvProposeTransactionResult& result) const { +void TDataShard::SendWriteResult(const TActorContext& ctx, std::unique_ptr& result, const TActorId& target, ui64 step, ui64 txId) { + Y_ABORT_UNLESS(txId == result->Record.GetTxId(), "%" PRIu64 " vs %" PRIu64, txId, result->Record.GetTxId()); + + // TODO: Volatile + /* + if (VolatileTxManager.FindByTxId(txId)) { + // This is a volatile transaction, and we need to wait until it is resolved + bool ok = VolatileTxManager.AttachVolatileTxCallback(txId, new TSendVolatileResult(this, std::move(result), target, step, txId)); + Y_ABORT_UNLESS(ok); + return; + } + */ + + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Complete write [" << step << " : " << txId << "] from " << TabletID() << " at tablet " << TabletID() << " send result to client " << target); + + LWTRACK(ProposeTransactionSendResult, result->GetOrbit()); + ctx.Send(target, result.release(), 0); +} + +void TDataShard::FillExecutionStats(const TExecutionProfile& execProfile, NKikimrQueryStats::TTxStats& txStats) const { TDuration totalCpuTime; for (const auto& unit : execProfile.UnitProfiles) { totalCpuTime += unit.second.ExecuteTime; totalCpuTime += unit.second.CompleteTime; } - result.Record.MutableTxStats()->MutablePerShardStats()->Clear(); - auto& stats = *result.Record.MutableTxStats()->AddPerShardStats(); + txStats.MutablePerShardStats()->Clear(); + auto& stats = *txStats.AddPerShardStats(); stats.SetShardId(TabletID()); stats.SetCpuTimeUsec(totalCpuTime.MicroSeconds()); } diff --git a/ydb/core/tx/datashard/datashard__write.cpp b/ydb/core/tx/datashard/datashard__write.cpp index 437196f16469..d1659138f4a1 100644 --- a/ydb/core/tx/datashard/datashard__write.cpp +++ b/ydb/core/tx/datashard/datashard__write.cpp @@ -72,7 +72,9 @@ bool TDataShard::TTxWrite::Execute(TTransactionContext& txc, const TActorContext return true; } - TOperation::TPtr op = Self->Pipeline.BuildOperation(Ev, ReceivedAt, TieBreakerIndex, txc, ctx, std::move(DatashardTransactionSpan)); + TOperation::TPtr op = Self->Pipeline.BuildOperation(std::move(Ev), ReceivedAt, TieBreakerIndex, txc, std::move(DatashardTransactionSpan)); + Y_ABORT_UNLESS(!Ev); + TWriteOperation* writeOp = TWriteOperation::CastWriteOperation(op); // Unsuccessful operation parse. @@ -90,7 +92,6 @@ bool TDataShard::TTxWrite::Execute(TTransactionContext& txc, const TActorContext Self->Pipeline.GetExecutionUnit(op->GetCurrentUnit()).AddOperation(op); Op = op; - Ev = nullptr; Op->IncrementInProgress(); } @@ -231,7 +232,7 @@ void TDataShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorCo ProposeTransaction(std::move(ev), ctx); } -ui64 EvWrite::Convertor::GetTxId(const TAutoPtr& ev) { +ui64 NEvWrite::TConvertor::GetTxId(const TAutoPtr& ev) { switch (ev->GetTypeRewrite()) { case TEvDataShard::TEvProposeTransaction::EventType: return ev->Get()->GetTxId(); @@ -242,7 +243,7 @@ ui64 EvWrite::Convertor::GetTxId(const TAutoPtr& ev) { } } -ui64 EvWrite::Convertor::GetProposeFlags(NKikimrDataEvents::TEvWrite::ETxMode txMode) { +ui64 NEvWrite::TConvertor::GetProposeFlags(NKikimrDataEvents::TEvWrite::ETxMode txMode) { switch (txMode) { case NKikimrDataEvents::TEvWrite::MODE_PREPARE: return TTxFlags::Default; @@ -255,7 +256,7 @@ ui64 EvWrite::Convertor::GetProposeFlags(NKikimrDataEvents::TEvWrite::ETxMode tx } } -NKikimrDataEvents::TEvWrite::ETxMode EvWrite::Convertor::GetTxMode(ui64 flags) { +NKikimrDataEvents::TEvWrite::ETxMode NEvWrite::TConvertor::GetTxMode(ui64 flags) { if ((flags & TTxFlags::Immediate) && !(flags & TTxFlags::ForceOnline)) { return NKikimrDataEvents::TEvWrite::ETxMode::TEvWrite_ETxMode_MODE_IMMEDIATE; } @@ -267,7 +268,7 @@ NKikimrDataEvents::TEvWrite::ETxMode EvWrite::Convertor::GetTxMode(ui64 flags) { } } -NKikimrTxDataShard::TEvProposeTransactionResult::EStatus EvWrite::Convertor::GetStatus(NKikimrDataEvents::TEvWriteResult::EStatus status) { +NKikimrTxDataShard::TEvProposeTransactionResult::EStatus NEvWrite::TConvertor::GetStatus(NKikimrDataEvents::TEvWriteResult::EStatus status) { switch (status) { case NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED: return NKikimrTxDataShard::TEvProposeTransactionResult::COMPLETE; @@ -278,7 +279,7 @@ NKikimrTxDataShard::TEvProposeTransactionResult::EStatus EvWrite::Convertor::Get } } -NKikimrDataEvents::TEvWriteResult::EStatus EvWrite::Convertor::ConvertErrCode(NKikimrTxDataShard::TError::EKind code) { +NKikimrDataEvents::TEvWriteResult::EStatus NEvWrite::TConvertor::ConvertErrCode(NKikimrTxDataShard::TError::EKind code) { switch (code) { case NKikimrTxDataShard::TError_EKind_OK: return NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED; @@ -292,4 +293,22 @@ NKikimrDataEvents::TEvWriteResult::EStatus EvWrite::Convertor::ConvertErrCode(NK return NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR; } } + +TOperation::TPtr NEvWrite::TConvertor::MakeOperation(EOperationKind kind, const TBasicOpInfo& info, ui64 tabletId) { + switch (kind) { + case EOperationKind::DataTx: + case EOperationKind::SchemeTx: + case EOperationKind::Snapshot: + case EOperationKind::DistributedErase: + case EOperationKind::CommitWrites: + case EOperationKind::ReadTable: + return MakeIntrusive(info); + case EOperationKind::WriteTx: + return MakeIntrusive(info, tabletId); + case EOperationKind::DirectTx: + case EOperationKind::ReadTx: + case EOperationKind::Unknown: + Y_ABORT("Unsupported"); + } +} } \ No newline at end of file diff --git a/ydb/core/tx/datashard/datashard_active_transaction.cpp b/ydb/core/tx/datashard/datashard_active_transaction.cpp index a52c1af82899..51a93b67dd71 100644 --- a/ydb/core/tx/datashard/datashard_active_transaction.cpp +++ b/ydb/core/tx/datashard/datashard_active_transaction.cpp @@ -24,7 +24,6 @@ TValidatedDataTx::TValidatedDataTx(TDataShard *self, , EngineBay(self, txc, ctx, stepTxId) , ErrCode(NKikimrTxDataShard::TError::OK) , TxSize(0) - , TxCacheUsage(0) , IsReleased(false) , BuiltTaskRunner(false) , IsReadOnly(true) @@ -280,11 +279,10 @@ bool TValidatedDataTx::CheckCancelled(ui64 tabletId) { TInstant now = AppData()->TimeProvider->Now(); Cancelled = (now >= Deadline()); - Cancelled = Cancelled || gCancelTxFailPoint.Check(tabletId, TxId()); + Cancelled = Cancelled || gCancelTxFailPoint.Check(tabletId, GetTxId()); if (Cancelled) { - LOG_NOTICE_S(*TlsActivationContext->ExecutorThread.ActorSystem, NKikimrServices::TX_DATASHARD, - "CANCELLED TxId " << TxId() << " at " << tabletId); + LOG_NOTICE_S(*TlsActivationContext->ExecutorThread.ActorSystem, NKikimrServices::TX_DATASHARD, "CANCELLED TxId " << GetTxId() << " at " << tabletId); } return Cancelled; } @@ -332,7 +330,7 @@ void TActiveTransaction::FillTxData(TValidatedDataTx::TPtr dataTx) Y_ABORT_UNLESS(!DataTx); Y_ABORT_UNLESS(TxBody.empty() || HasVolatilePrepareFlag()); - Target = dataTx->Source(); + Target = dataTx->GetSource(); DataTx = dataTx; if (DataTx->HasStreamResponse()) @@ -543,9 +541,7 @@ void TActiveTransaction::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBas DataTx->ReleaseTxData(); // Immediate transactions have no body stored. if (!IsImmediate() && !HasVolatilePrepareFlag()) { - UntrackMemory(); - TxBody.clear(); - TrackMemory(); + ClearTxBody(); } //InReadSets.clear(); @@ -687,50 +683,6 @@ void TActiveTransaction::FinalizeDataTxPlan() RewriteExecutionPlan(plan); } -class TFinalizeDataTxPlanUnit : public TExecutionUnit { -public: - TFinalizeDataTxPlanUnit(TDataShard &dataShard, TPipeline &pipeline) - : TExecutionUnit(EExecutionUnitKind::FinalizeDataTxPlan, false, dataShard, pipeline) - { } - - bool IsReadyToExecute(TOperation::TPtr) const override { - return true; - } - - EExecutionStatus Execute(TOperation::TPtr op, - TTransactionContext &txc, - const TActorContext &ctx) override - { - Y_UNUSED(txc); - Y_UNUSED(ctx); - - TActiveTransaction *tx = dynamic_cast(op.Get()); - Y_VERIFY_S(tx, "cannot cast operation of kind " << op->GetKind()); - Y_VERIFY_S(tx->IsDataTx(), "unexpected non-data tx"); - - if (auto dataTx = tx->GetDataTx()) { - // Restore transaction type flags - if (dataTx->IsKqpDataTx() && !tx->IsKqpDataTransaction()) - tx->SetKqpDataTransactionFlag(); - Y_VERIFY_S(!dataTx->IsKqpScanTx(), "unexpected kqp scan tx"); - } - - tx->FinalizeDataTxPlan(); - - return EExecutionStatus::Executed; - } - - void Complete(TOperation::TPtr op, - const TActorContext &ctx) override - { - Y_UNUSED(op); - Y_UNUSED(ctx); - } -}; - -THolder CreateFinalizeDataTxPlanUnit(TDataShard &dataShard, TPipeline &pipeline) { - return THolder(new TFinalizeDataTxPlanUnit(dataShard, pipeline)); -} void TActiveTransaction::BuildExecutionPlan(bool loaded) { diff --git a/ydb/core/tx/datashard/datashard_active_transaction.h b/ydb/core/tx/datashard/datashard_active_transaction.h index 6c9dfa0a6e1f..a654b191a2d4 100644 --- a/ydb/core/tx/datashard/datashard_active_transaction.h +++ b/ydb/core/tx/datashard/datashard_active_transaction.h @@ -114,7 +114,7 @@ struct TSchemaOperation { }; /// @note This class incapsulates Engine stuff for minor needs. Do not return TEngine out of it. -class TValidatedDataTx : TNonCopyable { +class TValidatedDataTx : TNonCopyable, public TValidatedTx { public: using TPtr = std::shared_ptr; @@ -128,13 +128,15 @@ class TValidatedDataTx : TNonCopyable { ~TValidatedDataTx(); + EType GetType() const override { return EType::DataTx; }; + static constexpr ui64 MaxReorderTxKeys() { return 100; } NKikimrTxDataShard::TError::EKind Code() const { return ErrCode; } const TString GetErrors() const { return ErrStr; } TStepOrder StepTxId() const { return StepTxId_; } - ui64 TxId() const { return StepTxId_.TxId; } + ui64 GetTxId() const override { return StepTxId_.TxId; } const TString& Body() const { return TxBody; } ui64 LockTxId() const { return Tx.GetLockTxId(); } @@ -150,7 +152,6 @@ class TValidatedDataTx : TNonCopyable { bool Ready() const { return ErrCode == NKikimrTxDataShard::TError::OK; } bool RequirePrepare() const { return ErrCode == NKikimrTxDataShard::TError::SNAPSHOT_NOT_READY_YET; } - bool RequireWrites() const { return TxInfo().HasWrites() || !Immediate(); } bool HasWrites() const { return TxInfo().HasWrites(); } bool HasLockedWrites() const { return HasWrites() && LockTxId(); } bool HasDynamicWrites() const { return TxInfo().DynKeysCount != 0; } @@ -192,10 +193,7 @@ class TValidatedDataTx : TNonCopyable { std::optional GetVolatileChangeGroup() const { return EngineBay.GetVolatileChangeGroup(); } bool GetVolatileCommitOrdered() const { return EngineBay.GetVolatileCommitOrdered(); } - TActorId Source() const { return Source_; } - void SetSource(const TActorId& actorId) { Source_ = actorId; } void SetStep(ui64 step) { StepTxId_.Step = step; } - bool IsProposed() const { return Source_ != TActorId(); } bool IsTableRead() const { return Tx.HasReadTableTransaction(); } @@ -272,9 +270,9 @@ class TValidatedDataTx : TNonCopyable { ui64 GetTxSize() const { return TxSize; } ui32 KeysCount() const { return TxInfo().ReadsCount + TxInfo().WritesCount; } - - void SetTxCacheUsage(ui64 val) { TxCacheUsage = val; } - ui64 GetTxCacheUsage() const { return TxCacheUsage; } + ui64 GetMemoryConsumption() const override { + return GetTxSize() + GetMemoryAllocated(); + } void ReleaseTxData(); bool IsTxDataReleased() const { return IsReleased; } @@ -291,13 +289,11 @@ class TValidatedDataTx : TNonCopyable { private: TStepOrder StepTxId_; TString TxBody; - TActorId Source_; TEngineBay EngineBay; NKikimrTxDataShard::TDataTransaction Tx; NKikimrTxDataShard::TError::EKind ErrCode; TString ErrStr; ui64 TxSize; - ui64 TxCacheUsage; bool IsReleased; bool BuiltTaskRunner; TMaybe PerShardKeysSizeLimitBytes_; @@ -311,12 +307,6 @@ class TValidatedDataTx : TNonCopyable { void ComputeDeadline(); }; -enum class ERestoreDataStatus { - Ok, - Restart, - Error, -}; - /// class TDistributedEraseTx { public: diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index 03c7124f4460..5e5329c67418 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -1448,11 +1448,9 @@ class TDataShard const TActorId& target, std::unique_ptr event, ui64 cookie = 0); - void SendResult(const TActorContext &ctx, - TOutputOpData::TResultPtr &result, - const TActorId &target, - ui64 step, - ui64 txId); + void SendResult(const TActorContext &ctx, TOutputOpData::TResultPtr &result, const TActorId &target, ui64 step, ui64 txId); + void SendWriteResult(const TActorContext& ctx, std::unique_ptr& result, const TActorId& target, ui64 step, ui64 txId); + void FillSplitTrajectory(ui64 origin, NKikimrTx::TBalanceTrackList& tracks); void SetCounter(NDataShard::ESimpleCounters counter, ui64 num) const { @@ -1948,7 +1946,7 @@ class TDataShard void CheckMediatorStateRestored(); - void FillExecutionStats(const TExecutionProfile& execProfile, TEvDataShard::TEvProposeTransactionResult& result) const; + void FillExecutionStats(const TExecutionProfile& execProfile, NKikimrQueryStats::TTxStats& txStats) const; // Executes TTxProgressTransaction without specific operation void ExecuteProgressTx(const TActorContext& ctx); @@ -2425,7 +2423,7 @@ class TDataShard void Enqueue(TAutoPtr event, TInstant receivedAt, ui64 tieBreakerIndex, const TActorContext& ctx) { TItem* item = &Items.emplace_back(std::move(event), receivedAt, tieBreakerIndex); - const ui64 txId = EvWrite::Convertor::GetTxId(item->Event); + const ui64 txId = NEvWrite::TConvertor::GetTxId(item->Event); auto& links = TxIds[txId]; if (Y_UNLIKELY(links.Last)) { @@ -2440,7 +2438,7 @@ class TDataShard TItem Dequeue() { TItem* first = &Items.front(); - const ui64 txId = EvWrite::Convertor::GetTxId(first->Event); + const ui64 txId = NEvWrite::TConvertor::GetTxId(first->Event); auto it = TxIds.find(txId); Y_ABORT_UNLESS(it != TxIds.end() && it->second.First == first, @@ -2562,9 +2560,7 @@ class TDataShard TInstant StartedKeyAccessSamplingAt; TInstant StopKeyAccessSamplingAt; - using TTableInfos = THashMap; - - TTableInfos TableInfos; // tableId -> local table info + TUserTable::TTableInfos TableInfos; // tableId -> local table info TTransQueue TransQueue; TOutReadSets OutReadSets; TPipeline Pipeline; diff --git a/ydb/core/tx/datashard/datashard_kqp.cpp b/ydb/core/tx/datashard/datashard_kqp.cpp index 71ed5ff64239..7ff68f92025b 100644 --- a/ydb/core/tx/datashard/datashard_kqp.cpp +++ b/ydb/core/tx/datashard/datashard_kqp.cpp @@ -20,30 +20,25 @@ namespace NDataShard { namespace { -const ui32 MaxDatashardReplySize = 48 * 1024 * 1024; // 48 MB +const ui32 MaxDatashardReplySize = 48 * 1024 * 1024; // 48 MB using namespace NYql; -bool KqpValidateTask(const NYql::NDqProto::TDqTask& task, bool isImmediate, ui64 txId, const TActorContext& ctx, - bool& hasPersistentChannels) +bool KqpValidateTask(const NYql::NDqProto::TDqTask& task, bool isImmediate, ui64 txId, const TActorContext& ctx, bool& hasPersistentChannels) { for (auto& input : task.GetInputs()) { for (auto& channel : input.GetChannels()) { if (channel.GetIsPersistent()) { hasPersistentChannels = true; if (isImmediate) { - LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId - << ", immediate KQP transaction cannot have persistent input channels" - << ", task: " << task.GetId() - << ", channelId: " << channel.GetId()); + LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId << ", immediate KQP transaction cannot have persistent input channels" + << ", task: " << task.GetId() << ", channelId: " << channel.GetId()); return false; } if (!channel.GetSrcEndpoint().HasTabletId()) { - LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId - << ", persistent input channel without src tablet id" - << ", task: " << task.GetId() - << ", channelId: " << channel.GetId()); + LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId << ", persistent input channel without src tablet id" + << ", task: " << task.GetId() << ", channelId: " << channel.GetId()); return false; } } @@ -55,18 +50,14 @@ bool KqpValidateTask(const NYql::NDqProto::TDqTask& task, bool isImmediate, ui64 if (channel.GetIsPersistent()) { hasPersistentChannels = true; if (isImmediate) { - LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId - << ", immediate KQP transaction cannot have persistent output channels" - << ", task: " << task.GetId() - << ", channelId: " << channel.GetId()); + LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId << ", immediate KQP transaction cannot have persistent output channels" + << ", task: " << task.GetId() << ", channelId: " << channel.GetId()); return false; } if (!channel.GetDstEndpoint().HasTabletId()) { - LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId - << ", persistent output channel without dst tablet id" - << ", task: " << task.GetId() - << ", channelId: " << channel.GetId()); + LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "KQP validate, txId: " << txId << ", persistent output channel without dst tablet id" + << ", task: " << task.GetId() << ", channelId: " << channel.GetId()); return false; } } @@ -98,11 +89,9 @@ NUdf::EFetchStatus FetchOutput(NDq::IDqOutputChannel* channel, NDq::TDqSerialize return NUdf::EFetchStatus::Yield; } -NDq::ERunStatus RunKqpTransactionInternal(const TActorContext& ctx, ui64 txId, - const TInputOpData::TInReadSets* inReadSets, bool useGenericReadSets, - NKqp::TKqpTasksRunner& tasksRunner, bool applyEffects) +NDq::ERunStatus RunKqpTransactionInternal(const TActorContext& ctx, ui64 txId, const TInputOpData::TInReadSets* inReadSets, bool useGenericReadSets, NKqp::TKqpTasksRunner& tasksRunner, bool applyEffects) { - THashMap> inputChannelsMap; // channelId -> (taskId, input index) + THashMap> inputChannelsMap; // channelId -> (taskId, input index) for (auto& [taskId, task] : tasksRunner.GetTasks()) { for (ui32 i = 0; i < task.InputsSize(); ++i) { auto& input = task.GetInputs(i); @@ -129,13 +118,11 @@ NDq::ERunStatus RunKqpTransactionInternal(const TActorContext& ctx, ui64 txId, if (useGenericReadSets) { NKikimrTx::TReadSetData genericData; bool ok = genericData.ParseFromString(data.Body); - Y_ABORT_UNLESS(ok, "Failed to parse generic readset data from %" PRIu64 " to %" PRIu64 " origin %" PRIu64, - source, target, data.Origin); + Y_ABORT_UNLESS(ok, "Failed to parse generic readset data from %" PRIu64 " to %" PRIu64 " origin %" PRIu64, source, target, data.Origin); if (genericData.HasData()) { ok = genericData.GetData().UnpackTo(&kqpReadset); - Y_ABORT_UNLESS(ok, "Failed to parse kqp readset data from %" PRIu64 " to %" PRIu64 " origin %" PRIu64, - source, target, data.Origin); + Y_ABORT_UNLESS(ok, "Failed to parse kqp readset data from %" PRIu64 " to %" PRIu64 " origin %" PRIu64, source, target, data.Origin); } } else { Y_PROTOBUF_SUPPRESS_NODISCARD kqpReadset.ParseFromString(data.Body); @@ -150,8 +137,7 @@ NDq::ERunStatus RunKqpTransactionInternal(const TActorContext& ctx, ui64 txId, auto taskId = inputInfo->first; LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Added KQP readset" - << ", source: " << source << ", target: " << target << ", origin: " << data.Origin - << ", TxId: " << txId << ", task: " << taskId << ", channelId: " << channelId); + << ", source: " << source << ", target: " << target << ", origin: " << data.Origin << ", TxId: " << txId << ", task: " << taskId << ", channelId: " << channelId); auto channel = tasksRunner.GetInputChannel(taskId, channelId); NDq::TDqSerializedBatch batch; @@ -184,23 +170,18 @@ NDq::ERunStatus RunKqpTransactionInternal(const TActorContext& ctx, ui64 txId, for (ui32 i = 0; i < task.OutputsSize(); ++i) { for (auto& channel : task.GetOutputs(i).GetChannels()) { if (auto* inputInfo = inputChannelsMap.FindPtr(channel.GetId())) { - auto transferState = tasksRunner.TransferData(task.GetId(), channel.GetId(), - inputInfo->first, channel.GetId()); + auto transferState = tasksRunner.TransferData(task.GetId(), channel.GetId(), inputInfo->first, channel.GetId()); if (transferState.first) { hasInputChanges = true; LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Forwarded KQP channel data" - << ", TxId: " << txId - << ", srcTask: " << task.GetId() << ", dstTask: " << inputInfo->first - << ", channelId: " << channel.GetId()); + << ", TxId: " << txId << ", srcTask: " << task.GetId() << ", dstTask: " << inputInfo->first << ", channelId: " << channel.GetId()); } if (transferState.second) { hasInputChanges = true; LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Finished input channel" - << ", TxId: " << txId - << ", srcTask: " << task.GetId() << ", dstTask: " << inputInfo->first - << ", channelId: " << channel.GetId()); + << ", TxId: " << txId << ", srcTask: " << task.GetId() << ", dstTask: " << inputInfo->first << ", channelId: " << channel.GetId()); } } } @@ -265,8 +246,7 @@ TVector MakeLockKey(const NKikimrDataEvents::TLock& lockProto) { } // returns list of broken locks -TVector ValidateLocks(const NKikimrDataEvents::TKqpLocks& txLocks, TSysLocks& sysLocks, - ui64 tabletId) +TVector ValidateLocks(const NKikimrDataEvents::TKqpLocks& txLocks, TSysLocks& sysLocks, ui64 tabletId) { TVector brokenLocks; @@ -283,10 +263,7 @@ TVector ValidateLocks(const NKikimrDataEvents::TKqpLoc auto lock = sysLocks.GetLock(lockKey); if (lock.Generation != lockProto.GetGeneration() || lock.Counter != lockProto.GetCounter()) { - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "ValidateLocks: broken lock " - << lockProto.GetLockId() - << " expected " << lockProto.GetGeneration() << ":" << lockProto.GetCounter() - << " found " << lock.Generation << ":" << lock.Counter); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "ValidateLocks: broken lock " << lockProto.GetLockId() << " expected " << lockProto.GetGeneration() << ":" << lockProto.GetCounter() << " found " << lock.Generation << ":" << lock.Counter); brokenLocks.push_back(lockProto); } } @@ -306,10 +283,9 @@ bool ReceiveLocks(const NKikimrDataEvents::TKqpLocks& locks, ui64 shardId) { return it != receivingShards.end(); } -} // namespace +} // namespace -bool KqpValidateTransaction(const ::google::protobuf::RepeatedPtrField< ::NYql::NDqProto::TDqTask>& tasks, bool isImmediate, ui64 txId, - const TActorContext& ctx, bool& hasPersistentChannels) +bool KqpValidateTransaction(const ::google::protobuf::RepeatedPtrField<::NYql::NDqProto::TDqTask>& tasks, bool isImmediate, ui64 txId, const TActorContext& ctx, bool& hasPersistentChannels) { for (const auto& task : tasks) { if (!KqpValidateTask(task, isImmediate, txId, ctx, hasPersistentChannels)) { @@ -327,10 +303,8 @@ using TWriteOpMeta = NKikimrTxDataShard::TKqpTransaction::TDataTaskMeta::TWriteO using TColumnMeta = NKikimrTxDataShard::TKqpTransaction::TColumnMeta; NTable::TColumn GetColumn(const TColumnMeta& columnMeta) { - auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(columnMeta.GetType(), - columnMeta.HasTypeInfo() ? &columnMeta.GetTypeInfo() : nullptr); - return NTable::TColumn(columnMeta.GetName(), columnMeta.GetId(), - typeInfoMod.TypeInfo, typeInfoMod.TypeMod); + auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(columnMeta.GetType(), columnMeta.HasTypeInfo() ? &columnMeta.GetTypeInfo() : nullptr); + return NTable::TColumn(columnMeta.GetName(), columnMeta.GetId(), typeInfoMod.TypeInfo, typeInfoMod.TypeMod); } TVector GetColumns(const TReadOpMeta& readMeta) { @@ -359,10 +333,7 @@ TVector GetColumnWrites(const TWriteOpMeta& wri } template -void KqpSetTxKeysImpl(ui64 tabletId, ui64 taskId, const TTableId& tableId, const TUserTable* tableInfo, - const NKikimrTxDataShard::TKqpTransaction_TDataTaskMeta_TKeyRange& rangeKind, - const TReadOpMeta* readMeta, const TWriteOpMeta* writeMeta, const NScheme::TTypeRegistry& typeRegistry, - const TActorContext& ctx, TKeyValidator& keyValidator) +void KqpSetTxKeysImpl(ui64 tabletId, ui64 taskId, const TTableId& tableId, const TUserTable* tableInfo, const NKikimrTxDataShard::TKqpTransaction_TDataTaskMeta_TKeyRange& rangeKind, const TReadOpMeta* readMeta, const TWriteOpMeta* writeMeta, const NScheme::TTypeRegistry& typeRegistry, const TActorContext& ctx, TKeyValidator& keyValidator) { if (Read) { Y_ABORT_UNLESS(readMeta); @@ -379,19 +350,14 @@ void KqpSetTxKeysImpl(ui64 tabletId, ui64 taskId, const TTableId& tableId, const TSerializedTableRange tableRange; tableRange.Load(range); - LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path - << ", shard: " << tabletId - << ", task: " << taskId << ", " << (Read ? "read range " : "write range ") - << DebugPrintRange(tableInfo->KeyColumnTypes, tableRange.ToTableRange(), typeRegistry)); + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path << ", shard: " << tabletId << ", task: " << taskId << ", " << (Read ? "read range " : "write range ") << DebugPrintRange(tableInfo->KeyColumnTypes, tableRange.ToTableRange(), typeRegistry)); Y_DEBUG_ABORT_UNLESS(!(tableRange.To.GetCells().empty() && tableRange.ToInclusive)); if constexpr (Read) { - keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tableRange.ToTableRange(), - tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); + keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tableRange.ToTableRange(), tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); } else { - keyValidator.AddWriteRange(tableId, tableRange.ToTableRange(), tableInfo->KeyColumnTypes, - GetColumnWrites(*writeMeta), writeMeta->GetIsPureEraseOp()); + keyValidator.AddWriteRange(tableId, tableRange.ToTableRange(), tableInfo->KeyColumnTypes, GetColumnWrites(*writeMeta), writeMeta->GetIsPureEraseOp()); } } @@ -399,10 +365,7 @@ void KqpSetTxKeysImpl(ui64 tabletId, ui64 taskId, const TTableId& tableId, const TSerializedTableRange tablePoint(point, point, true, true); tablePoint.Point = true; - LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path - << ", shard: " << tabletId << - ", task: " << taskId << ", " << (Read ? "read point " : "write point ") - << DebugPrintPoint(tableInfo->KeyColumnTypes, tablePoint.From.GetCells(), typeRegistry)); + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path << ", shard: " << tabletId << ", task: " << taskId << ", " << (Read ? "read point " : "write point ") << DebugPrintPoint(tableInfo->KeyColumnTypes, tablePoint.From.GetCells(), typeRegistry)); if constexpr (Read) { keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tablePoint.ToTableRange(), tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); @@ -418,33 +381,24 @@ void KqpSetTxKeysImpl(ui64 tabletId, ui64 taskId, const TTableId& tableId, const TSerializedTableRange tableRange; tableRange.Load(rangeKind.GetFullRange()); - LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path - << ", shard: " << tabletId - << ", task: " << taskId << ", " << (Read ? "read range: FULL " : "write range: FULL ") - << DebugPrintRange(tableInfo->KeyColumnTypes, tableRange.ToTableRange(), typeRegistry)); + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path << ", shard: " << tabletId << ", task: " << taskId << ", " << (Read ? "read range: FULL " : "write range: FULL ") << DebugPrintRange(tableInfo->KeyColumnTypes, tableRange.ToTableRange(), typeRegistry)); if constexpr (Read) { - keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tableRange.ToTableRange(), - tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); + keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tableRange.ToTableRange(), tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); } else { - keyValidator.AddWriteRange(tableId, tableRange.ToTableRange(), tableInfo->KeyColumnTypes, - GetColumnWrites(*writeMeta), writeMeta->GetIsPureEraseOp()); + keyValidator.AddWriteRange(tableId, tableRange.ToTableRange(), tableInfo->KeyColumnTypes, GetColumnWrites(*writeMeta), writeMeta->GetIsPureEraseOp()); } break; } case NKikimrTxDataShard::TKqpTransaction_TDataTaskMeta_TKeyRange::KIND_NOT_SET: { - LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path - << ", shard: " << tabletId - << ", task: " << taskId << ", " << (Read ? "read range: UNSPECIFIED" : "write range: UNSPECIFIED")); + LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "Table " << tableInfo->Path << ", shard: " << tabletId << ", task: " << taskId << ", " << (Read ? "read range: UNSPECIFIED" : "write range: UNSPECIFIED")); if constexpr (Read) { - keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tableInfo->Range.ToTableRange(), - tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); + keyValidator.AddReadRange(tableId, GetColumns(*readMeta), tableInfo->Range.ToTableRange(), tableInfo->KeyColumnTypes, readMeta->GetItemsLimit(), readMeta->GetReverse()); } else { - keyValidator.AddWriteRange(tableId, tableInfo->Range.ToTableRange(), tableInfo->KeyColumnTypes, - GetColumnWrites(*writeMeta), writeMeta->GetIsPureEraseOp()); + keyValidator.AddWriteRange(tableId, tableInfo->Range.ToTableRange(), tableInfo->KeyColumnTypes, GetColumnWrites(*writeMeta), writeMeta->GetIsPureEraseOp()); } break; @@ -452,24 +406,19 @@ void KqpSetTxKeysImpl(ui64 tabletId, ui64 taskId, const TTableId& tableId, const } } -} // anonymous namespace +} // anonymous namespace -void KqpSetTxKeys(ui64 tabletId, ui64 taskId, const TUserTable* tableInfo, - const NKikimrTxDataShard::TKqpTransaction_TDataTaskMeta& meta, const NScheme::TTypeRegistry& typeRegistry, - const TActorContext& ctx, TKeyValidator& keyValidator) +void KqpSetTxKeys(ui64 tabletId, ui64 taskId, const TUserTable* tableInfo, const NKikimrTxDataShard::TKqpTransaction_TDataTaskMeta& meta, const NScheme::TTypeRegistry& typeRegistry, const TActorContext& ctx, TKeyValidator& keyValidator) { auto& tableMeta = meta.GetTable(); - auto tableId = TTableId(tableMeta.GetTableId().GetOwnerId(), tableMeta.GetTableId().GetTableId(), - tableMeta.GetSchemaVersion()); + auto tableId = TTableId(tableMeta.GetTableId().GetOwnerId(), tableMeta.GetTableId().GetTableId(), tableMeta.GetSchemaVersion()); for (auto& read : meta.GetReads()) { - KqpSetTxKeysImpl(tabletId, taskId, tableId, tableInfo, read.GetRange(), &read, nullptr, - typeRegistry, ctx, keyValidator); + KqpSetTxKeysImpl(tabletId, taskId, tableId, tableInfo, read.GetRange(), &read, nullptr, typeRegistry, ctx, keyValidator); } if (meta.HasWrites()) { - KqpSetTxKeysImpl(tabletId, taskId, tableId, tableInfo, meta.GetWrites().GetRange(), nullptr, - &meta.GetWrites(), typeRegistry, ctx, keyValidator); + KqpSetTxKeysImpl(tabletId, taskId, tableId, tableInfo, meta.GetWrites().GetRange(), nullptr, &meta.GetWrites(), typeRegistry, ctx, keyValidator); } } @@ -505,9 +454,7 @@ NYql::NDq::ERunStatus KqpRunTransaction(const TActorContext& ctx, ui64 txId, boo return RunKqpTransactionInternal(ctx, txId, /* inReadSets */ nullptr, useGenericReadSets, tasksRunner, /* applyEffects */ false); } -THolder KqpCompleteTransaction(const TActorContext& ctx, - ui64 origin, ui64 txId, const TInputOpData::TInReadSets* inReadSets, bool useGenericReadSets, NKqp::TKqpTasksRunner& tasksRunner, - const NMiniKQL::TKqpDatashardComputeContext& computeCtx) +THolder KqpCompleteTransaction(const TActorContext& ctx, ui64 origin, ui64 txId, const TInputOpData::TInReadSets* inReadSets, bool useGenericReadSets, NKqp::TKqpTasksRunner& tasksRunner, const NMiniKQL::TKqpDatashardComputeContext& computeCtx) { auto runStatus = RunKqpTransactionInternal(ctx, txId, inReadSets, useGenericReadSets, tasksRunner, /* applyEffects */ true); @@ -525,8 +472,7 @@ THolder KqpCompleteTransaction(const return nullptr; } - auto result = MakeHolder(NKikimrTxDataShard::TX_KIND_DATA, - origin, txId, NKikimrTxDataShard::TEvProposeTransactionResult::COMPLETE); + auto result = MakeHolder(NKikimrTxDataShard::TX_KIND_DATA, origin, txId, NKikimrTxDataShard::TEvProposeTransactionResult::COMPLETE); for (auto& [taskId, task] : tasksRunner.GetTasks()) { auto& taskRunner = tasksRunner.GetTaskRunner(task.GetId()); @@ -561,8 +507,8 @@ THolder KqpCompleteTransaction(const dataEv->Record.MutableChannelData()->SetFinished(fetchStatus == NUdf::EFetchStatus::Finish); if (outputDataSize > MaxDatashardReplySize) { auto message = TStringBuilder() << "Datashard " << origin - << ": reply size limit exceeded (" << outputDataSize << " > " - << MaxDatashardReplySize << ")"; + << ": reply size limit exceeded (" << outputDataSize << " > " + << MaxDatashardReplySize << ")"; LOG_WARN_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, message); result->SetExecutionError(NKikimrTxDataShard::TError::REPLY_SIZE_EXCEEDED, message); break; @@ -588,9 +534,7 @@ THolder KqpCompleteTransaction(const return result; } -void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrDataEvents::TKqpLocks& kqpLocks, - bool hasKqpLocks, bool useGenericReadSets, - NKqp::TKqpTasksRunner& tasksRunner, TSysLocks& sysLocks, ui64 tabletId) +void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrDataEvents::TKqpLocks& kqpLocks, bool hasKqpLocks, bool useGenericReadSets, NKqp::TKqpTasksRunner& tasksRunner, TSysLocks& sysLocks, ui64 tabletId) { TMap, NKikimrTxDataShard::TKqpReadset> readsetData; @@ -608,8 +552,7 @@ void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrD MKQL_ENSURE_S(fetchStatus == NUdf::EFetchStatus::Finish); MKQL_ENSURE(!outputData.IsOOB(), "Out-of-band data transport is not yet supported"); - auto key = std::make_pair(channel.GetSrcEndpoint().GetTabletId(), - channel.GetDstEndpoint().GetTabletId()); + auto key = std::make_pair(channel.GetSrcEndpoint().GetTabletId(), channel.GetDstEndpoint().GetTabletId()); auto& channelData = *readsetData[key].AddOutputs(); channelData.SetChannelId(channel.GetId()); @@ -634,8 +577,7 @@ void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrD validateLocksResult.SetSuccess(brokenLocks.empty()); for (auto& lock : brokenLocks) { - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, - "Found broken lock: " << lock.ShortDebugString()); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Found broken lock: " << lock.ShortDebugString()); if (useGenericReadSets) { decision = NKikimrTx::TReadSetData::DECISION_ABORT; } else { @@ -648,8 +590,7 @@ void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrD continue; } - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Send locks from " - << tabletId << " to " << dstTabletId << ", locks: " << validateLocksResult.ShortDebugString()); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Send locks from " << tabletId << " to " << dstTabletId << ", locks: " << validateLocksResult.ShortDebugString()); auto key = std::make_pair(tabletId, dstTabletId); if (useGenericReadSets) { @@ -688,9 +629,7 @@ void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrD } } -std::tuple> KqpValidateLocks(ui64 origin, TSysLocks& sysLocks, - const NKikimrDataEvents::TKqpLocks* kqpLocks, bool useGenericReadSets, const TInputOpData::TInReadSets& inReadSets) -{ +std::tuple> KqpValidateLocks(ui64 origin, TSysLocks& sysLocks, const NKikimrDataEvents::TKqpLocks* kqpLocks, bool useGenericReadSets, const TInputOpData::TInReadSets& inReadSets) { if (kqpLocks == nullptr || !NeedValidateLocks(kqpLocks->GetOp())) { return {true, {}}; } @@ -711,8 +650,7 @@ std::tuple> KqpValidateLocks(ui64 origin if (useGenericReadSets) { NKikimrTx::TReadSetData genericData; bool ok = genericData.ParseFromString(data.Body); - Y_ABORT_UNLESS(ok, "Failed to parse generic readset from %" PRIu64 " to %" PRIu64 " tabletId %" PRIu64, - readSet.first.first, readSet.first.second, data.Origin); + Y_ABORT_UNLESS(ok, "Failed to parse generic readset from %" PRIu64 " to %" PRIu64 " tabletId %" PRIu64, readSet.first.first, readSet.first.second, data.Origin); if (genericData.GetDecision() != NKikimrTx::TReadSetData::DECISION_COMMIT) { // Note: we don't know details on what failed at that shard @@ -738,10 +676,7 @@ std::tuple> KqpValidateLocks(ui64 origin return {true, {}}; } -std::tuple> KqpValidateVolatileTx(ui64 origin, TSysLocks& sysLocks, - const NKikimrDataEvents::TKqpLocks* kqpLocks, bool useGenericReadSets, ui64 txId, const TVector& delayedInReadSets, - TInputOpData::TAwaitingDecisions& awaitingDecisions, TOutputOpData::TOutReadSets& outReadSets) -{ +std::tuple> KqpValidateVolatileTx(ui64 origin, TSysLocks& sysLocks, const NKikimrDataEvents::TKqpLocks* kqpLocks, bool useGenericReadSets, ui64 txId, const TVector& delayedInReadSets, TInputOpData::TAwaitingDecisions& awaitingDecisions, TOutputOpData::TOutReadSets& outReadSets) { if (kqpLocks == nullptr || !NeedValidateLocks(kqpLocks->GetOp())) { return {true, {}}; } @@ -772,8 +707,7 @@ std::tuple> KqpValidateVolatileTx(ui64 o continue; } - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Send commit decision from " - << origin << " to " << dstTabletId); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Send commit decision from " << origin << " to " << dstTabletId); auto key = std::make_pair(origin, dstTabletId); NKikimrTx::TReadSetData data; @@ -797,8 +731,7 @@ std::tuple> KqpValidateVolatileTx(ui64 o continue; } - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Will wait for volatile decision from " - << srcTabletId << " to " << origin); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Will wait for volatile decision from " << srcTabletId << " to " << origin); awaitingDecisions.insert(srcTabletId); } @@ -809,8 +742,7 @@ std::tuple> KqpValidateVolatileTx(ui64 o ui64 srcTabletId = record.GetTabletSource(); ui64 dstTabletId = record.GetTabletDest(); if (dstTabletId != origin) { - LOG_WARN_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Ignoring unexpected readset from " - << srcTabletId << " to " << dstTabletId << " for txId# " << txId << " at tablet " << origin); + LOG_WARN_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Ignoring unexpected readset from " << srcTabletId << " to " << dstTabletId << " for txId# " << txId << " at tablet " << origin); continue; } if (!awaitingDecisions.contains(srcTabletId)) { @@ -818,12 +750,10 @@ std::tuple> KqpValidateVolatileTx(ui64 o } if (record.GetFlags() & NKikimrTx::TEvReadSet::FLAG_NO_DATA) { - Y_ABORT_UNLESS(!(record.GetFlags() & NKikimrTx::TEvReadSet::FLAG_EXPECT_READSET), - "Unexpected FLAG_EXPECT_READSET + FLAG_NO_DATA in delayed readsets"); + Y_ABORT_UNLESS(!(record.GetFlags() & NKikimrTx::TEvReadSet::FLAG_EXPECT_READSET), "Unexpected FLAG_EXPECT_READSET + FLAG_NO_DATA in delayed readsets"); // No readset data: participant aborted the transaction - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Processed readset without data from" - << srcTabletId << " to " << dstTabletId << " will abort txId# " << txId); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Processed readset without data from" << srcTabletId << " to " << dstTabletId << " will abort txId# " << txId); aborted = true; break; } @@ -834,15 +764,12 @@ std::tuple> KqpValidateVolatileTx(ui64 o if (data.GetDecision() != NKikimrTx::TReadSetData::DECISION_COMMIT) { // Explicit decision that is not a commit, need to abort - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Processed decision " - << ui32(data.GetDecision()) << " from " << srcTabletId << " to " << dstTabletId - << " for txId# " << txId); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Processed decision " << ui32(data.GetDecision()) << " from " << srcTabletId << " to " << dstTabletId << " for txId# " << txId); aborted = true; break; } - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Processed commit decision from " - << srcTabletId << " to " << dstTabletId << " for txId# " << txId); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Processed commit decision from " << srcTabletId << " to " << dstTabletId << " for txId# " << txId); awaitingDecisions.erase(srcTabletId); } @@ -898,20 +825,20 @@ void KqpCommitLocks(ui64 origin, const NKikimrDataEvents::TKqpLocks* kqpLocks, T } } -void KqpPrepareInReadsets(TInputOpData::TInReadSets& inReadSets, - const NKikimrDataEvents::TKqpLocks& kqpLocks, const NKqp::TKqpTasksRunner& tasksRunner, ui64 tabletId) +void KqpPrepareInReadsets(TInputOpData::TInReadSets& inReadSets, const NKikimrDataEvents::TKqpLocks& kqpLocks, const NKqp::TKqpTasksRunner* tasksRunner, ui64 tabletId) { - for (auto& [taskId, task] : tasksRunner.GetTasks()) { - for (ui32 i = 0; i < task.InputsSize(); ++i) { - for (auto& channel : task.GetInputs(i).GetChannels()) { - if (channel.GetIsPersistent()) { - MKQL_ENSURE_S(channel.GetSrcEndpoint().HasTabletId()); - MKQL_ENSURE_S(channel.GetDstEndpoint().HasTabletId()); + if (tasksRunner) { + for (auto& [taskId, task] : tasksRunner->GetTasks()) { + for (ui32 i = 0; i < task.InputsSize(); ++i) { + for (auto& channel : task.GetInputs(i).GetChannels()) { + if (channel.GetIsPersistent()) { + MKQL_ENSURE_S(channel.GetSrcEndpoint().HasTabletId()); + MKQL_ENSURE_S(channel.GetDstEndpoint().HasTabletId()); - auto key = std::make_pair(channel.GetSrcEndpoint().GetTabletId(), - channel.GetDstEndpoint().GetTabletId()); + auto key = std::make_pair(channel.GetSrcEndpoint().GetTabletId(), channel.GetDstEndpoint().GetTabletId()); - inReadSets.emplace(key, TVector()); + inReadSets.emplace(key, TVector()); + } } } } @@ -923,8 +850,7 @@ void KqpPrepareInReadsets(TInputOpData::TInReadSets& inReadSets, continue; } - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Prepare InReadsets from " << shardId - << " to " << tabletId); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "Prepare InReadsets from " << shardId << " to " << tabletId); auto key = std::make_pair(shardId, tabletId); inReadSets.emplace(key, TVector()); @@ -951,10 +877,8 @@ void KqpUpdateDataShardStatCounters(TDataShard& dataShard, const NMiniKQL::TEngi } } -void KqpFillTxStats(TDataShard& dataShard, const NMiniKQL::TEngineHostCounters& counters, - TEvDataShard::TEvProposeTransactionResult& result) +void KqpFillTxStats(TDataShard& dataShard, const NMiniKQL::TEngineHostCounters& counters, NKikimrQueryStats::TTxStats& stats) { - auto& stats = *result.Record.MutableTxStats(); auto& perTable = *stats.AddTableAccessStats(); perTable.MutableTableInfo()->SetSchemeshardId(dataShard.GetPathOwnerId()); Y_ABORT_UNLESS(dataShard.GetUserTables().size() == 1, "TODO: Fix handling of collocated tables"); @@ -983,9 +907,7 @@ void KqpFillTxStats(TDataShard& dataShard, const NMiniKQL::TEngineHostCounters& } } -void KqpFillStats(TDataShard& dataShard, const NKqp::TKqpTasksRunner& tasksRunner, - NMiniKQL::TKqpDatashardComputeContext& computeCtx, const NYql::NDqProto::EDqStatsMode& statsMode, - TEvDataShard::TEvProposeTransactionResult& result) +void KqpFillStats(TDataShard& dataShard, const NKqp::TKqpTasksRunner& tasksRunner, NMiniKQL::TKqpDatashardComputeContext& computeCtx, const NYql::NDqProto::EDqStatsMode& statsMode, TEvDataShard::TEvProposeTransactionResult& result) { Y_ABORT_UNLESS(dataShard.GetUserTables().size() == 1, "TODO: Fix handling of collocated tables"); auto tableInfo = dataShard.GetUserTables().begin(); @@ -1009,7 +931,7 @@ void KqpFillStats(TDataShard& dataShard, const NKqp::TKqpTasksRunner& tasksRunne protoTable->SetWriteBytes(taskTableStats.UpdateRowBytes); protoTable->SetEraseRows(taskTableStats.NEraseRow); - if (statsMode <= NYql::NDqProto::DQ_STATS_MODE_NONE) { // UNSPECIFIED === NONE + if (statsMode <= NYql::NDqProto::DQ_STATS_MODE_NONE) { // UNSPECIFIED === NONE continue; } @@ -1047,13 +969,9 @@ NYql::NDq::TDqTaskRunnerMemoryLimits DefaultKqpDataReqMemoryLimits() { namespace { -class TKqpTaskRunnerExecutionContext : public NDq::IDqTaskRunnerExecutionContext { +class TKqpTaskRunnerExecutionContext: public NDq::IDqTaskRunnerExecutionContext { public: - NDq::IDqOutputConsumer::TPtr CreateOutputConsumer(const NDqProto::TTaskOutput& outputDesc, - const NMiniKQL::TType* type, NUdf::IApplyContext* applyCtx, const NMiniKQL::TTypeEnvironment& typeEnv, - const NKikimr::NMiniKQL::THolderFactory& holderFactory, - TVector&& outputs) const override - { + NDq::IDqOutputConsumer::TPtr CreateOutputConsumer(const NDqProto::TTaskOutput& outputDesc, const NMiniKQL::TType* type, NUdf::IApplyContext* applyCtx, const NMiniKQL::TTypeEnvironment& typeEnv, const NKikimr::NMiniKQL::THolderFactory& holderFactory, TVector&& outputs) const override { return NKqp::KqpBuildOutputConsumer(outputDesc, type, applyCtx, typeEnv, holderFactory, std::move(outputs)); } @@ -1066,11 +984,11 @@ class TKqpTaskRunnerExecutionContext : public NDq::IDqTaskRunnerExecutionContext } }; -} // anonymous namespace +} // anonymous namespace THolder DefaultKqpExecutionContext() { return THolder(new TKqpTaskRunnerExecutionContext); } -} // namespace NDataShard -} // namespace NKikimr +} // namespace NDataShard +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_kqp.h b/ydb/core/tx/datashard/datashard_kqp.h index 62b5d860f40e..ae007c29b0fd 100644 --- a/ydb/core/tx/datashard/datashard_kqp.h +++ b/ydb/core/tx/datashard/datashard_kqp.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -35,7 +36,7 @@ void KqpFillOutReadSets(TOutputOpData::TOutReadSets& outReadSets, const NKikimrD NKqp::TKqpTasksRunner& tasksRunner, TSysLocks& sysLocks, ui64 tabletId); void KqpPrepareInReadsets(TInputOpData::TInReadSets& inReadSets, - const NKikimrDataEvents::TKqpLocks& kqpLocks, const NKqp::TKqpTasksRunner& tasksRunner, ui64 tabletId); + const NKikimrDataEvents::TKqpLocks& kqpLocks, const NKqp::TKqpTasksRunner* tasksRunner, ui64 tabletId); std::tuple> KqpValidateLocks(ui64 tabletId, TSysLocks& sysLocks, const NKikimrDataEvents::TKqpLocks* kqpLocks, bool useGenericReadSets, const TInputOpData::TInReadSets& inReadSets); @@ -48,8 +49,7 @@ void KqpCommitLocks(ui64 tabletId, const NKikimrDataEvents::TKqpLocks* kqpLocks, void KqpUpdateDataShardStatCounters(TDataShard& dataShard, const NMiniKQL::TEngineHostCounters& counters); -void KqpFillTxStats(TDataShard& dataShard, const NMiniKQL::TEngineHostCounters& counters, - TEvDataShard::TEvProposeTransactionResult& result); +void KqpFillTxStats(TDataShard& dataShard, const NMiniKQL::TEngineHostCounters& counters, NKikimrQueryStats::TTxStats& stats); void KqpFillStats(TDataShard& dataShard, const NKqp::TKqpTasksRunner& tasksRunner, NMiniKQL::TKqpDatashardComputeContext& computeCtx, const NYql::NDqProto::EDqStatsMode& statsMode, diff --git a/ydb/core/tx/datashard/datashard_pipeline.cpp b/ydb/core/tx/datashard/datashard_pipeline.cpp index bd70f187cbcd..21faf665e228 100644 --- a/ydb/core/tx/datashard/datashard_pipeline.cpp +++ b/ydb/core/tx/datashard/datashard_pipeline.cpp @@ -539,8 +539,12 @@ bool TPipeline::LoadTxDetails(TTransactionContext &txc, { auto it = DataTxCache.find(tx->GetTxId()); if (it != DataTxCache.end()) { - it->second->SetStep(tx->GetStep()); - tx->FillTxData(it->second); + auto baseTx = it->second; + Y_ABORT_UNLESS(baseTx->GetType() == TValidatedTx::EType::DataTx, "Wrong tx type in cache"); + TValidatedDataTx::TPtr dataTx = std::static_pointer_cast(baseTx); + + dataTx->SetStep(tx->GetStep()); + tx->FillTxData(dataTx); // Remove tx from cache. ForgetTx(tx->GetTxId()); @@ -591,6 +595,55 @@ bool TPipeline::LoadTxDetails(TTransactionContext &txc, return true; } +bool TPipeline::LoadWriteDetails(TTransactionContext& txc, const TActorContext& ctx, TWriteOperation::TPtr writeOp) +{ + auto it = DataTxCache.find(writeOp->GetTxId()); + if (it != DataTxCache.end()) { + auto baseTx = it->second; + Y_ABORT_UNLESS(baseTx->GetType() == TValidatedTx::EType::WriteTx, "Wrong writeOp type in cache"); + TValidatedWriteTx::TPtr dataTx = std::static_pointer_cast(baseTx); + + writeOp->FillTxData(dataTx); + // Remove writeOp from cache. + ForgetTx(writeOp->GetTxId()); + + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "LoadWriteDetails at " << Self->TabletID() << " got data writeOp from cache " << writeOp->GetStep() << ":" << writeOp->GetTxId()); + } else if (writeOp->HasVolatilePrepareFlag()) { + // Since transaction is volatile it was never stored on disk, and it + // shouldn't have any artifacts yet. + writeOp->FillVolatileTxData(Self, txc); + + ui32 keysCount = 0; + keysCount = writeOp->ExtractKeys(); + + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "LoadWriteDetails at " << Self->TabletID() << " loaded writeOp from memory " << writeOp->GetStep() << ":" << writeOp->GetTxId() << " keys extracted: " << keysCount); + } else { + NIceDb::TNiceDb db(txc.DB); + TActorId target; + TString txBody; + TVector locks; + ui64 artifactFlags = 0; + bool ok = Self->TransQueue.LoadTxDetails(db, writeOp->GetTxId(), target, txBody, locks, artifactFlags); + if (!ok) + return false; + + // Check we have enough memory to parse writeOp. + ui64 requiredMem = txBody.size() * 10; + if (MaybeRequestMoreTxMemory(requiredMem, txc)) + return false; + + writeOp->FillTxData(Self, txc, target, txBody, std::move(locks), artifactFlags); + + ui32 keysCount = 0; + //if (Config.LimitActiveTx > 1) + keysCount = writeOp->ExtractKeys(); + + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "LoadWriteDetails at " << Self->TabletID() << " loaded writeOp from db " << writeOp->GetStep() << ":" << writeOp->GetTxId() << " keys extracted: " << keysCount); + } + + return true; +} + void TPipeline::DeactivateOp(TOperation::TPtr op, TTransactionContext& txc, const TActorContext &ctx) @@ -1280,13 +1333,13 @@ ui64 TPipeline::GetInactiveTxSize() const { return res; } -bool TPipeline::SaveForPropose(TValidatedDataTx::TPtr tx) { - Y_ABORT_UNLESS(tx && tx->TxId()); +bool TPipeline::SaveForPropose(TValidatedTx::TPtr tx) { + Y_ABORT_UNLESS(tx && tx->GetTxId()); if (DataTxCache.size() <= Config.LimitDataTxCache) { - ui64 quota = tx->GetTxSize() + tx->GetMemoryAllocated(); + ui64 quota = tx->GetMemoryConsumption(); if (Self->TryCaptureTxCache(quota)) { tx->SetTxCacheUsage(quota); - DataTxCache[tx->TxId()] = tx; + DataTxCache[tx->GetTxId()] = tx; return true; } } @@ -1571,14 +1624,14 @@ TOperation::TPtr TPipeline::BuildOperation(TEvDataShard::TEvProposeTransaction:: return tx; } -TOperation::TPtr TPipeline::BuildOperation(NEvents::TDataEvents::TEvWrite::TPtr& ev, +TOperation::TPtr TPipeline::BuildOperation(NEvents::TDataEvents::TEvWrite::TPtr&& ev, TInstant receivedAt, ui64 tieBreakerIndex, NTabletFlatExecutor::TTransactionContext& txc, - const TActorContext& ctx, NWilson::TSpan &&operationSpan) + NWilson::TSpan &&operationSpan) { const auto& rec = ev->Get()->Record; - TBasicOpInfo info(rec.GetTxId(), EOperationKind::WriteTx, EvWrite::Convertor::GetProposeFlags(rec.GetTxMode()), 0, receivedAt, tieBreakerIndex); - auto writeOp = MakeIntrusive(info, ev, Self, txc, ctx); + TBasicOpInfo info(rec.GetTxId(), EOperationKind::WriteTx, NEvWrite::TConvertor::GetProposeFlags(rec.GetTxMode()), 0, receivedAt, tieBreakerIndex); + auto writeOp = MakeIntrusive(info, std::move(ev), Self, txc); writeOp->OperationSpan = std::move(operationSpan); auto writeTx = writeOp->GetWriteTx(); Y_ABORT_UNLESS(writeTx); @@ -1589,14 +1642,14 @@ TOperation::TPtr TPipeline::BuildOperation(NEvents::TDataEvents::TEvWrite::TPtr& }; if (!writeTx->Ready()) { - badRequest(EvWrite::Convertor::ConvertErrCode(writeOp->GetWriteTx()->GetErrCode()), TStringBuilder() << "Cannot parse tx " << writeOp->GetTxId() << ". " << writeOp->GetWriteTx()->GetErrCode() << ": " << writeOp->GetWriteTx()->GetErrStr()); + badRequest(NEvWrite::TConvertor::ConvertErrCode(writeOp->GetWriteTx()->GetErrCode()), TStringBuilder() << "Cannot parse tx " << writeOp->GetTxId() << ". " << writeOp->GetWriteTx()->GetErrCode() << ": " << writeOp->GetWriteTx()->GetErrStr()); return writeOp; } writeTx->ExtractKeys(true); if (!writeTx->Ready()) { - badRequest(EvWrite::Convertor::ConvertErrCode(writeOp->GetWriteTx()->GetErrCode()), TStringBuilder() << "Cannot parse tx keys " << writeOp->GetTxId() << ". " << writeOp->GetWriteTx()->GetErrCode() << ": " << writeOp->GetWriteTx()->GetErrStr()); + badRequest(NEvWrite::TConvertor::ConvertErrCode(writeOp->GetWriteTx()->GetErrCode()), TStringBuilder() << "Cannot parse tx keys " << writeOp->GetTxId() << ". " << writeOp->GetWriteTx()->GetErrCode() << ": " << writeOp->GetWriteTx()->GetErrStr()); return writeOp; } diff --git a/ydb/core/tx/datashard/datashard_pipeline.h b/ydb/core/tx/datashard/datashard_pipeline.h index ed06f2c0362c..a2c7f56719e6 100644 --- a/ydb/core/tx/datashard/datashard_pipeline.h +++ b/ydb/core/tx/datashard/datashard_pipeline.h @@ -3,6 +3,7 @@ #include "datashard.h" #include "datashard_trans_queue.h" #include "datashard_active_transaction.h" +#include "datashard_write_operation.h" #include "datashard_dep_tracker.h" #include "datashard_user_table.h" #include "execution_unit.h" @@ -108,7 +109,7 @@ class TPipeline : TNonCopyable { // tx propose - bool SaveForPropose(TValidatedDataTx::TPtr tx); + bool SaveForPropose(TValidatedTx::TPtr tx); void SetProposed(ui64 txId, const TActorId& actorId); void ForgetUnproposedTx(ui64 txId); @@ -121,6 +122,7 @@ class TPipeline : TNonCopyable { bool IsReadyOp(TOperation::TPtr op); bool LoadTxDetails(TTransactionContext &txc, const TActorContext &ctx, TActiveTransaction::TPtr tx); + bool LoadWriteDetails(TTransactionContext& txc, const TActorContext& ctx, TWriteOperation::TPtr tx); void DeactivateOp(TOperation::TPtr op, TTransactionContext& txc, const TActorContext &ctx); void RemoveTx(TStepOrder stepTxId); @@ -267,10 +269,10 @@ class TPipeline : TNonCopyable { TInstant receivedAt, ui64 tieBreakerIndex, NTabletFlatExecutor::TTransactionContext &txc, const TActorContext &ctx, NWilson::TSpan &&operationSpan); - TOperation::TPtr BuildOperation(NEvents::TDataEvents::TEvWrite::TPtr &ev, + TOperation::TPtr BuildOperation(NEvents::TDataEvents::TEvWrite::TPtr&& ev, TInstant receivedAt, ui64 tieBreakerIndex, NTabletFlatExecutor::TTransactionContext &txc, - const TActorContext &ctx, NWilson::TSpan &&operationSpan); + NWilson::TSpan &&operationSpan); void BuildDataTx(TActiveTransaction *tx, TTransactionContext &txc, const TActorContext &ctx); @@ -282,6 +284,14 @@ class TPipeline : TNonCopyable { return tx->RestoreTxData(Self, txc, ctx); } + ERestoreDataStatus RestoreDataTx( + TWriteOperation* tx, + TTransactionContext& txc + ) + { + return tx->RestoreTxData(Self, txc); + } + void RegisterDistributedWrites(const TOperation::TPtr& op, NTable::TDatabase& db); // Execution units @@ -487,7 +497,7 @@ class TPipeline : TNonCopyable { TSortedOps ActivePlannedOps; TSortedOps::iterator ActivePlannedOpsLogicallyCompleteEnd; TSortedOps::iterator ActivePlannedOpsLogicallyIncompleteEnd; - THashMap DataTxCache; + THashMap DataTxCache; TMap, 1>> DelayedAcks; TStepOrder LastPlannedTx; TStepOrder LastCompleteTx; diff --git a/ydb/core/tx/datashard/datashard_trans_queue.cpp b/ydb/core/tx/datashard/datashard_trans_queue.cpp index 05e664406eea..937d4b2019b0 100644 --- a/ydb/core/tx/datashard/datashard_trans_queue.cpp +++ b/ydb/core/tx/datashard/datashard_trans_queue.cpp @@ -82,7 +82,9 @@ bool TTransQueue::Load(NIceDb::TNiceDb& db) { flags |= TTxFlags::Stored; TBasicOpInfo info(txId, kind, flags, maxStep, TInstant::FromValue(received), Self->NextTieBreakerIndex++); - auto op = MakeIntrusive(info); + + TOperation::TPtr op = NEvWrite::TConvertor::MakeOperation(kind, info, Self->TabletID()); + if (rowset.HaveValue()) { op->SetTarget(rowset.GetValue()); } diff --git a/ydb/core/tx/datashard/datashard_trans_queue.h b/ydb/core/tx/datashard/datashard_trans_queue.h index 3295285ba48d..36aa50785d8c 100644 --- a/ydb/core/tx/datashard/datashard_trans_queue.h +++ b/ydb/core/tx/datashard/datashard_trans_queue.h @@ -26,6 +26,7 @@ class TTransQueue { public: friend class TPipeline; friend class TActiveTransaction; + friend class TWriteOperation; TTransQueue(TDataShard * self) : Self(self) diff --git a/ydb/core/tx/datashard/datashard_user_db.cpp b/ydb/core/tx/datashard/datashard_user_db.cpp index 59edf2e6699d..b7c1e0dd16d3 100644 --- a/ydb/core/tx/datashard/datashard_user_db.cpp +++ b/ydb/core/tx/datashard/datashard_user_db.cpp @@ -1,5 +1,7 @@ #include "datashard_user_db.h" +#include "datashard_impl.h" + namespace NKikimr::NDataShard { TDataShardUserDb::TDataShardUserDb(TDataShard& self, NTable::TDatabase& db, ui64 globalTxId, const TRowVersion& readVersion, const TRowVersion& writeVersion, NMiniKQL::TEngineHostCounters& counters, TInstant now) diff --git a/ydb/core/tx/datashard/datashard_user_db.h b/ydb/core/tx/datashard/datashard_user_db.h index a7ac89eaca8c..35cf93f3fda1 100644 --- a/ydb/core/tx/datashard/datashard_user_db.h +++ b/ydb/core/tx/datashard/datashard_user_db.h @@ -1,8 +1,18 @@ #pragma once -#include "datashard_impl.h" + +#include "change_collector.h" + +#include +#include +#include +#include #include +namespace NKikimr::NMiniKQL { + struct TEngineHostCounters; +} + namespace NKikimr::NDataShard { class IDataShardUserDb { diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index a1e2927411ad..f1842d96ca68 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -20,6 +20,7 @@ namespace NDataShard { struct TUserTable : public TThrRefBase { using TPtr = TIntrusivePtr; using TCPtr = TIntrusiveConstPtr; + using TTableInfos = THashMap; struct TUserFamily { using ECodec = NTable::NPage::ECodec; diff --git a/ydb/core/tx/datashard/datashard_ut_write.cpp b/ydb/core/tx/datashard/datashard_ut_write.cpp index 55f8dcd2865a..2a2fd095d596 100644 --- a/ydb/core/tx/datashard/datashard_ut_write.cpp +++ b/ydb/core/tx/datashard/datashard_ut_write.cpp @@ -11,6 +11,8 @@ using namespace Tests; using namespace NDataShardReadTableTest; Y_UNIT_TEST_SUITE(DataShardWrite) { + const TString expectedTableState = "key = 0, value = 1\nkey = 2, value = 3\nkey = 4, value = 5\n"; + std::tuple TestCreateServer() { TPortManager pm; TServerSettings serverSettings(pm.GetPort(2134)); @@ -21,7 +23,6 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { auto sender = runtime.AllocateEdgeActor(); runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); - runtime.SetLogPriority(NKikimrServices::TX_PROXY, NLog::PRI_DEBUG); runtime.GetAppData().AllowReadTableImmediate = true; InitRoot(server, sender); @@ -29,7 +30,7 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { return {runtime, server, sender}; } - Y_UNIT_TEST_TWIN(Upsert, EvWrite) { + Y_UNIT_TEST_TWIN(UpsertImmediate, EvWrite) { auto [runtime, server, sender] = TestCreateServer(); auto opts = TShardedTableOptions(); @@ -38,50 +39,103 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { auto rows = EvWrite ? TEvWriteRows{{{0, 1}}, {{2, 3}}, {{4, 5}}} : TEvWriteRows{}; auto evWriteObservers = ReplaceEvProposeTransactionWithEvWrite(runtime, rows); - ExecSQL(server, sender, Q_("UPSERT INTO `/Root/table-1` (key, value) VALUES (0, 1);")); - ExecSQL(server, sender, Q_("UPSERT INTO `/Root/table-1` (key, value) VALUES (2, 3);")); - ExecSQL(server, sender, Q_("UPSERT INTO `/Root/table-1` (key, value) VALUES (4, 5);")); + Cout << "========= Send immediate write =========\n"; + { + ExecSQL(server, sender, Q_("UPSERT INTO `/Root/table-1` (key, value) VALUES (0, 1);")); + ExecSQL(server, sender, Q_("UPSERT INTO `/Root/table-1` (key, value) VALUES (2, 3);")); + ExecSQL(server, sender, Q_("UPSERT INTO `/Root/table-1` (key, value) VALUES (4, 5);")); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } + } + + Y_UNIT_TEST_TWIN(UpsertPrepared, EvWrite) { + auto [runtime, server, sender] = TestCreateServer(); + + // Disable volatile transactions, since EvWrite has not yet supported them. + runtime.GetAppData().FeatureFlags.SetEnableDataShardVolatileTransactions(false); - auto table1state = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + auto opts = TShardedTableOptions(); + auto [shards1, tableId1] = CreateShardedTable(server, sender, "/Root", "table-1", opts); + auto [shards2, tableId2] = CreateShardedTable(server, sender, "/Root", "table-2", opts); - UNIT_ASSERT_VALUES_EQUAL(table1state, "key = 0, value = 1\n" - "key = 2, value = 3\n" - "key = 4, value = 5\n"); + auto rows = EvWrite ? TEvWriteRows{{tableId1, {0, 1}}, {tableId2, {2, 3}}} : TEvWriteRows{}; + auto evWriteObservers = ReplaceEvProposeTransactionWithEvWrite(runtime, rows); + + Cout << "========= Send distributed write =========\n"; + { + ExecSQL(server, sender, Q_( + "UPSERT INTO `/Root/table-1` (key, value) VALUES (0, 1);" + "UPSERT INTO `/Root/table-2` (key, value) VALUES (2, 3);")); + } + + Cout << "========= Read tables =========\n"; + { + auto tableState1 = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + auto tableState2 = TReadTableState(server, MakeReadTableSettings("/Root/table-2")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState1, "key = 0, value = 1\n"); + UNIT_ASSERT_VALUES_EQUAL(tableState2, "key = 2, value = 3\n"); + } } - Y_UNIT_TEST(WriteImmediateOnShard) { + Y_UNIT_TEST(WriteImmediate) { auto [runtime, server, sender] = TestCreateServer(); auto opts = TShardedTableOptions().Columns({{"key", "Uint32", true, false}, {"value", "Uint32", false, false}}); auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); - + const ui64 shard = shards[0]; const ui32 rowCount = 3; - ui64 txId = 100; - Write(runtime, sender, shards[0], tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - auto table1state = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + ui64 txId = 100; - UNIT_ASSERT_VALUES_EQUAL(table1state, "key = 0, value = 1\n" - "key = 2, value = 3\n" - "key = 4, value = 5\n"); + Cout << "========= Send immediate write =========\n"; + { + const auto writeResult = Write(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrigin(), shard); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetStep(), 0); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrderId(), txId); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetTxId(), txId); + + const auto& tableAccessStats = writeResult.GetTxStats().GetTableAccessStats(0); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetTableInfo().GetName(), "/Root/table-1"); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetUpdateRow().GetCount(), rowCount); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } } - Y_UNIT_TEST(WriteImmediateOnShardManyColumns) { + Y_UNIT_TEST(WriteImmediateManyColumns) { auto [runtime, server, sender] = TestCreateServer(); auto opts = TShardedTableOptions().Columns({{"key64", "Uint64", true, false}, {"key32", "Uint32", true, false}, {"value64", "Uint64", false, false}, {"value32", "Uint32", false, false}, {"valueUtf8", "Utf8", false, false}}); auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); - + const ui64 shard = shards[0]; const ui32 rowCount = 3; - ui64 txId = 100; - Write(runtime, sender, shards[0], tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - auto table1state = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + ui64 txId = 100; - UNIT_ASSERT_VALUES_EQUAL(table1state, "key64 = 0, key32 = 1, value64 = 2, value32 = 3, valueUtf8 = String_4\n" - "key64 = 5, key32 = 6, value64 = 7, value32 = 8, valueUtf8 = String_9\n" - "key64 = 10, key32 = 11, value64 = 12, value32 = 13, valueUtf8 = String_14\n"); + Cout << "========= Send immediate write =========\n"; + { + Write(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, "key64 = 0, key32 = 1, value64 = 2, value32 = 3, valueUtf8 = String_4\n" + "key64 = 5, key32 = 6, value64 = 7, value32 = 8, valueUtf8 = String_9\n" + "key64 = 10, key32 = 11, value64 = 12, value32 = 13, valueUtf8 = String_14\n"); + } } Y_UNIT_TEST(WriteImmediateHugeKey) { @@ -89,33 +143,81 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { auto opts = TShardedTableOptions().Columns({{"key", "Utf8", true, false}}); auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); + const ui64 shard = shards[0]; - TString hugeStringValue(NLimits::MaxWriteKeySize + 1, 'X'); - TSerializedCellMatrix matrix({TCell(hugeStringValue.c_str(), hugeStringValue.size())}, 1, 1); + Cout << "========= Send immediate write =========\n"; + { + TString hugeStringValue(NLimits::MaxWriteKeySize + 1, 'X'); + TSerializedCellMatrix matrix({TCell(hugeStringValue.c_str(), hugeStringValue.size())}, 1, 1); - auto evWrite = std::make_unique(100, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(matrix.ReleaseBuffer()); - evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, {1}, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); + auto evWrite = std::make_unique(100, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(matrix.ReleaseBuffer()); + evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, {1}, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); - const auto& record = Write(runtime, sender, shards[0], std::move(evWrite), NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST); - - UNIT_ASSERT_VALUES_EQUAL(record.GetIssues().size(), 1); - UNIT_ASSERT(record.GetIssues(0).message().Contains("Row key size of 1049601 bytes is larger than the allowed threshold 1049600")); + const auto writeResult = Write(runtime, sender, shard, std::move(evWrite), NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetIssues().size(), 1); + UNIT_ASSERT(writeResult.GetIssues(0).message().Contains("Row key size of 1049601 bytes is larger than the allowed threshold 1049600")); + } } - Y_UNIT_TEST(WriteOnShard) { + Y_UNIT_TEST(WritePrepared) { auto [runtime, server, sender] = TestCreateServer(); TShardedTableOptions opts; - auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); - + const auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); + const ui64 shard = shards[0]; + const ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); const ui32 rowCount = 3; - ui64 txId = 100; - Write(runtime, sender, shards[0], tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); - - auto table1state = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); - UNIT_ASSERT_VALUES_EQUAL(table1state, ""); - } -} -} \ No newline at end of file + ui64 txId = 100; + ui64 minStep, maxStep; + + Cout << "========= Send prepare =========\n"; + { + const auto writeResult = Write(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); + + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED); + UNIT_ASSERT_GT(writeResult.GetMinStep(), 0); + UNIT_ASSERT_GT(writeResult.GetMaxStep(), writeResult.GetMinStep()); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrigin(), shard); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetTxId(), txId); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetDomainCoordinators().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetDomainCoordinators(0), coordinator); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetTabletInfo().GetTabletId(), shard); + + minStep = writeResult.GetMinStep(); + maxStep = writeResult.GetMaxStep(); + } + + Cout << "========= Send propose to coordinator =========\n"; + { + SendProposeToCoordinator(server, shards, minStep, maxStep, txId); + } + + Cout << "========= Wait for completed transaction =========\n"; + { + auto ev = runtime.GrabEdgeEventRethrow(sender); + auto writeResult = ev->Get()->Record; + + UNIT_ASSERT_VALUES_EQUAL_C(writeResult.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED, "Status: " << writeResult.GetStatus() << " Issues: " << writeResult.GetIssues()); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrigin(), shard); + UNIT_ASSERT_GE(writeResult.GetStep(), minStep); + UNIT_ASSERT_LE(writeResult.GetStep(), maxStep); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrderId(), txId); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetTxId(), txId); + + const auto& tableAccessStats = writeResult.GetTxStats().GetTableAccessStats(0); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetTableInfo().GetName(), "/Root/table-1"); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetUpdateRow().GetCount(), rowCount); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } + + } // Y_UNIT_TEST + +} // Y_UNIT_TEST_SUITE +} // namespace NKikimr \ No newline at end of file diff --git a/ydb/core/tx/datashard/datashard_write.h b/ydb/core/tx/datashard/datashard_write.h index 9caedb35276c..2df849319437 100644 --- a/ydb/core/tx/datashard/datashard_write.h +++ b/ydb/core/tx/datashard/datashard_write.h @@ -1,21 +1,23 @@ #pragma once +#include #include #include #include #include -namespace NKikimr::NDataShard::EvWrite { +namespace NKikimr::NDataShard::NEvWrite { using namespace NActors; -class Convertor { +class TConvertor { public: static ui64 GetTxId(const TAutoPtr& ev); static ui64 GetProposeFlags(NKikimrDataEvents::TEvWrite::ETxMode txMode); static NKikimrDataEvents::TEvWrite::ETxMode GetTxMode(ui64 flags); static NKikimrTxDataShard::TEvProposeTransactionResult::EStatus GetStatus(NKikimrDataEvents::TEvWriteResult::EStatus status); static NKikimrDataEvents::TEvWriteResult::EStatus ConvertErrCode(NKikimrTxDataShard::TError::EKind code); + static TOperation::TPtr MakeOperation(EOperationKind kind, const TBasicOpInfo& info, ui64 tabletId); }; } \ No newline at end of file diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index cbd94c202d4e..f65b80e6918e 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -14,15 +14,31 @@ #include +#if defined LOG_T || \ + defined LOG_D || \ + defined LOG_I || \ + defined LOG_N || \ + defined LOG_W || \ + defined LOG_E || \ + defined LOG_C + #error log macro redefinition +#endif + +#define LOG_T(stream) LOG_TRACE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) +#define LOG_D(stream) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) +#define LOG_I(stream) LOG_INFO_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) +#define LOG_N(stream) LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) +#define LOG_W(stream) LOG_WARN_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) +#define LOG_E(stream) LOG_ERROR_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) +#define LOG_C(stream) LOG_CRIT_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, stream) + namespace NKikimr { namespace NDataShard { -TValidatedWriteTx::TValidatedWriteTx(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx, ui64 globalTxId, TInstant receivedAt, const TRowVersion& readVersion, const TRowVersion& writeVersion, const NEvents::TDataEvents::TEvWrite::TPtr& ev) - : Ev(ev) - , UserDb(*self, txc.DB, globalTxId, readVersion, writeVersion, EngineHostCounters, TAppData::TimeProvider->Now()) +TValidatedWriteTx::TValidatedWriteTx(TDataShard* self, TTransactionContext& txc, ui64 globalTxId, TInstant receivedAt, const TRowVersion& readVersion, const TRowVersion& writeVersion, const NEvents::TDataEvents::TEvWrite& ev) + : UserDb(*self, txc.DB, globalTxId, readVersion, writeVersion, EngineHostCounters, TAppData::TimeProvider->Now()) , KeyValidator(*self, txc.DB) , TabletId(self->TabletID()) - , Ctx(ctx) , ReceivedAt(receivedAt) , TxSize(0) , ErrCode(NKikimrTxDataShard::TError::OK) @@ -32,27 +48,36 @@ TValidatedWriteTx::TValidatedWriteTx(TDataShard* self, TTransactionContext& txc, NActors::NMemory::TLabel::Add(TxSize); UserDb.SetIsWriteTx(true); - - if (LockTxId()) { - UserDb.SetLockTxId(LockTxId()); - UserDb.SetLockNodeId(LockNodeId()); + + const NKikimrDataEvents::TEvWrite& record = ev.Record; + + if (record.GetLockTxId()) { + UserDb.SetLockTxId(record.GetLockTxId()); + UserDb.SetLockNodeId(record.GetLockNodeId()); } - if (Immediate()) + if (record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) UserDb.SetIsImmediateTx(true); NKikimrTxDataShard::TKqpTransaction::TDataTaskMeta meta; - LOG_TRACE_S(Ctx, NKikimrServices::TX_DATASHARD, "Parsing write transaction for " << globalTxId << " at " << TabletId << ", record: " << GetRecord().ShortDebugString()); + LOG_T("Parsing write transaction for " << globalTxId << " at " << TabletId << ", record: " << record.ShortDebugString()); - if (HasOperations()) { - if (!ParseOperations(self->TableInfos)) - return; + if (record.operations().size() != 0) { + Y_ABORT_UNLESS(record.operations().size() == 1, "Only one operation is supported now"); + Y_ABORT_UNLESS(record.operations(0).GetType() == NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, "Only UPSERT operation is supported now"); + const NKikimrDataEvents::TEvWrite::TOperation& recordOperation = record.operations(0); + + ColumnIds = {recordOperation.GetColumnIds().begin(), recordOperation.GetColumnIds().end()}; - SetTxKeys(RecordOperation().GetColumnIds()); + if (!ParseOperation(ev, recordOperation, self->TableInfos)) + return; } - KqpSetTxLocksKeys(GetKqpLocks(), self->SysLocksTable(), KeyValidator); + if (record.HasLocks()) { + KqpLocks = record.GetLocks(); + KqpSetTxLocksKeys(record.GetLocks(), self->SysLocksTable(), KeyValidator); + } KeyValidator.GetInfo().SetLoaded(); } @@ -60,15 +85,8 @@ TValidatedWriteTx::~TValidatedWriteTx() { NActors::NMemory::TLabel::Sub(TxSize); } -bool TValidatedWriteTx::ParseOperations(const TDataShard::TTableInfos& tableInfos) { - if (GetRecord().GetOperations().size() != 1) - { - ErrCode = NKikimrTxDataShard::TError::BAD_ARGUMENT; - ErrStr = TStringBuilder() << "Only one operation is supported now."; - return false; - } - - const NKikimrDataEvents::TTableId& tableIdRecord = RecordOperation().GetTableId(); +bool TValidatedWriteTx::ParseOperation(const NEvents::TDataEvents::TEvWrite& ev, const NKikimrDataEvents::TEvWrite::TOperation& recordOperation, const TUserTable::TTableInfos& tableInfos) { + const NKikimrDataEvents::TTableId& tableIdRecord = recordOperation.GetTableId(); auto tableInfoPtr = tableInfos.FindPtr(tableIdRecord.GetTableId()); if (!tableInfoPtr) { @@ -76,58 +94,57 @@ bool TValidatedWriteTx::ParseOperations(const TDataShard::TTableInfos& tableInfo ErrStr = TStringBuilder() << "Table '" << tableIdRecord.GetTableId() << "' doesn't exist."; return false; } - TableInfo = tableInfoPtr->Get(); - Y_ABORT_UNLESS(TableInfo); - if (TableInfo->GetTableSchemaVersion() != 0 && tableIdRecord.GetSchemaVersion() != TableInfo->GetTableSchemaVersion()) + const TUserTable& tableInfo = *tableInfoPtr->Get(); + + if (tableInfo.GetTableSchemaVersion() != 0 && tableIdRecord.GetSchemaVersion() != tableInfo.GetTableSchemaVersion()) { ErrCode = NKikimrTxDataShard::TError::SCHEME_CHANGED; - ErrStr = TStringBuilder() << "Table '" << TableInfo->Path << "' scheme changed."; + ErrStr = TStringBuilder() << "Table '" << tableInfo.Path << "' scheme changed."; return false; } - if (RecordOperation().GetPayloadFormat() != NKikimrDataEvents::FORMAT_CELLVEC) + if (recordOperation.GetPayloadFormat() != NKikimrDataEvents::FORMAT_CELLVEC) { ErrCode = NKikimrTxDataShard::TError::BAD_ARGUMENT; - ErrStr = TStringBuilder() << "Only FORMAT_CELLVEC is supported now. Got: " << RecordOperation().GetPayloadFormat(); + ErrStr = TStringBuilder() << "Only FORMAT_CELLVEC is supported now. Got: " << recordOperation.GetPayloadFormat(); return false; } - NEvWrite::TPayloadReader payloadReader(*Ev->Get()); - TString payload = payloadReader.GetDataFromPayload(RecordOperation().GetPayloadIndex()); + ::NKikimr::NEvWrite::TPayloadReader payloadReader(ev); + TString payload = payloadReader.GetDataFromPayload(recordOperation.GetPayloadIndex()); - if (!TSerializedCellMatrix::TryParse(payload,Matrix)) + if (!TSerializedCellMatrix::TryParse(payload, Matrix)) { ErrCode = NKikimrTxDataShard::TError::BAD_ARGUMENT; ErrStr = TStringBuilder() << "Can't parse TSerializedCellVec in payload"; return false; } - const auto& columnTags = RecordOperation().GetColumnIds(); - if ((size_t)columnTags.size() != Matrix.GetColCount()) + if ((size_t)ColumnIds.size() != Matrix.GetColCount()) { ErrCode = NKikimrTxDataShard::TError::BAD_ARGUMENT; - ErrStr = TStringBuilder() << "Column count mismatch: got columnids " << columnTags.size() << ", got cells count " <KeyColumnIds.size()) + if ((size_t)ColumnIds.size() < tableInfo.KeyColumnIds.size()) { ErrCode = NKikimrTxDataShard::TError::SCHEME_ERROR; - ErrStr = TStringBuilder() << "Column count mismatch: got " << columnTags.size() << ", expected greater or equal than key column count " << TableInfo->KeyColumnIds.size(); + ErrStr = TStringBuilder() << "Column count mismatch: got " << ColumnIds.size() << ", expected greater or equal than key column count " << tableInfo.KeyColumnIds.size(); return false; } - for (size_t i = 0; i < TableInfo->KeyColumnIds.size(); ++i) { - if (RecordOperation().columnids(i) != TableInfo->KeyColumnIds[i]) { + for (size_t i = 0; i < tableInfo.KeyColumnIds.size(); ++i) { + if (ColumnIds[i] != tableInfo.KeyColumnIds[i]) { ErrCode = NKikimrTxDataShard::TError::SCHEME_ERROR; ErrStr = TStringBuilder() << "Key column schema at position " << i; return false; } } - for (ui32 columnTag : columnTags) { - auto* col = TableInfo->Columns.FindPtr(columnTag); + for (ui32 columnTag : ColumnIds) { + auto* col = tableInfo.Columns.FindPtr(columnTag); if (!col) { ErrCode = NKikimrTxDataShard::TError::SCHEME_ERROR; ErrStr = TStringBuilder() << "Missing column with id " << columnTag; @@ -138,8 +155,8 @@ bool TValidatedWriteTx::ParseOperations(const TDataShard::TTableInfos& tableInfo for (ui32 rowIdx = 0; rowIdx < Matrix.GetRowCount(); ++rowIdx) { ui64 keyBytes = 0; - for (ui16 keyColIdx = 0; keyColIdx < TableInfo->KeyColumnIds.size(); ++keyColIdx) { - const auto& cellType = TableInfo->KeyColumnTypes[keyColIdx]; + for (ui16 keyColIdx = 0; keyColIdx < tableInfo.KeyColumnIds.size(); ++keyColIdx) { + const auto& cellType = tableInfo.KeyColumnTypes[keyColIdx]; const TCell& cell = Matrix.GetCell(rowIdx, keyColIdx); if (cellType.GetTypeId() == NScheme::NTypeIds::Uint8 && !cell.IsNull() && cell.AsValue() > 127) { ErrCode = NKikimrTxDataShard::TError::BAD_ARGUMENT; @@ -155,7 +172,7 @@ bool TValidatedWriteTx::ParseOperations(const TDataShard::TTableInfos& tableInfo return false; } - for (ui16 valueColIdx = TableInfo->KeyColumnIds.size(); valueColIdx < Matrix.GetColCount(); ++valueColIdx) { + for (ui16 valueColIdx = tableInfo.KeyColumnIds.size(); valueColIdx < Matrix.GetColCount(); ++valueColIdx) { const TCell& cell = Matrix.GetCell(rowIdx, valueColIdx); if (cell.Size() > NLimits::MaxWriteValueSize) { ErrCode = NKikimrTxDataShard::TError::BAD_ARGUMENT; @@ -166,33 +183,38 @@ bool TValidatedWriteTx::ParseOperations(const TDataShard::TTableInfos& tableInfo } TableId = TTableId(tableIdRecord.GetOwnerId(), tableIdRecord.GetTableId(), tableIdRecord.GetSchemaVersion()); + + SetTxKeys(tableInfo); + return true; } -TVector GetColumnWrites(const ::google::protobuf::RepeatedField<::NProtoBuf::uint32>& columnTags) { +TVector TValidatedWriteTx::GetColumnWrites() const { TVector writeColumns; - writeColumns.reserve(columnTags.size()); - for (ui32 columnTag : columnTags) { + writeColumns.reserve(ColumnIds.size()); + for (ui32 columnTag : ColumnIds) { TKeyValidator::TColumnWriteMeta writeColumn; writeColumn.Column = NTable::TColumn("", columnTag, {}, {}); - writeColumns.push_back(std::move(writeColumn)); } return writeColumns; } -void TValidatedWriteTx::SetTxKeys(const ::google::protobuf::RepeatedField<::NProtoBuf::uint32>& columnTags) +void TValidatedWriteTx::SetTxKeys(const TUserTable& tableInfo) { + auto columnsWrites = GetColumnWrites(); + TVector keyCells; - for (ui32 rowIdx = 0; rowIdx KeyColumnIds.size() - 1, keyCells); + Matrix.GetSubmatrix(rowIdx, rowIdx, 0, tableInfo.KeyColumnIds.size() - 1, keyCells); + + LOG_T("Table " << tableInfo.Path << ", shard: " << TabletId << ", " + << "write point " << DebugPrintPoint(tableInfo.KeyColumnTypes, keyCells, *AppData()->TypeRegistry)); - LOG_TRACE_S(Ctx, NKikimrServices::TX_DATASHARD, "Table " << TableInfo->Path << ", shard: " << TabletId << ", " - << "write point " << DebugPrintPoint(TableInfo->KeyColumnTypes, keyCells, *AppData()->TypeRegistry)); TTableRange tableRange(keyCells); - KeyValidator.AddWriteRange(TableId, tableRange, TableInfo->KeyColumnTypes, GetColumnWrites(columnTags), false); + KeyValidator.AddWriteRange(TableId, tableRange, tableInfo.KeyColumnTypes, columnsWrites, false); } } @@ -201,8 +223,6 @@ ui32 TValidatedWriteTx::ExtractKeys(bool allowErrors) if (!HasOperations()) return 0; - SetTxKeys(RecordOperation().GetColumnIds()); - bool isValid = ReValidateKeys(); if (allowErrors) { if (!isValid) { @@ -239,6 +259,9 @@ bool TValidatedWriteTx::CheckCancelled() { } void TValidatedWriteTx::ReleaseTxData() { + Matrix.ReleaseBuffer(); + ColumnIds.clear(); + KqpLocks.reset(); IsReleased = true; NActors::NMemory::TLabel::Sub(TxSize); @@ -248,6 +271,11 @@ void TValidatedWriteTx::ReleaseTxData() { void TValidatedWriteTx::ComputeTxSize() { TxSize = sizeof(TValidatedWriteTx); + TxSize += Matrix.GetBuffer().size(); + TxSize += ColumnIds.size() * sizeof(ui32); + + if (KqpLocks) + TxSize += KqpLocks->ByteSize(); } TWriteOperation* TWriteOperation::CastWriteOperation(TOperation::TPtr op) @@ -258,22 +286,31 @@ TWriteOperation* TWriteOperation::CastWriteOperation(TOperation::TPtr op) return writeOp; } -TWriteOperation::TWriteOperation(const TBasicOpInfo& op, NEvents::TDataEvents::TEvWrite::TPtr ev, TDataShard* self, TTransactionContext& txc, const TActorContext& ctx) +TWriteOperation::TWriteOperation(const TBasicOpInfo& op, ui64 tabletId) : TOperation(op) - , Ev(ev) - , TabletId(self->TabletID()) - , Ctx(ctx) + , TabletId(tabletId) , ArtifactFlags(0) , TxCacheUsage(0) , ReleasedTxDataSize(0) , SchemeShardId(0) , SubDomainPathId(0) { - SetTarget(Ev->Sender); - SetCookie(Ev->Cookie); - Orbit = std::move(Ev->Get()->MoveOrbit()); + TrackMemory(); +} + +TWriteOperation::TWriteOperation(const TBasicOpInfo& op, NEvents::TDataEvents::TEvWrite::TPtr&& ev, TDataShard* self, TTransactionContext& txc) + : TWriteOperation(op, self->TabletID()) +{ + SetTarget(ev->Sender); + SetCookie(ev->Cookie); - BuildWriteTx(self, txc, ctx); + TAutoPtr> handle = ev.Release(); + TAutoPtr evPtr = handle->Release(); + + Orbit = std::move(evPtr->MoveOrbit()); + WriteRequest.reset(evPtr.Release()); + + BuildWriteTx(self, txc); TrackMemory(); } @@ -286,58 +323,100 @@ TWriteOperation::~TWriteOperation() void TWriteOperation::FillTxData(TValidatedWriteTx::TPtr writeTx) { Y_ABORT_UNLESS(!WriteTx); - Y_ABORT_UNLESS(!Ev || HasVolatilePrepareFlag()); + Y_ABORT_UNLESS(!WriteRequest || HasVolatilePrepareFlag()); Target = writeTx->GetSource(); WriteTx = writeTx; } -void TWriteOperation::FillTxData(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx, const TActorId& target, NEvents::TDataEvents::TEvWrite::TPtr&& ev, const TVector& locks, ui64 artifactFlags) +void TWriteOperation::FillTxData(TDataShard* self, TTransactionContext& txc, const TActorId& target, const TString& txBody, const TVector& locks, ui64 artifactFlags) { UntrackMemory(); Y_ABORT_UNLESS(!WriteTx); - Y_ABORT_UNLESS(!Ev); + Y_ABORT_UNLESS(!WriteRequest); Target = target; - Ev = std::move(ev); + SetTxBody(txBody); + if (locks.size()) { for (auto lock : locks) LocksCache().Locks[lock.LockId] = lock; } ArtifactFlags = artifactFlags; Y_ABORT_UNLESS(!WriteTx); - BuildWriteTx(self, txc, ctx); + BuildWriteTx(self, txc); Y_ABORT_UNLESS(WriteTx->Ready()); TrackMemory(); } -void TWriteOperation::FillVolatileTxData(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx) +void TWriteOperation::FillVolatileTxData(TDataShard* self, TTransactionContext& txc) { UntrackMemory(); Y_ABORT_UNLESS(!WriteTx); - Y_ABORT_UNLESS(Ev); + Y_ABORT_UNLESS(WriteRequest); - BuildWriteTx(self, txc, ctx); + BuildWriteTx(self, txc); Y_ABORT_UNLESS(WriteTx->Ready()); TrackMemory(); } -TValidatedWriteTx::TPtr TWriteOperation::BuildWriteTx(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx) +TString TWriteOperation::GetTxBody() const { + Y_ABORT_UNLESS(WriteRequest); + + TAllocChunkSerializer serializer; + bool success = WriteRequest->SerializeToArcadiaStream(&serializer); + Y_ABORT_UNLESS(success); + TEventSerializationInfo serializationInfo = WriteRequest->CreateSerializationInfo(); + + NKikimrTxDataShard::TSerializedEvent proto; + proto.SetIsExtendedFormat(serializationInfo.IsExtendedFormat); + proto.SetEventData(serializer.Release(std::move(serializationInfo))->GetString()); + + TString str; + success = proto.SerializeToString(&str); + Y_ABORT_UNLESS(success); + return str; +} + +void TWriteOperation::SetTxBody(const TString& txBody) { + Y_ABORT_UNLESS(!WriteRequest); + + NKikimrTxDataShard::TSerializedEvent proto; + const bool success = proto.ParseFromString(txBody); + Y_ABORT_UNLESS(success); + + TEventSerializationInfo serializationInfo; + serializationInfo.IsExtendedFormat = proto.GetIsExtendedFormat(); + + TEventSerializedData buffer(proto.GetEventData(), std::move(serializationInfo)); + NKikimr::NEvents::TDataEvents::TEvWrite* writeRequest = static_cast(NKikimr::NEvents::TDataEvents::TEvWrite::Load(&buffer)); + Y_ABORT_UNLESS(writeRequest); + + WriteRequest.reset(writeRequest); +} + +void TWriteOperation::ClearTxBody() { + UntrackMemory(); + WriteRequest.reset(); + TrackMemory(); +} + +TValidatedWriteTx::TPtr TWriteOperation::BuildWriteTx(TDataShard* self, TTransactionContext& txc) { if (!WriteTx) { - Y_ABORT_UNLESS(Ev); + Y_ABORT_UNLESS(WriteRequest); auto [readVersion, writeVersion] = self->GetReadWriteVersions(this); - WriteTx = std::make_shared(self, txc, ctx, GetGlobalTxId(), GetReceivedAt(), readVersion, writeVersion, Ev); + WriteTx = std::make_shared(self, txc, GetGlobalTxId(), GetReceivedAt(), readVersion, writeVersion, *WriteRequest); } return WriteTx; } -void TWriteOperation::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& provider, const TActorContext& ctx) { +void TWriteOperation::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& provider) { ReleasedTxDataSize = provider.GetMemoryLimit() + provider.GetRequestedMemory(); if (!WriteTx || IsTxDataReleased()) @@ -346,9 +425,7 @@ void TWriteOperation::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& WriteTx->ReleaseTxData(); // Immediate transactions have no body stored. if (!IsImmediate() && !HasVolatilePrepareFlag()) { - UntrackMemory(); - Ev.Reset(); - TrackMemory(); + ClearTxBody(); } //InReadSets.clear(); @@ -357,7 +434,7 @@ void TWriteOperation::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& LocksCache().Locks.clear(); ArtifactFlags = 0; - LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "tx " << GetTxId() << " released its data"); + LOG_D("tx " << GetTxId() << " released its data"); } void TWriteOperation::DbStoreLocksAccessLog(NTable::TDatabase& txcDb) @@ -378,7 +455,7 @@ void TWriteOperation::DbStoreLocksAccessLog(NTable::TDatabase& txcDb) TStringBuf vecData(vecDataStart, vecDataSize); db.Table().Key(GetTxId()).Update(NIceDb::TUpdate(vecData)); - LOG_TRACE_S(Ctx, NKikimrServices::TX_DATASHARD, "Storing " << vec.size() << " locks for txid=" << GetTxId() << " in " << TabletId); + LOG_T("Storing " << vec.size() << " locks for txid=" << GetTxId() << " in " << TabletId); } void TWriteOperation::DbStoreArtifactFlags(NTable::TDatabase& txcDb) @@ -388,7 +465,7 @@ void TWriteOperation::DbStoreArtifactFlags(NTable::TDatabase& txcDb) NIceDb::TNiceDb db(txcDb); db.Table().Key(GetTxId()).Update(ArtifactFlags); - LOG_TRACE_S(Ctx, NKikimrServices::TX_DATASHARD, "Storing artifactflags=" << ArtifactFlags << " for txid=" << GetTxId() << " in " << TabletId); + LOG_T("Storing artifactflags=" << ArtifactFlags << " for txid=" << GetTxId() << " in " << TabletId); } ui64 TWriteOperation::GetMemoryConsumption() const { @@ -396,25 +473,14 @@ ui64 TWriteOperation::GetMemoryConsumption() const { if (WriteTx) { res += WriteTx->GetTxSize(); } - if (Ev) { - res += sizeof(NEvents::TDataEvents::TEvWrite); + if (WriteRequest) { + res += WriteRequest->CalculateSerializedSize(); } - return res; } -ERestoreDataStatus TWriteOperation::RestoreTxData( - TDataShard* self, - TTransactionContext& txc, - const TActorContext& ctx -) +ERestoreDataStatus TWriteOperation::RestoreTxData(TDataShard* self, TTransactionContext& txc) { - // TODO - Y_UNUSED(self); - Y_UNUSED(txc); - Y_UNUSED(ctx); - Y_ABORT(); - /* if (!WriteTx) { ReleasedTxDataSize = 0; return ERestoreDataStatus::Ok; @@ -429,14 +495,18 @@ ERestoreDataStatus TWriteOperation::RestoreTxData( TVector locks; if (!IsImmediate() && !HasVolatilePrepareFlag()) { NIceDb::TNiceDb db(txc.DB); - bool ok = self->TransQueue.LoadTxDetails(db, GetTxId(), Target, Ev, locks, ArtifactFlags); + + TString txBody; + bool ok = self->TransQueue.LoadTxDetails(db, GetTxId(), Target, txBody, locks, ArtifactFlags); if (!ok) { - Ev.Reset(); + WriteRequest.reset(); ArtifactFlags = 0; return ERestoreDataStatus::Restart; } + + SetTxBody(txBody); } else { - Y_ABORT_UNLESS(Ev); + Y_ABORT_UNLESS(WriteRequest); } TrackMemory(); @@ -446,9 +516,10 @@ ERestoreDataStatus TWriteOperation::RestoreTxData( bool extractKeys = WriteTx->IsTxInfoLoaded(); auto [readVersion, writeVersion] = self->GetReadWriteVersions(this); - WriteTx = std::make_shared(self, txc, ctx, GetStepOrder(), GetReceivedAt(), readVersion, writeVersion, Ev); + + WriteTx = std::make_shared(self, txc, GetTxId(), GetReceivedAt(), readVersion, writeVersion, *WriteRequest); if (WriteTx->Ready() && extractKeys) { - WriteTx->ExtractKeys(); + WriteTx->ExtractKeys(true); } if (!WriteTx->Ready()) { @@ -456,99 +527,19 @@ ERestoreDataStatus TWriteOperation::RestoreTxData( } ReleasedTxDataSize = 0; - */ - LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "tx " << GetTxId() << " at " << self->TabletID() << " restored its data"); + LOG_D("tx " << GetTxId() << " at " << self->TabletID() << " restored its data"); return ERestoreDataStatus::Ok; } -void TWriteOperation::FinalizeWriteTxPlan() -{ - Y_ABORT_UNLESS(IsWriteTx()); - Y_ABORT_UNLESS(!IsImmediate()); - Y_ABORT_UNLESS(!IsKqpScanTransaction()); - - TVector plan; - - plan.push_back(EExecutionUnitKind::BuildAndWaitDependencies); - if (IsKqpDataTransaction()) { - plan.push_back(EExecutionUnitKind::BuildKqpDataTxOutRS); - plan.push_back(EExecutionUnitKind::StoreAndSendOutRS); - plan.push_back(EExecutionUnitKind::PrepareKqpDataTxInRS); - plan.push_back(EExecutionUnitKind::LoadAndWaitInRS); - plan.push_back(EExecutionUnitKind::ExecuteKqpDataTx); - } else { - plan.push_back(EExecutionUnitKind::BuildDataTxOutRS); - plan.push_back(EExecutionUnitKind::StoreAndSendOutRS); - plan.push_back(EExecutionUnitKind::PrepareDataTxInRS); - plan.push_back(EExecutionUnitKind::LoadAndWaitInRS); - plan.push_back(EExecutionUnitKind::ExecuteDataTx); - } - plan.push_back(EExecutionUnitKind::CompleteOperation); - plan.push_back(EExecutionUnitKind::CompletedOperations); - - RewriteExecutionPlan(plan); -} - -class TFinalizeWriteTxPlanUnit: public TExecutionUnit { -public: - TFinalizeWriteTxPlanUnit(TDataShard& dataShard, TPipeline& pipeline) - : TExecutionUnit(EExecutionUnitKind::FinalizeWriteTxPlan, false, dataShard, pipeline) - { - } - - bool IsReadyToExecute(TOperation::TPtr) const override { - return true; - } - - EExecutionStatus Execute(TOperation::TPtr op, TTransactionContext& txc, const TActorContext& ctx) override { - Y_UNUSED(txc); - Y_UNUSED(ctx); - - TWriteOperation* writeOp = TWriteOperation::CastWriteOperation(op); - - writeOp->FinalizeWriteTxPlan(); - - return EExecutionStatus::Executed; - } - - void Complete(TOperation::TPtr op, const TActorContext& ctx) override { - Y_UNUSED(op); - Y_UNUSED(ctx); - } -}; - -THolder CreateFinalizeWriteTxPlanUnit(TDataShard& dataShard, TPipeline& pipeline) { - return THolder(new TFinalizeWriteTxPlanUnit(dataShard, pipeline)); -} - -void TWriteOperation::TrackMemory() const { - // TODO More accurate calc memory - NActors::NMemory::TLabel::Add(GetRecord().SpaceUsed()); -} - -void TWriteOperation::UntrackMemory() const { - NActors::NMemory::TLabel::Sub(GetRecord().SpaceUsed()); -} - -void TWriteOperation::SetError(const NKikimrDataEvents::TEvWriteResult::EStatus& status, const TString& errorMsg) { - SetAbortedFlag(); - WriteResult = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletId, GetTxId(), status, errorMsg); -} - -void TWriteOperation::SetWriteResult(std::unique_ptr&& writeResult) { - WriteResult = std::move(writeResult); -} - void TWriteOperation::BuildExecutionPlan(bool loaded) { Y_ABORT_UNLESS(GetExecutionPlan().empty()); - Y_ABORT_UNLESS(!loaded); TVector plan; - //if (IsImmediate()) + if (IsImmediate()) { Y_ABORT_UNLESS(!loaded); plan.push_back(EExecutionUnitKind::CheckWrite); @@ -556,10 +547,11 @@ void TWriteOperation::BuildExecutionPlan(bool loaded) plan.push_back(EExecutionUnitKind::ExecuteWrite); plan.push_back(EExecutionUnitKind::FinishProposeWrite); plan.push_back(EExecutionUnitKind::CompletedOperations); - } + } /* else if (HasVolatilePrepareFlag()) { - plan.push_back(EExecutionUnitKind::StoreDataTx); // note: stores in memory + Y_ABORT_UNLESS(!loaded); + plan.push_back(EExecutionUnitKind::StoreWrite); // note: stores in memory plan.push_back(EExecutionUnitKind::FinishProposeWrite); Y_ABORT_UNLESS(!GetStep()); plan.push_back(EExecutionUnitKind::WaitForPlan); @@ -567,23 +559,49 @@ void TWriteOperation::BuildExecutionPlan(bool loaded) plan.push_back(EExecutionUnitKind::LoadTxDetails); // note: reloads from memory plan.push_back(EExecutionUnitKind::BuildAndWaitDependencies); plan.push_back(EExecutionUnitKind::ExecuteWrite); - plan.push_back(EExecutionUnitKind::CompleteOperation); + plan.push_back(EExecutionUnitKind::CompleteWrite); plan.push_back(EExecutionUnitKind::CompletedOperations); - } else { + */ + else { if (!loaded) { plan.push_back(EExecutionUnitKind::CheckWrite); - plan.push_back(EExecutionUnitKind::StoreDataTx); + plan.push_back(EExecutionUnitKind::StoreWrite); plan.push_back(EExecutionUnitKind::FinishProposeWrite); } if (!GetStep()) plan.push_back(EExecutionUnitKind::WaitForPlan); plan.push_back(EExecutionUnitKind::PlanQueue); - plan.push_back(EExecutionUnitKind::LoadTxDetails); - plan.push_back(EExecutionUnitKind::FinalizeWriteTxPlan); - } */ + plan.push_back(EExecutionUnitKind::LoadWriteDetails); + + plan.push_back(EExecutionUnitKind::BuildAndWaitDependencies); + + plan.push_back(EExecutionUnitKind::PrepareWriteTxInRS); + plan.push_back(EExecutionUnitKind::LoadAndWaitInRS); + plan.push_back(EExecutionUnitKind::ExecuteWrite); + + plan.push_back(EExecutionUnitKind::CompleteWrite); + plan.push_back(EExecutionUnitKind::CompletedOperations); + } RewriteExecutionPlan(plan); } +void TWriteOperation::TrackMemory() const { + NActors::NMemory::TLabel::Add(WriteRequest ? WriteRequest->CalculateSerializedSize() : 0); +} + +void TWriteOperation::UntrackMemory() const { + NActors::NMemory::TLabel::Sub(WriteRequest ? WriteRequest->CalculateSerializedSize() : 0); +} + +void TWriteOperation::SetError(const NKikimrDataEvents::TEvWriteResult::EStatus& status, const TString& errorMsg) { + SetAbortedFlag(); + WriteResult = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletId, GetTxId(), status, errorMsg); +} + +void TWriteOperation::SetWriteResult(std::unique_ptr&& writeResult) { + WriteResult = std::move(writeResult); +} + } // NDataShard } // NKikimr diff --git a/ydb/core/tx/datashard/datashard_write_operation.h b/ydb/core/tx/datashard/datashard_write_operation.h index 345e30687c5e..c756e5017701 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.h +++ b/ydb/core/tx/datashard/datashard_write_operation.h @@ -1,57 +1,47 @@ #pragma once -#include "datashard_impl.h" -#include -#include "datashard__engine_host.h" +#include "key_validator.h" #include "datashard_user_db.h" +#include "datashard_user_table.h" #include "operation.h" #include +#include #include +#include #include namespace NKikimr { namespace NDataShard { - -class TValidatedWriteTx: TNonCopyable { +class TValidatedWriteTx: TNonCopyable, public TValidatedTx { public: using TPtr = std::shared_ptr; - TValidatedWriteTx(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx, ui64 globalTxId, TInstant receivedAt, const TRowVersion& readVersion, const TRowVersion& writeVersion, const NEvents::TDataEvents::TEvWrite::TPtr& ev); + TValidatedWriteTx(TDataShard* self, TTransactionContext& txc, ui64 globalTxId, TInstant receivedAt, const TRowVersion& readVersion, const TRowVersion& writeVersion, const NEvents::TDataEvents::TEvWrite& ev); ~TValidatedWriteTx(); + EType GetType() const override { + return EType::WriteTx; + }; + static constexpr ui64 MaxReorderTxKeys() { return 100; } - const NEvents::TDataEvents::TEvWrite::TPtr& GetEv() const { - return Ev; - } - - const NKikimrDataEvents::TEvWrite& GetRecord() const { - return Ev->Get()->Record; - } - - const NKikimrDataEvents::TEvWrite::TOperation& RecordOperation() const { - Y_ABORT_UNLESS(GetRecord().operations().size() == 1, "Only one operation is supported now"); - Y_ABORT_UNLESS(GetRecord().operations(0).GetType() == NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, "Only UPSERT operation is supported now"); - return GetRecord().operations(0); - } - - ui64 GetTxId() const { + ui64 GetTxId() const override { return UserDb.GetGlobalTxId(); } ui64 LockTxId() const { - return GetRecord().locktxid(); + return UserDb.GetLockTxId(); } ui32 LockNodeId() const { - return GetRecord().locknodeid(); + return UserDb.GetLockNodeId(); } bool Immediate() const { - return GetRecord().txmode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE; + return UserDb.GetIsImmediateTx(); } bool NeedDiagnostics() const { return true; @@ -65,9 +55,6 @@ class TValidatedWriteTx: TNonCopyable { bool RequirePrepare() const { return ErrCode == NKikimrTxDataShard::TError::SNAPSHOT_NOT_READY_YET; } - bool RequireWrites() const { - return TxInfo().HasWrites() || !Immediate(); - } bool HasWrites() const { return TxInfo().HasWrites(); } @@ -127,33 +114,22 @@ class TValidatedWriteTx: TNonCopyable { return UserDb.GetVolatileCommitOrdered(); } - bool IsProposed() const { - return Source != TActorId(); - } - - const ::NKikimrDataEvents::TKqpLocks& GetKqpLocks() const { - return GetRecord().locks(); - } - bool HasKqpLocks() const { - return GetRecord().has_locks(); - } - - bool ParseOperations(const TDataShard::TTableInfos& tableInfos); - void SetTxKeys(const ::google::protobuf::RepeatedField<::NProtoBuf::uint32>& columnIds); - ui32 ExtractKeys(bool allowErrors); bool ReValidateKeys(); - + ui64 HasOperations() const { - return GetRecord().operations().size() != 0; + return Matrix.GetRowCount() != 0; } - ui32 KeysCount() const { return TxInfo().WritesCount; } void ReleaseTxData(); + ui64 GetMemoryConsumption() const override { + return GetTxSize(); + } + bool IsTxInfoLoaded() const { return TxInfo().Loaded; } @@ -170,17 +146,22 @@ class TValidatedWriteTx: TNonCopyable { } private: - const NEvents::TDataEvents::TEvWrite::TPtr& Ev; + bool ParseOperation(const NEvents::TDataEvents::TEvWrite& ev, const NKikimrDataEvents::TEvWrite::TOperation& recordOperation, const TUserTable::TTableInfos& tableInfos); + void SetTxKeys(const TUserTable& tableInfo); + TVector GetColumnWrites() const; + + void ComputeTxSize(); + +private: TDataShardUserDb UserDb; TKeyValidator KeyValidator; NMiniKQL::TEngineHostCounters EngineHostCounters; const ui64 TabletId; - const TActorContext& Ctx; - - YDB_ACCESSOR_DEF(TActorId, Source); YDB_READONLY_DEF(TTableId, TableId); + YDB_READONLY_DEF(std::optional, KqpLocks); + YDB_READONLY_DEF(std::vector, ColumnIds); YDB_READONLY_DEF(TSerializedCellMatrix, Matrix); YDB_READONLY_DEF(TInstant, ReceivedAt); @@ -189,41 +170,29 @@ class TValidatedWriteTx: TNonCopyable { YDB_READONLY_DEF(NKikimrTxDataShard::TError::EKind, ErrCode); YDB_READONLY_DEF(TString, ErrStr); YDB_READONLY_DEF(bool, IsReleased); - - const TUserTable* TableInfo; -private: - void ComputeTxSize(); }; class TWriteOperation : public TOperation { - friend class TWriteUnit; + friend class TExecuteWriteUnit; public: + using TPtr = TIntrusivePtr; + static TWriteOperation* CastWriteOperation(TOperation::TPtr op); - - explicit TWriteOperation(const TBasicOpInfo& op, NEvents::TDataEvents::TEvWrite::TPtr ev, TDataShard* self, TTransactionContext& txc, const TActorContext& ctx); + explicit TWriteOperation(const TBasicOpInfo& op, ui64 tabletId); + explicit TWriteOperation(const TBasicOpInfo& op, NEvents::TDataEvents::TEvWrite::TPtr&& ev, TDataShard* self, TTransactionContext& txc); ~TWriteOperation(); void FillTxData(TValidatedWriteTx::TPtr dataTx); - void FillTxData(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx, const TActorId& target, NEvents::TDataEvents::TEvWrite::TPtr&& ev, const TVector& locks, ui64 artifactFlags); - void FillVolatileTxData(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx); + void FillTxData(TDataShard* self, TTransactionContext& txc, const TActorId& target, const TString& txBody, const TVector& locks, ui64 artifactFlags); + void FillVolatileTxData(TDataShard* self, TTransactionContext& txc); - const NEvents::TDataEvents::TEvWrite::TPtr& GetEv() const { - return Ev; - } - void SetEv(const NEvents::TDataEvents::TEvWrite::TPtr& ev) { - UntrackMemory(); - Ev = ev; - TrackMemory(); - } - void ClearEv() { - UntrackMemory(); - Ev.Reset(); - TrackMemory(); - } + TString GetTxBody() const; + void SetTxBody(const TString& txBody); + void ClearTxBody(); void Deactivate() override { - ClearEv(); + ClearTxBody(); TOperation::Deactivate(); } @@ -277,9 +246,8 @@ class TWriteOperation : public TOperation { return requiredMem; } - void ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& provider, const TActorContext& ctx); - ERestoreDataStatus RestoreTxData(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx); - void FinalizeWriteTxPlan(); + void ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& provider); + ERestoreDataStatus RestoreTxData(TDataShard* self, TTransactionContext& txc); // TOperation iface. void BuildExecutionPlan(bool loaded) override; @@ -320,16 +288,12 @@ class TWriteOperation : public TOperation { TValidatedWriteTx::TPtr& GetWriteTx() { return WriteTx; } - TValidatedWriteTx::TPtr BuildWriteTx(TDataShard* self, TTransactionContext& txc, const TActorContext& ctx); + TValidatedWriteTx::TPtr BuildWriteTx(TDataShard* self, TTransactionContext& txc); void ClearWriteTx() { WriteTx = nullptr; } - const NKikimrDataEvents::TEvWrite& GetRecord() const { - return Ev->Get()->Record; - } - const std::unique_ptr& GetWriteResult() const { return WriteResult; } @@ -345,12 +309,12 @@ class TWriteOperation : public TOperation { void UntrackMemory() const; private: - NEvents::TDataEvents::TEvWrite::TPtr Ev; - TValidatedWriteTx::TPtr WriteTx; + std::unique_ptr WriteRequest; std::unique_ptr WriteResult; + TValidatedWriteTx::TPtr WriteTx; + const ui64 TabletId; - const TActorContext& Ctx; YDB_READONLY_DEF(ui64, ArtifactFlags); YDB_ACCESSOR_DEF(ui64, TxCacheUsage); diff --git a/ydb/core/tx/datashard/execute_data_tx_unit.cpp b/ydb/core/tx/datashard/execute_data_tx_unit.cpp index a2976e05bd82..00a0baa51b69 100644 --- a/ydb/core/tx/datashard/execute_data_tx_unit.cpp +++ b/ydb/core/tx/datashard/execute_data_tx_unit.cpp @@ -313,7 +313,7 @@ void TExecuteDataTxUnit::ExecuteDataTx(TOperation::TPtr op, KqpUpdateDataShardStatCounters(DataShard, counters); if (tx->GetDataTx()->CollectStats()) { - KqpFillTxStats(DataShard, counters, *result); + KqpFillTxStats(DataShard, counters, *result->Record.MutableTxStats()); } if (counters.InvisibleRowSkips && op->LockTxId()) { diff --git a/ydb/core/tx/datashard/write_unit.cpp b/ydb/core/tx/datashard/execute_write_unit.cpp similarity index 87% rename from ydb/core/tx/datashard/write_unit.cpp rename to ydb/core/tx/datashard/execute_write_unit.cpp index 01be4e9b4c89..63f7ba8d3f46 100644 --- a/ydb/core/tx/datashard/write_unit.cpp +++ b/ydb/core/tx/datashard/execute_write_unit.cpp @@ -10,19 +10,19 @@ namespace NKikimr { namespace NDataShard { -class TWriteUnit : public TExecutionUnit { +class TExecuteWriteUnit : public TExecutionUnit { public: - TWriteUnit(TDataShard& self, TPipeline& pipeline) + TExecuteWriteUnit(TDataShard& self, TPipeline& pipeline) : TExecutionUnit(EExecutionUnitKind::ExecuteWrite, true, self, pipeline) { } - ~TWriteUnit() + ~TExecuteWriteUnit() { } bool IsReadyToExecute(TOperation::TPtr op) const override { - if (op->HasRuntimeConflicts() || op->HasWaitingForGlobalTxIdFlag()) { + if (op->HasWaitingForGlobalTxIdFlag()) { return false; } @@ -107,7 +107,7 @@ class TWriteUnit : public TExecutionUnit { ops.reserve(matrix.GetColCount() - TableInfo_.KeyColumnIds.size()); for (ui16 valueColIdx = TableInfo_.KeyColumnIds.size(); valueColIdx < matrix.GetColCount(); ++valueColIdx) { - ui32 columnTag = writeTx->RecordOperation().GetColumnIds(valueColIdx); + ui32 columnTag = writeTx->GetColumnIds()[valueColIdx]; const TCell& cell = matrix.GetCell(rowIdx, valueColIdx); NScheme::TTypeInfo vtypeInfo = scheme.GetColumnInfo(tableInfo, columnTag)->PType; @@ -140,10 +140,31 @@ class TWriteUnit : public TExecutionUnit { // Every time we execute immediate transaction we may choose a new mvcc version op->MvccReadWriteVersion.reset(); } - else { - //TODO: Prepared - writeOp->SetWriteResult(NEvents::TDataEvents::TEvWriteResult::BuildPrepared(DataShard.TabletID(), op->GetTxId(), {0, 0, {}})); - return EExecutionStatus::DelayCompleteNoMoreRestarts; + + const TValidatedWriteTx::TPtr& writeTx = writeOp->GetWriteTx(); + + DataShard.ReleaseCache(*writeOp); + writeTx->GetUserDb().ResetCounters(); + + if (writeOp->IsTxDataReleased()) { + switch (Pipeline.RestoreDataTx(writeOp, txc)) { + case ERestoreDataStatus::Ok: + break; + + case ERestoreDataStatus::Restart: + return EExecutionStatus::Restart; + + case ERestoreDataStatus::Error: + // For immediate transactions we want to translate this into a propose failure + if (op->IsImmediate()) { + Y_ABORT_UNLESS(!writeTx->Ready()); + writeOp->SetError(NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, writeTx->GetErrStr()); + return EExecutionStatus::Executed; + } + + // For planned transactions errors are not expected + Y_ABORT("Failed to restore tx data: %s", writeTx->GetErrStr().c_str()); + } } TDataShardLocksDb locksDb(DataShard, txc); @@ -151,8 +172,6 @@ class TWriteUnit : public TExecutionUnit { ui64 tabletId = DataShard.TabletID(); - const TValidatedWriteTx::TPtr& writeTx = writeOp->GetWriteTx(); - if (op->IsImmediate() && !writeOp->ReValidateKeys()) { // Immediate transactions may be reordered with schema changes and become invalid Y_ABORT_UNLESS(!writeTx->Ready()); @@ -161,7 +180,7 @@ class TWriteUnit : public TExecutionUnit { } if (writeTx->CheckCancelled()) { - writeOp->ReleaseTxData(txc, ctx); + writeOp->ReleaseTxData(txc); writeOp->SetError(NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED, "Tx was cancelled"); DataShard.IncCounter(COUNTER_WRITE_CANCELLED); return EExecutionStatus::Executed; @@ -169,7 +188,7 @@ class TWriteUnit : public TExecutionUnit { try { const ui64 txId = writeTx->GetTxId(); - const auto* kqpLocks = writeTx->HasKqpLocks() ? &writeTx->GetKqpLocks() : nullptr; + const auto* kqpLocks = writeTx->GetKqpLocks() ? &writeTx->GetKqpLocks().value() : nullptr; const auto& inReadSets = op->InReadSets(); auto& awaitingDecisions = op->AwaitingDecisions(); auto& outReadSets = op->OutReadSets(); @@ -260,9 +279,14 @@ class TWriteUnit : public TExecutionUnit { writeOp->SetWriteResult(NEvents::TDataEvents::TEvWriteResult::BuildCompleted(DataShard.TabletID(), writeOp->GetTxId())); + auto& writeResult = writeOp->GetWriteResult(); + writeResult->Record.SetOrderId(op->GetTxId()); + if (!op->IsImmediate()) + writeResult->Record.SetStep(op->GetStep()); + if (Pipeline.AddLockDependencies(op, guardLocks)) { writeTx->ResetCollectedChanges(); - writeOp->ReleaseTxData(txc, ctx); + writeOp->ReleaseTxData(txc); if (txc.DB.HasChanges()) { txc.DB.RollbackChanges(); } @@ -306,6 +330,10 @@ class TWriteUnit : public TExecutionUnit { op->ChangeRecords() = std::move(changes); } + auto& counters = writeTx->GetUserDb().GetCounters(); + KqpUpdateDataShardStatCounters(DataShard, counters); + KqpFillTxStats(DataShard, counters, *writeResult->Record.MutableTxStats()); + } catch (const TNeedGlobalTxId&) { Y_VERIFY_S(op->GetGlobalTxId() == 0, "Unexpected TNeedGlobalTxId exception for write operation with TxId# " << op->GetGlobalTxId()); @@ -339,26 +367,13 @@ class TWriteUnit : public TExecutionUnit { return EExecutionStatus::DelayCompleteNoMoreRestarts; } - void Complete(TOperation::TPtr op, const TActorContext& ctx) override { - Pipeline.RemoveCommittingOp(op); - DataShard.EnqueueChangeRecords(std::move(op->ChangeRecords())); - DataShard.EmitHeartbeats(); - - TWriteOperation* writeOp = TWriteOperation::CastWriteOperation(op); - - const auto& status = writeOp->GetWriteResult()->Record.status(); - LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Completed write operation for " << *op << " at " << DataShard.TabletID() << ", status " << status); - - DataShard.IncCounter(writeOp->GetWriteResult()->Record.status() == NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED ? - COUNTER_WRITE_SUCCESS : COUNTER_WRITE_ERROR); - - ctx.Send(writeOp->GetEv()->Sender, writeOp->ReleaseWriteResult().release(), 0, writeOp->GetEv()->Cookie); + void Complete(TOperation::TPtr, const TActorContext&) override { } -}; // TWriteUnit +}; // TExecuteWriteUnit -THolder CreateWriteUnit(TDataShard& self, TPipeline& pipeline) { - return THolder(new TWriteUnit(self, pipeline)); +THolder CreateExecuteWriteUnit(TDataShard& self, TPipeline& pipeline) { + return THolder(new TExecuteWriteUnit(self, pipeline)); } } // NDataShard diff --git a/ydb/core/tx/datashard/execution_unit.cpp b/ydb/core/tx/datashard/execution_unit.cpp index 2d76a6ed186e..93a2eed6ad51 100644 --- a/ydb/core/tx/datashard/execution_unit.cpp +++ b/ydb/core/tx/datashard/execution_unit.cpp @@ -24,6 +24,8 @@ THolder CreateExecutionUnit(EExecutionUnitKind kind, return CreateCheckCommitWritesTxUnit(dataShard, pipeline); case EExecutionUnitKind::StoreDataTx: return CreateStoreDataTxUnit(dataShard, pipeline); + case EExecutionUnitKind::StoreWrite: + return CreateStoreWriteUnit(dataShard, pipeline); case EExecutionUnitKind::StoreSchemeTx: return CreateStoreSchemeTxUnit(dataShard, pipeline); case EExecutionUnitKind::StoreSnapshotTx: @@ -46,10 +48,10 @@ THolder CreateExecutionUnit(EExecutionUnitKind kind, return CreatePlanQueueUnit(dataShard, pipeline); case EExecutionUnitKind::LoadTxDetails: return CreateLoadTxDetailsUnit(dataShard, pipeline); + case EExecutionUnitKind::LoadWriteDetails: + return CreateLoadWriteDetailsUnit(dataShard, pipeline); case EExecutionUnitKind::FinalizeDataTxPlan: return CreateFinalizeDataTxPlanUnit(dataShard, pipeline); - case EExecutionUnitKind::FinalizeWriteTxPlan: - return CreateFinalizeWriteTxPlanUnit(dataShard, pipeline); case EExecutionUnitKind::ProtectSchemeEchoes: return CreateProtectSchemeEchoesUnit(dataShard, pipeline); case EExecutionUnitKind::BuildDataTxOutRS: @@ -64,6 +66,8 @@ THolder CreateExecutionUnit(EExecutionUnitKind kind, return CreatePrepareDataTxInRSUnit(dataShard, pipeline); case EExecutionUnitKind::PrepareKqpDataTxInRS: return CreatePrepareKqpDataTxInRSUnit(dataShard, pipeline); + case EExecutionUnitKind::PrepareWriteTxInRS: + return CreatePrepareWriteTxInRSUnit(dataShard, pipeline); case EExecutionUnitKind::PrepareDistributedEraseTxInRS: return CreatePrepareDistributedEraseTxInRSUnit(dataShard, pipeline); case EExecutionUnitKind::LoadAndWaitInRS: @@ -78,6 +82,8 @@ THolder CreateExecutionUnit(EExecutionUnitKind kind, return CreateExecuteCommitWritesTxUnit(dataShard, pipeline); case EExecutionUnitKind::CompleteOperation: return CreateCompleteOperationUnit(dataShard, pipeline); + case EExecutionUnitKind::CompleteWrite: + return CreateCompleteWriteUnit(dataShard, pipeline); case EExecutionUnitKind::ExecuteKqpScanTx: return CreateExecuteKqpScanTxUnit(dataShard, pipeline); case EExecutionUnitKind::MakeScanSnapshot: @@ -139,7 +145,7 @@ THolder CreateExecutionUnit(EExecutionUnitKind kind, case EExecutionUnitKind::ExecuteRead: return CreateReadUnit(dataShard, pipeline); case EExecutionUnitKind::ExecuteWrite: - return CreateWriteUnit(dataShard, pipeline); + return CreateExecuteWriteUnit(dataShard, pipeline); default: Y_FAIL_S("Unexpected execution kind " << kind << " (" << (ui32)kind << ")"); } diff --git a/ydb/core/tx/datashard/execution_unit_ctors.h b/ydb/core/tx/datashard/execution_unit_ctors.h index 1689889a4b5b..e8dce32c483f 100644 --- a/ydb/core/tx/datashard/execution_unit_ctors.h +++ b/ydb/core/tx/datashard/execution_unit_ctors.h @@ -12,7 +12,8 @@ THolder CreateCheckSnapshotTxUnit(TDataShard &dataShard, TPipeli THolder CreateCheckDistributedEraseTxUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateCheckCommitWritesTxUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateStoreDataTxUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreateStoreSchemeTxUnit(TDataShard &dataShard, TPipeline &pipeline); +THolder CreateStoreWriteUnit(TDataShard& dataShard, TPipeline& pipeline); +THolder CreateStoreSchemeTxUnit(TDataShard& dataShard, TPipeline& pipeline); THolder CreateStoreSnapshotTxUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateStoreDistributedEraseTxUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateStoreCommitWritesTxUnit(TDataShard &dataShard, TPipeline &pipeline); @@ -23,8 +24,8 @@ THolder CreateCompletedOperationsUnit(TDataShard& dataShard, TPi THolder CreateWaitForPlanUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreatePlanQueueUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateLoadTxDetailsUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreateFinalizeDataTxPlanUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreateFinalizeWriteTxPlanUnit(TDataShard& dataShard, TPipeline& pipeline); +THolder CreateLoadWriteDetailsUnit(TDataShard& dataShard, TPipeline& pipeline); +THolder CreateFinalizeDataTxPlanUnit(TDataShard& dataShard, TPipeline& pipeline); THolder CreateProtectSchemeEchoesUnit(TDataShard& dataShard, TPipeline& pipeline); THolder CreateBuildDataTxOutRSUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateBuildDistributedEraseTxOutRSUnit(TDataShard &dataShard, TPipeline &pipeline); @@ -32,14 +33,17 @@ THolder CreateBuildKqpDataTxOutRSUnit(TDataShard &dataShard, TPi THolder CreateStoreAndSendOutRSUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreatePrepareDataTxInRSUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreatePrepareKqpDataTxInRSUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreatePrepareDistributedEraseTxInRSUnit(TDataShard &dataShard, TPipeline &pipeline); +THolder CreatePrepareWriteTxInRSUnit(TDataShard& dataShard, TPipeline& pipeline); +THolder CreatePrepareDistributedEraseTxInRSUnit(TDataShard& dataShard, TPipeline& pipeline); THolder CreateLoadAndWaitInRSUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateExecuteDataTxUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreateExecuteKqpDataTxUnit(TDataShard &dataShard, TPipeline &pipeline); +THolder CreateExecuteWriteUnit(TDataShard& dataShard, TPipeline& pipeline); +THolder CreateExecuteKqpDataTxUnit(TDataShard& dataShard, TPipeline& pipeline); THolder CreateExecuteDistributedEraseTxUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateExecuteCommitWritesTxUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateCompleteOperationUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreateExecuteKqpScanTxUnit(TDataShard &dataShard, TPipeline &pipeline); +THolder CreateCompleteWriteUnit(TDataShard& dataShard, TPipeline& pipeline); +THolder CreateExecuteKqpScanTxUnit(TDataShard& dataShard, TPipeline& pipeline); THolder CreateMakeScanSnapshotUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateWaitForStreamClearanceUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateReadTableScanUnit(TDataShard &dataShard, TPipeline &pipeline); @@ -69,7 +73,6 @@ THolder CreateAlterCdcStreamUnit(TDataShard &dataShard, TPipelin THolder CreateDropCdcStreamUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateCheckReadUnit(TDataShard &dataShard, TPipeline &pipeline); THolder CreateReadUnit(TDataShard &dataShard, TPipeline &pipeline); -THolder CreateWriteUnit(TDataShard& dataShard, TPipeline& pipeline); } // namespace NDataShard } // namespace NKikimr diff --git a/ydb/core/tx/datashard/execution_unit_kind.h b/ydb/core/tx/datashard/execution_unit_kind.h index 135dac076c8d..b9e2f74ef6a2 100644 --- a/ydb/core/tx/datashard/execution_unit_kind.h +++ b/ydb/core/tx/datashard/execution_unit_kind.h @@ -13,6 +13,7 @@ enum class EExecutionUnitKind: ui32 { CheckRead, CheckWrite, StoreDataTx, + StoreWrite, StoreSchemeTx, StoreSnapshotTx, StoreDistributedEraseTx, @@ -24,8 +25,8 @@ enum class EExecutionUnitKind: ui32 { WaitForPlan, PlanQueue, LoadTxDetails, + LoadWriteDetails, FinalizeDataTxPlan, - FinalizeWriteTxPlan, ProtectSchemeEchoes, BuildDataTxOutRS, BuildKqpDataTxOutRS, @@ -33,6 +34,7 @@ enum class EExecutionUnitKind: ui32 { StoreAndSendOutRS, PrepareDataTxInRS, PrepareKqpDataTxInRS, + PrepareWriteTxInRS, PrepareDistributedEraseTxInRS, LoadAndWaitInRS, ExecuteDataTx, @@ -42,6 +44,7 @@ enum class EExecutionUnitKind: ui32 { ExecuteRead, ExecuteWrite, CompleteOperation, + CompleteWrite, ExecuteKqpScanTx, MakeScanSnapshot, WaitForStreamClearance, diff --git a/ydb/core/tx/datashard/finalize_plan_tx_unit.cpp b/ydb/core/tx/datashard/finalize_plan_tx_unit.cpp new file mode 100644 index 000000000000..70ccbf9a51b4 --- /dev/null +++ b/ydb/core/tx/datashard/finalize_plan_tx_unit.cpp @@ -0,0 +1,54 @@ +#include "datashard_failpoints.h" +#include "datashard_impl.h" +#include "datashard_pipeline.h" +#include "execution_unit_ctors.h" +#include "probes.h" + +LWTRACE_USING(DATASHARD_PROVIDER) + +namespace NKikimr { +namespace NDataShard { + +class TFinalizeDataTxPlanUnit: public TExecutionUnit { +public: + TFinalizeDataTxPlanUnit(TDataShard& dataShard, TPipeline& pipeline) + : TExecutionUnit(EExecutionUnitKind::FinalizeDataTxPlan, false, dataShard, pipeline) + { + } + + bool IsReadyToExecute(TOperation::TPtr) const override { + return true; + } + + EExecutionStatus Execute(TOperation::TPtr op, TTransactionContext& txc, const TActorContext& ctx) override { + Y_UNUSED(txc); + Y_UNUSED(ctx); + + TActiveTransaction* tx = dynamic_cast(op.Get()); + Y_VERIFY_S(tx, "cannot cast operation of kind " << op->GetKind()); + Y_VERIFY_S(tx->IsDataTx(), "unexpected non-data tx"); + + if (auto& dataTx = tx->GetDataTx()) { + // Restore transaction type flags + if (dataTx->IsKqpDataTx() && !tx->IsKqpDataTransaction()) + tx->SetKqpDataTransactionFlag(); + Y_VERIFY_S(!dataTx->IsKqpScanTx(), "unexpected kqp scan tx"); + } + + tx->FinalizeDataTxPlan(); + + return EExecutionStatus::Executed; + } + + void Complete(TOperation::TPtr op, const TActorContext& ctx) override { + Y_UNUSED(op); + Y_UNUSED(ctx); + } +}; + +THolder CreateFinalizeDataTxPlanUnit(TDataShard& dataShard, TPipeline& pipeline) { + return THolder(new TFinalizeDataTxPlanUnit(dataShard, pipeline)); +} + +} // namespace NDataShard +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/finish_propose_unit.cpp b/ydb/core/tx/datashard/finish_propose_unit.cpp index 7c30cb960093..545a1f88b2c4 100644 --- a/ydb/core/tx/datashard/finish_propose_unit.cpp +++ b/ydb/core/tx/datashard/finish_propose_unit.cpp @@ -180,7 +180,7 @@ void TFinishProposeUnit::CompleteRequest(TOperation::TPtr op, if (op->HasNeedDiagnosticsFlag()) AddDiagnosticsResult(res); - DataShard.FillExecutionStats(op->GetExecutionProfile(), *res); + DataShard.FillExecutionStats(op->GetExecutionProfile(), *res->Record.MutableTxStats()); DataShard.IncCounter(COUNTER_TX_RESULT_SIZE, res->Record.GetTxResult().size()); diff --git a/ydb/core/tx/datashard/finish_propose_write_unit.cpp b/ydb/core/tx/datashard/finish_propose_write_unit.cpp index d6c9dffa10fa..fec8116d9d54 100644 --- a/ydb/core/tx/datashard/finish_propose_write_unit.cpp +++ b/ydb/core/tx/datashard/finish_propose_write_unit.cpp @@ -158,12 +158,10 @@ void TFinishProposeWriteUnit::CompleteRequest(TOperation::TPtr op, const TActorC << DataShard.TabletID() << " send to client, propose latency: " << duration.MilliSeconds() << " ms, status: " << res->GetStatus()); - TString errors = res->GetError(); - if (errors.size()) { + if (res->IsError()) { LOG_LOG_S_THROTTLE(DataShard.GetLogThrottler(TDataShard::ELogThrottlerType::FinishProposeUnit_CompleteRequest), ctx, NActors::NLog::PRI_ERROR, NKikimrServices::TX_DATASHARD, "Errors while proposing transaction txid " << op->GetTxId() - << " at tablet " << DataShard.TabletID() << " status: " - << res->GetStatus() << " errors: " << errors); + << " at tablet " << DataShard.TabletID() << " " << res->GetError()); } if (res->IsPrepared()) { @@ -173,8 +171,9 @@ void TFinishProposeWriteUnit::CompleteRequest(TOperation::TPtr op, const TActorC DataShard.CheckMvccStateChangeCanStart(ctx); } - if (op->HasNeedDiagnosticsFlag()) - AddDiagnosticsResult(*res); + AddDiagnosticsResult(*res); + + DataShard.FillExecutionStats(op->GetExecutionProfile(), *res->Record.MutableTxStats()); if (!gSkipRepliesFailPoint.Check(DataShard.TabletID(), op->GetTxId())) { if (res->IsPrepared()) { @@ -184,7 +183,7 @@ void TFinishProposeWriteUnit::CompleteRequest(TOperation::TPtr op, const TActorC res->SetOrbit(std::move(op->Orbit)); } - ctx.Send(writeOp->GetEv()->Sender, res.release(), 0, writeOp->GetEv()->Cookie); + ctx.Send(op->GetTarget(), res.release(), 0, op->GetCookie()); } } diff --git a/ydb/core/tx/datashard/load_write_details_unit.cpp b/ydb/core/tx/datashard/load_write_details_unit.cpp new file mode 100644 index 000000000000..c04f3e8f5a1b --- /dev/null +++ b/ydb/core/tx/datashard/load_write_details_unit.cpp @@ -0,0 +1,64 @@ +#include "datashard_pipeline.h" +#include "execution_unit_ctors.h" +#include "datashard_write_operation.h" + +namespace NKikimr { +namespace NDataShard { + +class TLoadWriteDetailsUnit : public TExecutionUnit { +public: + TLoadWriteDetailsUnit(TDataShard &dataShard, + TPipeline &pipeline); + ~TLoadWriteDetailsUnit() override; + + bool IsReadyToExecute(TOperation::TPtr op) const override; + EExecutionStatus Execute(TOperation::TPtr op, + TTransactionContext &txc, + const TActorContext &ctx) override; + void Complete(TOperation::TPtr op, + const TActorContext &ctx) override; + +private: +}; + +TLoadWriteDetailsUnit::TLoadWriteDetailsUnit(TDataShard &dataShard, + TPipeline &pipeline) + : TExecutionUnit(EExecutionUnitKind::LoadTxDetails, true, dataShard, pipeline) +{ +} + +TLoadWriteDetailsUnit::~TLoadWriteDetailsUnit() +{ +} + +bool TLoadWriteDetailsUnit::IsReadyToExecute(TOperation::TPtr) const +{ + return true; +} + +EExecutionStatus TLoadWriteDetailsUnit::Execute(TOperation::TPtr op, + TTransactionContext &txc, + const TActorContext &ctx) +{ + TWriteOperation::TPtr writeOp = dynamic_cast(op.Get()); + Y_VERIFY_S(writeOp, "cannot cast operation of kind " << op->GetKind()); + + if (!Pipeline.LoadWriteDetails(txc, ctx, writeOp)) + return EExecutionStatus::Restart; + + return EExecutionStatus::Executed; +} + +void TLoadWriteDetailsUnit::Complete(TOperation::TPtr, + const TActorContext &) +{ +} + +THolder CreateLoadWriteDetailsUnit(TDataShard &dataShard, + TPipeline &pipeline) +{ + return MakeHolder(dataShard, pipeline); +} + +} // namespace NDataShard +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/operation.h b/ydb/core/tx/datashard/operation.h index d5499e58d24b..0b8e13a271d9 100644 --- a/ydb/core/tx/datashard/operation.h +++ b/ydb/core/tx/datashard/operation.h @@ -28,6 +28,12 @@ using NTabletFlatExecutor::TTableSnapshotContext; class TDataShard; +enum class ERestoreDataStatus { + Ok, + Restart, + Error, +}; + enum class ETxOrder { Unknown, Before, @@ -505,6 +511,33 @@ struct TExecutionProfile { THashMap UnitProfiles; }; +class TValidatedDataTx; +class TValidatedWriteTx; + +class TValidatedTx { +public: + using TPtr = std::shared_ptr; + + virtual ~TValidatedTx() = default; + + enum class EType { + DataTx, + WriteTx + }; + +public: + virtual EType GetType() const = 0; + virtual ui64 GetTxId() const = 0; + virtual ui64 GetMemoryConsumption() const = 0; + + bool IsProposed() const { + return GetSource() != TActorId(); + } + + YDB_ACCESSOR_DEF(TActorId, Source); + YDB_ACCESSOR_DEF(ui64, TxCacheUsage); +}; + struct TOperationAllListTag {}; struct TOperationGlobalListTag {}; struct TOperationDelayedReadListTag {}; diff --git a/ydb/core/tx/datashard/prepare_kqp_data_tx_in_rs_unit.cpp b/ydb/core/tx/datashard/prepare_kqp_data_tx_in_rs_unit.cpp index d8042e00fee6..3a0ca72d5de3 100644 --- a/ydb/core/tx/datashard/prepare_kqp_data_tx_in_rs_unit.cpp +++ b/ydb/core/tx/datashard/prepare_kqp_data_tx_in_rs_unit.cpp @@ -57,7 +57,7 @@ EExecutionStatus TPrepareKqpDataTxInRSUnit::Execute(TOperation::TPtr op, TTransa try { KqpPrepareInReadsets(op->InReadSets(), tx->GetDataTx()->GetKqpLocks(), - tx->GetDataTx()->GetKqpTasksRunner(), DataShard.TabletID()); + &tx->GetDataTx()->GetKqpTasksRunner(), DataShard.TabletID()); } catch (const yexception& e) { LOG_CRIT_S(ctx, NKikimrServices::TX_DATASHARD, "Exception while preparing in-readsets for KQP transaction " << *op << " at " << DataShard.TabletID() << ": " << CurrentExceptionMessage()); diff --git a/ydb/core/tx/datashard/prepare_write_tx_in_rs_unit.cpp b/ydb/core/tx/datashard/prepare_write_tx_in_rs_unit.cpp new file mode 100644 index 000000000000..e93a921245c9 --- /dev/null +++ b/ydb/core/tx/datashard/prepare_write_tx_in_rs_unit.cpp @@ -0,0 +1,79 @@ +#include "datashard_impl.h" +#include "datashard_kqp.h" +#include "datashard_pipeline.h" +#include "execution_unit_ctors.h" +#include "datashard_write_operation.h" + +namespace NKikimr { +namespace NDataShard { + +using namespace NMiniKQL; + +class TPrepareWriteTxInRSUnit : public TExecutionUnit { +public: + TPrepareWriteTxInRSUnit(TDataShard &dataShard, TPipeline &pipeline); + ~TPrepareWriteTxInRSUnit() override; + + bool IsReadyToExecute(TOperation::TPtr op) const override; + EExecutionStatus Execute(TOperation::TPtr op, TTransactionContext &txc, const TActorContext &ctx) override; + void Complete(TOperation::TPtr op, const TActorContext &ctx) override; +}; + +TPrepareWriteTxInRSUnit::TPrepareWriteTxInRSUnit(TDataShard &dataShard, + TPipeline &pipeline) + : TExecutionUnit(EExecutionUnitKind::PrepareWriteTxInRS, true, dataShard, pipeline) {} + +TPrepareWriteTxInRSUnit::~TPrepareWriteTxInRSUnit() {} + +bool TPrepareWriteTxInRSUnit::IsReadyToExecute(TOperation::TPtr) const { + return true; +} + +EExecutionStatus TPrepareWriteTxInRSUnit::Execute(TOperation::TPtr op, TTransactionContext &txc, + const TActorContext &ctx) +{ + TWriteOperation* writeOp = dynamic_cast(op.Get()); + Y_VERIFY_S(writeOp, "cannot cast operation of kind " << op->GetKind()); + + const TValidatedWriteTx::TPtr& writeTx = writeOp->GetWriteTx(); + + if (writeOp->IsTxDataReleased()) { + switch (Pipeline.RestoreDataTx(writeOp, txc)) { + case ERestoreDataStatus::Ok: + break; + case ERestoreDataStatus::Restart: + return EExecutionStatus::Restart; + case ERestoreDataStatus::Error: + Y_ABORT("Failed to restore writeOp data: %s", writeTx->GetErrStr().c_str()); + } + } + + if (writeTx->CheckCancelled()) { + writeOp->ReleaseTxData(txc); + BuildResult(op, NKikimrTxDataShard::TEvProposeTransactionResult::CANCELLED) + ->AddError(NKikimrTxDataShard::TError::EXECUTION_CANCELLED, "Tx was cancelled"); + + DataShard.IncCounter(op->IsImmediate() ? COUNTER_IMMEDIATE_TX_CANCELLED : COUNTER_PLANNED_TX_CANCELLED); + + return EExecutionStatus::Executed; + } + + try { + KqpPrepareInReadsets(op->InReadSets(), writeTx->GetKqpLocks() ? writeTx->GetKqpLocks().value() : NKikimrDataEvents::TKqpLocks{}, nullptr, DataShard.TabletID()); + } catch (const yexception& e) { + LOG_CRIT_S(ctx, NKikimrServices::TX_DATASHARD, "Exception while preparing in-readsets for KQP transaction " + << *op << " at " << DataShard.TabletID() << ": " << CurrentExceptionMessage()); + Y_FAIL_S("Unexpected exception in KQP in-readsets prepare: " << CurrentExceptionMessage()); + } + + return EExecutionStatus::Executed; +} + +void TPrepareWriteTxInRSUnit::Complete(TOperation::TPtr, const TActorContext &) {} + +THolder CreatePrepareWriteTxInRSUnit(TDataShard &dataShard, TPipeline &pipeline) { + return THolder(new TPrepareWriteTxInRSUnit(dataShard, pipeline)); +} + +} // namespace NDataShard +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/store_data_tx_unit.cpp b/ydb/core/tx/datashard/store_data_tx_unit.cpp index 4869ea002d3a..48fb69b43c6f 100644 --- a/ydb/core/tx/datashard/store_data_tx_unit.cpp +++ b/ydb/core/tx/datashard/store_data_tx_unit.cpp @@ -45,9 +45,10 @@ EExecutionStatus TStoreDataTxUnit::Execute(TOperation::TPtr op, TActiveTransaction *tx = dynamic_cast(op.Get()); Y_VERIFY_S(tx, "cannot cast operation of kind " << op->GetKind()); - Y_ABORT_UNLESS(tx->GetDataTx()); + auto dataTx = tx->GetDataTx(); + Y_ABORT_UNLESS(dataTx); - bool cached = Pipeline.SaveForPropose(tx->GetDataTx()); + bool cached = Pipeline.SaveForPropose(dataTx); if (cached) { Pipeline.RegisterDistributedWrites(op, txc.DB); } diff --git a/ydb/core/tx/datashard/store_write_unit.cpp b/ydb/core/tx/datashard/store_write_unit.cpp new file mode 100644 index 000000000000..fd2979c58b00 --- /dev/null +++ b/ydb/core/tx/datashard/store_write_unit.cpp @@ -0,0 +1,80 @@ +#include "const.h" +#include "datashard_pipeline.h" +#include "execution_unit_ctors.h" + +#include "datashard_write_operation.h" + +namespace NKikimr { +namespace NDataShard { + +class TStoreWriteUnit : public TExecutionUnit { +public: + TStoreWriteUnit(TDataShard &dataShard, + TPipeline &pipeline); + ~TStoreWriteUnit() override; + + bool IsReadyToExecute(TOperation::TPtr op) const override; + EExecutionStatus Execute(TOperation::TPtr op, + TTransactionContext &txc, + const TActorContext &ctx) override; + void Complete(TOperation::TPtr op, + const TActorContext &ctx) override; + +private: +}; + +TStoreWriteUnit::TStoreWriteUnit(TDataShard &dataShard, + TPipeline &pipeline) + : TExecutionUnit(EExecutionUnitKind::StoreDataTx, false, dataShard, pipeline) +{ +} + +TStoreWriteUnit::~TStoreWriteUnit() +{ +} + +bool TStoreWriteUnit::IsReadyToExecute(TOperation::TPtr) const +{ + return true; +} + +EExecutionStatus TStoreWriteUnit::Execute(TOperation::TPtr op, + TTransactionContext &txc, + const TActorContext &ctx) +{ + Y_ABORT_UNLESS(!op->IsAborted() && !op->IsInterrupted()); + + TWriteOperation* writeOp = TWriteOperation::CastWriteOperation(op); + auto writeTx = writeOp->GetWriteTx(); + Y_ABORT_UNLESS(writeTx); + + bool cached = Pipeline.SaveForPropose(writeTx); + if (cached) { + Pipeline.RegisterDistributedWrites(op, txc.DB); + } + + Pipeline.ProposeTx(op, writeOp->GetTxBody(), txc, ctx); + + if (!op->HasVolatilePrepareFlag()) { + writeOp->ClearTxBody(); + } + + writeOp->ClearWriteTx(); + + return EExecutionStatus::DelayCompleteNoMoreRestarts; +} + +void TStoreWriteUnit::Complete(TOperation::TPtr op, + const TActorContext &ctx) +{ + Pipeline.ProposeComplete(op, ctx); +} + +THolder CreateStoreWriteUnit(TDataShard &dataShard, + TPipeline &pipeline) +{ + return MakeHolder(dataShard, pipeline); +} + +} // namespace NDataShard +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp index 0e5b63f60a8a..a090992a7b0b 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp @@ -1913,7 +1913,7 @@ TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithE Y_VERIFY_S(colCount == 0 || colCount == writes.GetColumns().size(), "Only equal column count is supported now."); colCount = writes.GetColumns().size(); - const auto& row = rows.ProcessNextRow(); + const auto& row = rows.ProcessNextRow(tableId); Y_VERIFY(row.Cells.size() == colCount); std::copy(row.Cells.begin(), row.Cells.end(), std::back_inserter(cells)); } @@ -1931,7 +1931,7 @@ TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithE UNIT_ASSERT(blobData.size() < 8_MB); ui64 txId = record.GetTxId(); - auto txMode = NKikimr::NDataShard::EvWrite::Convertor::GetTxMode(record.GetFlags()); + auto txMode = NKikimr::NDataShard::NEvWrite::TConvertor::GetTxMode(record.GetFlags()); std::vector columnIds(colCount); std::iota(columnIds.begin(), columnIds.end(), 1); @@ -1957,17 +1957,25 @@ TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithE if (event->GetTypeRewrite() != NEvents::TDataEvents::EvWriteResult) return; - rows.CompleteNextRow(); - const auto& record = event->Get()->Record; Cerr << "EvWriteResult event is observed and will be replaced with EvProposeTransactionResult: " << record.ShortDebugString() << Endl; // Construct new EvProposeTransactionResult ui64 txId = record.GetTxId(); ui64 origin = record.GetOrigin(); - auto status = NKikimr::NDataShard::EvWrite::Convertor::GetStatus(record.GetStatus()); + auto status = NKikimr::NDataShard::NEvWrite::TConvertor::GetStatus(record.GetStatus()); auto evResult = std::make_unique(NKikimrTxDataShard::TX_KIND_DATA, origin, txId, status); + + if (status == NKikimrTxDataShard::TEvProposeTransactionResult::PREPARED) { + evResult->SetPrepared(record.GetMinStep(), record.GetMaxStep(), {}); + evResult->Record.MutableDomainCoordinators()->CopyFrom(record.GetDomainCoordinators()); + + rows.PrepareNextRow(); + } + else { + rows.CompleteNextRow(); + } // Replace event auto handle = new IEventHandle(event->Recipient, event->Sender, evResult.release(), 0, event->Cookie); @@ -2001,6 +2009,25 @@ void UploadRows(TTestActorRuntime& runtime, const TString& tablePath, const TVec UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, Ydb::StatusIds::SUCCESS, "Status: " << ev->Get()->Status << " Issues: " << ev->Get()->Issues); } +void SendProposeToCoordinator(Tests::TServer::TPtr server, const std::vector& affectedTabletIds, ui64 minStep, ui64 maxStep, ui64 txId) +{ + auto& runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + auto event = std::make_unique(coordinator, txId, 0, minStep, maxStep); + + auto* affectedSet = event->Record.MutableTransaction()->MutableAffectedSet(); + affectedSet->Reserve(affectedTabletIds.size()); + for (auto affectedTabletId : affectedTabletIds) { + auto* x = affectedSet->Add(); + x->SetTabletId(affectedTabletId); + x->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + } + + runtime.SendToPipe(coordinator, sender, event.release()); +} + void WaitTabletBecomesOffline(TServer::TPtr server, ui64 tabletId) { struct IsShardStateChange diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.h b/ydb/core/tx/datashard/ut_common/datashard_ut_common.h index 6b2482cd90c3..6e5c2d21b9f5 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.h +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.h @@ -712,17 +712,24 @@ NKikimrDataEvents::TEvWriteResult Write(TTestActorRuntime& runtime, TActorId sen NKikimrDataEvents::TEvWriteResult Write(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, ui64 txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED, NWilson::TTraceId traceId = {}); struct TEvWriteRow { - TEvWriteRow(std::initializer_list init) { + TEvWriteRow(const TTableId& tableId, std::initializer_list init) + : TableId(tableId) + { for (ui32 value : init) { Cells.emplace_back(TCell((const char*)&value, sizeof(ui32))); } } + TEvWriteRow(std::initializer_list init) + : TEvWriteRow({}, init) {} + + TTableId TableId; std::vector Cells; enum EStatus { Init, Processing, + Prepared, Completed } Status = Init; }; @@ -732,16 +739,24 @@ class TEvWriteRows : public std::vector { TEvWriteRows(std::initializer_list init) : std::vector(init) { } - const TEvWriteRow& ProcessNextRow() { - auto processedRow = std::find_if(begin(), end(), [](const auto& row) { return row.Status == TEvWriteRow::EStatus::Init; }); + const TEvWriteRow& ProcessNextRow(const TTableId& tableId) { + bool allTablesEmpty = std::all_of(begin(), end(), [](const auto& row) { return !bool(row.TableId); }); + auto processedRow = std::find_if(begin(), end(), [tableId, allTablesEmpty](const auto& row) { return row.Status == TEvWriteRow::EStatus::Init && (allTablesEmpty || row.TableId == tableId); }); Y_VERIFY_S(processedRow != end(), "There should be at least one EvWrite row to process."); + processedRow->Status = TEvWriteRow::EStatus::Processing; Cerr << "Processing next EvWrite row\n"; return *processedRow; } - void CompleteNextRow() { + void PrepareNextRow() { auto processedRow = std::find_if(begin(), end(), [](const auto& row) { return row.Status == TEvWriteRow::EStatus::Processing; }); Y_VERIFY_S(processedRow != end(), "There should be at lest one EvWrite row processing."); + processedRow->Status = TEvWriteRow::EStatus::Prepared; + Cerr << "Prepared next EvWrite row\n"; + } + void CompleteNextRow() { + auto processedRow = std::find_if(begin(), end(), [](const auto& row) { return row.Status == TEvWriteRow::EStatus::Processing || row.Status == TEvWriteRow::EStatus::Prepared; }); + Y_VERIFY_S(processedRow != end(), "There should be at lest one EvWrite row processing."); processedRow->Status = TEvWriteRow::EStatus::Completed; Cerr << "Completed next EvWrite row\n"; } @@ -751,6 +766,8 @@ TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithE void UploadRows(TTestActorRuntime& runtime, const TString& tablePath, const TVector>& types, const TVector& keys, const TVector& values); +void SendProposeToCoordinator(Tests::TServer::TPtr server, const std::vector& affectedTabletIds, ui64 minStep, ui64 maxStep, ui64 txId); + struct IsTxResultComplete { bool operator()(IEventHandle& ev) { diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make index d89f6fa8e5fe..247a0511bb3b 100644 --- a/ydb/core/tx/datashard/ya.make +++ b/ydb/core/tx/datashard/ya.make @@ -34,6 +34,7 @@ SRCS( check_snapshot_tx_unit.cpp check_write_unit.cpp complete_data_tx_unit.cpp + complete_write_unit.cpp completed_operations_unit.cpp conflicts_cache.cpp create_cdc_stream_unit.cpp @@ -138,6 +139,7 @@ SRCS( erase_rows_condition.cpp execute_commit_writes_tx_unit.cpp execute_data_tx_unit.cpp + execute_write_unit.cpp execute_distributed_erase_tx_unit.cpp execute_kqp_data_tx_unit.cpp execute_kqp_scan_tx_unit.cpp @@ -150,6 +152,7 @@ SRCS( export_iface.h export_scan.cpp finalize_build_index_unit.cpp + finalize_plan_tx_unit.cpp finish_propose_unit.cpp finish_propose_write_unit.cpp follower_edge.cpp @@ -159,6 +162,7 @@ SRCS( key_validator.cpp load_and_wait_in_rs_unit.cpp load_tx_details_unit.cpp + load_write_details_unit.cpp make_scan_snapshot_unit.cpp make_snapshot_unit.cpp move_index_unit.cpp @@ -167,6 +171,7 @@ SRCS( operation.h plan_queue_unit.cpp prepare_data_tx_in_rs_unit.cpp + prepare_write_tx_in_rs_unit.cpp prepare_distributed_erase_tx_in_rs_unit.cpp prepare_kqp_data_tx_in_rs_unit.cpp prepare_scheme_tx_in_rs_unit.cpp @@ -188,13 +193,13 @@ SRCS( store_and_send_out_rs_unit.cpp store_commit_writes_tx_unit.cpp store_data_tx_unit.cpp + store_write_unit.cpp store_distributed_erase_tx_unit.cpp store_scheme_tx_unit.cpp store_snapshot_tx_unit.cpp volatile_tx.cpp wait_for_plan_unit.cpp wait_for_stream_clearance_unit.cpp - write_unit.cpp upload_stats.cpp )