11#include " columnshard_impl.h"
22#include " blobs_action/transaction/tx_write.h"
33#include " blobs_action/transaction/tx_draft.h"
4+ #include " counters/columnshard.h"
45#include " operations/slice_builder.h"
56#include " operations/write_data.h"
67
@@ -21,9 +22,17 @@ void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const
2122 IncCounter (COUNTER_WRITE_OVERLOAD);
2223 CSCounters.OnOverloadInsertTable (writeData.GetSize ());
2324 break ;
24- case EOverloadStatus::Shard :
25+ case EOverloadStatus::ShardTxInFly :
2526 IncCounter (COUNTER_WRITE_OVERLOAD);
26- CSCounters.OnOverloadShard (writeData.GetSize ());
27+ CSCounters.OnOverloadShardTx (writeData.GetSize ());
28+ break ;
29+ case EOverloadStatus::ShardWritesInFly:
30+ IncCounter (COUNTER_WRITE_OVERLOAD);
31+ CSCounters.OnOverloadShardWrites (writeData.GetSize ());
32+ break ;
33+ case EOverloadStatus::ShardWritesSizeInFly:
34+ IncCounter (COUNTER_WRITE_OVERLOAD);
35+ CSCounters.OnOverloadShardWritesSize (writeData.GetSize ());
2736 break ;
2837 case EOverloadStatus::None:
2938 Y_ABORT (" invalid function usage" );
@@ -45,8 +54,20 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId)
4554 return EOverloadStatus::InsertTable;
4655 }
4756
48- if (WritesMonitor.ShardOverloaded ()) {
49- return EOverloadStatus::Shard;
57+ ui64 txLimit = Settings.OverloadTxInFlight ;
58+ ui64 writesLimit = Settings.OverloadWritesInFlight ;
59+ ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight ;
60+ if (txLimit && Executor ()->GetStats ().TxInFly > txLimit) {
61+ AFL_WARN (NKikimrServices::TX_COLUMNSHARD)(" event" , " shard_overload" )(" reason" , " tx_in_fly" )(" sum" , Executor ()->GetStats ().TxInFly )(" limit" , txLimit);
62+ return EOverloadStatus::ShardTxInFly;
63+ }
64+ if (writesLimit && WritesMonitor.GetWritesInFlight () > writesLimit) {
65+ AFL_WARN (NKikimrServices::TX_COLUMNSHARD)(" event" , " shard_overload" )(" reason" , " writes_in_fly" )(" sum" , WritesMonitor.GetWritesInFlight ())(" limit" , writesLimit);
66+ return EOverloadStatus::ShardWritesInFly;
67+ }
68+ if (writesSizeLimit && WritesMonitor.GetWritesSizeInFlight () > writesSizeLimit) {
69+ AFL_WARN (NKikimrServices::TX_COLUMNSHARD)(" event" , " shard_overload" )(" reason" , " writes_size_in_fly" )(" sum" , WritesMonitor.GetWritesSizeInFlight ())(" limit" , writesSizeLimit);
70+ return EOverloadStatus::ShardWritesSizeInFly;
5071 }
5172 return EOverloadStatus::None;
5273}
@@ -57,7 +78,8 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo
5778 auto & putResult = ev->Get ()->GetPutResult ();
5879 OnYellowChannels (putResult);
5980 NOlap::TWritingBuffer& wBuffer = ev->Get ()->MutableWritesBuffer ();
60- auto & baseAggregations = wBuffer.GetAggregations ();
81+ auto baseAggregations = wBuffer.GetAggregations ();
82+ wBuffer.InitReplyReceived (TMonotonic::Now ());
6183
6284 auto wg = WritesMonitor.FinishWrite (wBuffer.GetSumSize (), wBuffer.GetAggregations ().size ());
6385
@@ -70,13 +92,13 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo
7092
7193 auto result = std::make_unique<TEvColumnShard::TEvWriteResult>(TabletID (), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR);
7294 ctx.Send (writeMeta.GetSource (), result.release ());
73- CSCounters.OnFailedWriteResponse ();
95+ CSCounters.OnFailedWriteResponse (EWriteFailReason::NoTable );
7496 wBuffer.RemoveData (aggr, StoragesManager->GetInsertOperator ());
7597 continue ;
7698 }
7799
78100 if (putResult.GetPutStatus () != NKikimrProto::OK) {
79- CSCounters.OnWritePutBlobsFail (( TMonotonic::Now () - writeMeta.GetWriteStartInstant ()). MilliSeconds ());
101+ CSCounters.OnWritePutBlobsFail (TMonotonic::Now () - writeMeta.GetWriteStartInstant ());
80102 IncCounter (COUNTER_WRITE_FAIL);
81103
82104 auto errCode = NKikimrTxColumnShard::EResultStatus::STORAGE_ERROR;
@@ -97,16 +119,17 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo
97119 auto result = NEvents::TDataEvents::TEvWriteResult::BuildError (TabletID (), operation->GetTxId (), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, " put data fails" );
98120 ctx.Send (writeMeta.GetSource (), result.release ());
99121 }
100- CSCounters.OnFailedWriteResponse ();
122+ CSCounters.OnFailedWriteResponse (EWriteFailReason::PutBlob );
101123 wBuffer.RemoveData (aggr, StoragesManager->GetInsertOperator ());
102124 } else {
103125 const TMonotonic now = TMonotonic::Now ();
104- CSCounters.OnWritePutBlobsSuccess ((now - writeMeta.GetWriteStartInstant ()).MilliSeconds ());
105- CSCounters.OnWriteMiddle1PutBlobsSuccess ((now - writeMeta.GetWriteMiddle1StartInstant ()).MilliSeconds ());
106- CSCounters.OnWriteMiddle2PutBlobsSuccess ((now - writeMeta.GetWriteMiddle2StartInstant ()).MilliSeconds ());
107- CSCounters.OnWriteMiddle3PutBlobsSuccess ((now - writeMeta.GetWriteMiddle3StartInstant ()).MilliSeconds ());
108- CSCounters.OnWriteMiddle4PutBlobsSuccess ((now - writeMeta.GetWriteMiddle4StartInstant ()).MilliSeconds ());
109- CSCounters.OnWriteMiddle5PutBlobsSuccess ((now - writeMeta.GetWriteMiddle5StartInstant ()).MilliSeconds ());
126+ CSCounters.OnWritePutBlobsSuccess (now - writeMeta.GetWriteStartInstant ());
127+ CSCounters.OnWriteMiddle1PutBlobsSuccess (now - writeMeta.GetWriteMiddle1StartInstant ());
128+ CSCounters.OnWriteMiddle2PutBlobsSuccess (now - writeMeta.GetWriteMiddle2StartInstant ());
129+ CSCounters.OnWriteMiddle3PutBlobsSuccess (now - writeMeta.GetWriteMiddle3StartInstant ());
130+ CSCounters.OnWriteMiddle4PutBlobsSuccess (now - writeMeta.GetWriteMiddle4StartInstant ());
131+ CSCounters.OnWriteMiddle5PutBlobsSuccess (now - writeMeta.GetWriteMiddle5StartInstant ());
132+ CSCounters.OnWriteMiddle6PutBlobsSuccess (now - writeMeta.GetWriteMiddle6StartInstant ());
110133 LOG_S_DEBUG (" Write (record) into pathId " << writeMeta.GetTableId ()
111134 << (writeMeta.GetWriteId () ? (" writeId " + ToString (writeMeta.GetWriteId ())).c_str () : " " ) << " at tablet " << TabletID ());
112135
@@ -139,18 +162,20 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex
139162 IncCounter (signalIndex);
140163
141164 ctx.Send (source, std::make_unique<TEvColumnShard::TEvWriteResult>(TabletID (), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR));
142- CSCounters.OnFailedWriteResponse ();
143165 return ;
144166 };
145167
146168 if (!AppDataVerified ().ColumnShardConfig .GetWritingEnabled ()) {
147169 AFL_WARN (NKikimrServices::TX_COLUMNSHARD)(" event" , " skip_writing" )(" reason" , " disabled" );
170+ CSCounters.OnFailedWriteResponse (EWriteFailReason::Disabled);
148171 return returnFail (COUNTER_WRITE_FAIL);
149172 }
150173
151174 if (!TablesManager.IsReadyForWrite (tableId)) {
152175 LOG_S_NOTICE (" Write (fail) into pathId:" << writeMeta.GetTableId () << (TablesManager.HasPrimaryIndex ()? " " : " no index" )
153176 << " at tablet " << TabletID ());
177+
178+ CSCounters.OnFailedWriteResponse (EWriteFailReason::NoTable);
154179 return returnFail (COUNTER_WRITE_FAIL);
155180 }
156181
@@ -159,6 +184,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex
159184 if (!arrowData->ParseFromProto (record)) {
160185 LOG_S_ERROR (" Write (fail) " << record.GetData ().size () << " bytes into pathId " << writeMeta.GetTableId ()
161186 << " at tablet " << TabletID ());
187+ CSCounters.OnFailedWriteResponse (EWriteFailReason::IncorrectSchema);
162188 return returnFail (COUNTER_WRITE_FAIL);
163189 }
164190
@@ -167,7 +193,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex
167193 if (overloadStatus != EOverloadStatus::None) {
168194 std::unique_ptr<NActors::IEventBase> result = std::make_unique<TEvColumnShard::TEvWriteResult>(TabletID (), writeData.GetWriteMeta (), NKikimrTxColumnShard::EResultStatus::OVERLOADED);
169195 OverloadWriteFail (overloadStatus, writeData, std::move (result), ctx);
170- CSCounters.OnFailedWriteResponse ();
196+ CSCounters.OnFailedWriteResponse (EWriteFailReason::Overload );
171197 } else {
172198 if (ui64 writeId = (ui64)HasLongTxWrite (writeMeta.GetLongTxIdUnsafe (), writeMeta.GetWritePartId ())) {
173199 LOG_S_DEBUG (" Write (duplicate) into pathId " << writeMeta.GetTableId ()
@@ -179,7 +205,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex
179205 auto result = std::make_unique<TEvColumnShard::TEvWriteResult>(
180206 TabletID (), writeMeta, writeId, NKikimrTxColumnShard::EResultStatus::SUCCESS);
181207 ctx.Send (writeMeta.GetSource (), result.release ());
182- CSCounters.OnFailedWriteResponse ();
208+ CSCounters.OnFailedWriteResponse (EWriteFailReason::LongTxDuplication );
183209 return ;
184210 }
185211
0 commit comments