store save state as a property of the aggMetric instead of the chunk

Rather than tracking the state of each individual chunk, just keep
a record of the most recent saveStart (add to write queue) / saveFinish (write to Cassandra)
as properties of the aggMetric. When we save chunks we always save all unsaved
chunks, so we don't lose anything by tracking the save state for all chunks in
one variable.
issue #452
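As a rough sketch of what this amounts to (a sketch only, assuming the field names used in the hunks below plus the saveFinish record mentioned above; the real AggMetric in metrictank carries many more fields), the per-chunk Saved/Saving flags collapse into two watermarks and a last-write timestamp:

package mdata // package name assumed for illustration

import "sync"

// aggMetricSketch is a minimal stand-in for AggMetric, showing only the new
// bookkeeping fields introduced by this change.
type aggMetricSketch struct {
    sync.Mutex
    lastSaveStart  uint32 // T0 of the newest chunk handed to the write queue
    lastSaveFinish uint32 // T0 of the newest chunk confirmed written to Cassandra
    lastWrite      uint32 // unix timestamp of the last point accepted
}

// saved reports whether the chunk starting at chunkT0 has already been queued
// for saving. Because persist() always queues every unsaved chunk up to and
// including the current one, this single watermark comparison replaces the old
// per-chunk Saved/Saving flags.
func (a *aggMetricSketch) saved(chunkT0 uint32) bool {
    return a.lastSaveStart >= chunkT0
}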
@@ -332,18 +252,12 @@ func (a *AggMetric) addAggregators(ts uint32, val float64) {
 
 // write a chunk to persistent storage. This should only be called while holding a.Lock()
 func (a *AggMetric) persist(pos int) {
-    if !cluster.ThisNode.IsPrimary() {
-        if LogLevel < 2 {
-            log.Debug("AM persist(): node is not primary, not saving chunk.")
-        }
-        return
-    }
-
-    pre := time.Now()
     chunk := a.Chunks[pos]
+    pre := time.Now()
 
-    if chunk.Saved || chunk.Saving {
-        // this can happen if chunk was persisted by GC (stale) and then new data triggered another persist call
+    if a.lastSaveStart >= chunk.T0 {
+        // this can happen if there are 2 primary MT nodes both saving chunks to Cassandra or
+        // a primary failed and this node was promoted to be primary but metric consuming is lagging.
         log.Debug("AM persist(): duplicate persist call for chunk.")
         return
     }
@@ -352,11 +266,12 @@ func (a *AggMetric) persist(pos int) {
     pending := make([]*ChunkWriteRequest, 1)
     // add the current chunk to the list of chunks to send to the writeQueue
     pending[0] = &ChunkWriteRequest{
+        metric:    a,
         key:       a.Key,
-        chunk:     chunk,
+        span:      a.ChunkSpan,
         ttl:       a.ttl,
+        chunk:     chunk,
         timestamp: time.Now(),
-        span:      a.ChunkSpan,
     }
 
     // if we recently became the primary, there may be older chunks
@@ -367,16 +282,17 @@ func (a *AggMetric) persist(pos int) {
         previousPos += len(a.Chunks)
     }
     previousChunk := a.Chunks[previousPos]
-    for (previousChunk.T0 < chunk.T0) && !previousChunk.Saved && !previousChunk.Saving {
+    for (previousChunk.T0 < chunk.T0) && (a.lastSaveStart < previousChunk.T0) {
         if LogLevel < 2 {
             log.Debug("AM persist(): old chunk needs saving. Adding %s:%d to writeQueue", a.Key, previousChunk.T0)
         }
         pending = append(pending, &ChunkWriteRequest{
+            metric:    a,
             key:       a.Key,
-            chunk:     previousChunk,
+            span:      a.ChunkSpan,
             ttl:       a.ttl,
+            chunk:     previousChunk,
             timestamp: time.Now(),
-            span:      a.ChunkSpan,
         })
         previousPos--
         if previousPos < 0 {
@@ -385,6 +301,9 @@ func (a *AggMetric) persist(pos int) {
         previousChunk = a.Chunks[previousPos]
     }
 
+    // Every chunk with a T0 <= this chunk's T0 is now either saved, or in the writeQueue.
+    a.lastSaveStart = chunk.T0
+
     if LogLevel < 2 {
         log.Debug("AM persist(): sending %d chunks to write queue", len(pending))
     }
@@ -402,9 +321,8 @@ func (a *AggMetric) persist(pos int) {
         if LogLevel < 2 {
             log.Debug("AM persist(): sealing chunk %d/%d (%s:%d) and adding to write queue.", pendingChunk, len(pending), a.Key, chunk.T0)
         }
-        pending[pendingChunk].chunk.Finish()
         a.store.Add(pending[pendingChunk])
-        pending[pendingChunk].chunk.Saving = true
+
         pendingChunk--
     }
     persistDuration.Value(time.Now().Sub(pre))
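Note that the ChunkWriteRequest now carries metric: a (and the chunk span) alongside the chunk itself. The consumer side isn't part of this diff, but presumably this lets the store notify the aggMetric once Cassandra acknowledges the write so the saveFinish record can advance. A hedged sketch of such a callback, reusing the aggMetricSketch type from above and a hypothetical method name:

// syncSaveState is a hypothetical callback the write path could invoke after
// the chunk starting at chunkT0 has been written to Cassandra. It only ever
// moves the watermarks forward, so late or duplicate acks are harmless.
func (a *aggMetricSketch) syncSaveState(chunkT0 uint32) {
    a.Lock()
    defer a.Unlock()
    if chunkT0 > a.lastSaveFinish {
        a.lastSaveFinish = chunkT0
    }
    if chunkT0 > a.lastSaveStart {
        // covers chunks saved by another primary that this node never queued itself
        a.lastSaveStart = chunkT0
    }
}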
@@ -432,6 +350,7 @@ func (a *AggMetric) Add(ts uint32, val float64) {
         }
 
         log.Debug("AM %s Add(): created first chunk with first point: %v", a.Key, a.Chunks[0])
+        a.lastWrite = uint32(time.Now().Unix())
         a.addAggregators(ts, val)
         return
     }
@@ -440,34 +359,40 @@ func (a *AggMetric) Add(ts uint32, val float64) {
 
     if t0 == currentChunk.T0 {
         // last prior data was in same chunk as new point
-        if currentChunk.Saving {
-            // if we're already saving the chunk, it means it has the end-of-stream marker and any new points behind it wouldn't be read by an iterator
+        if currentChunk.Closed {
+            // if we've already 'finished' the chunk, it means it has the end-of-stream marker and any new points behind it wouldn't be read by an iterator
             // you should monitor this metric closely, it indicates that maybe your GC settings don't match how you actually send data (too late)
-            addToSavingChunk.Inc()
+            addToClosedChunk.Inc()
             return
         }
 
-        if err := currentChunk.Push(ts, val); err == nil {
-            if currentChunk.Saved {
-                // if we're here, it means we marked it as Saved because it was saved by an other primary, not by us since Saving is false.
-                // typically this happens when non-primaries receive metrics that the primary already saved (maybe cause their metrics consumer is laggy)
-                // we allow adding data to such chunks in that case, though this open the possibility for data to be rejected by the primary, to be
-                // visible on secondaries.
-                addToSavedChunk.Inc()
-            }
-        } else {
+        if err := currentChunk.Push(ts, val); err != nil {
             log.Debug("AM failed to add metric to chunk for %s. %s", a.Key, err)
             metricsTooOld.Inc()
             return
         }
+        a.lastWrite = uint32(time.Now().Unix())
         log.Debug("AM %s Add(): pushed new value to last chunk: %v", a.Key, a.Chunks[0])
     } else if t0 < currentChunk.T0 {
         log.Debug("AM Point at %d has t0 %d, goes back into previous chunk. CurrentChunk t0: %d, LastTs: %d", ts, t0, currentChunk.T0, currentChunk.LastTs)
         metricsTooOld.Inc()
         return
     } else {
-        // persist the chunk. If the writeQueue is full, then this will block.
-        a.persist(a.CurrentChunkPos)
+        // Data belongs in a new chunk.
+
+        // If it isn't finished already, add the end-of-stream marker and flag the chunk as "closed"
+        if !currentChunk.Closed {
+            currentChunk.Finish()
+        }
+
+        // If we are a primary node, then add the chunk to the write queue to be saved to Cassandra
+        if cluster.ThisNode.IsPrimary() {
+            if LogLevel < 2 {
+                log.Debug("AM persist(): node is primary, saving chunk.")
+            }
+            // persist the chunk. If the writeQueue is full, then this will block.
+            a.persist(a.CurrentChunkPos)
+        }
 
         a.CurrentChunkPos++
         if a.CurrentChunkPos >= int(a.NumChunks) {
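The lastWrite timestamp that Add() now maintains also gives any garbage-collection pass a cheap staleness check: compare one timestamp against a cutoff instead of inspecting individual chunks. An illustrative helper under the same assumptions as the sketches above, with a hypothetical chunkMaxStale threshold:

// stale reports whether this metric has received no data for at least
// chunkMaxStale seconds; now is a unix timestamp, chunkMaxStale a duration in
// seconds. Illustrative only: the real GC logic lives elsewhere in metrictank.
func (a *aggMetricSketch) stale(now, chunkMaxStale uint32) bool {
    return a.lastWrite < now-chunkMaxStale
}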
@@ -490,6 +415,7 @@ func (a *AggMetric) Add(ts uint32, val float64) {
         }
         log.Debug("AM %s Add(): cleared chunk at %d of %d and replaced with new. and added the new point: %s", a.Key, a.CurrentChunkPos, len(a.Chunks), a.Chunks[a.CurrentChunkPos])