Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Commit 5e667b3

Browse files
authored
Merge pull request #1010 from grafana/block_for_accounting_events
Block to submit accounting events. Force-merging after coordinating with @Dieterbe.
2 parents 54300e8 + 9f34629 commit 5e667b3

File tree

4 files changed

+156
-21
lines changed

4 files changed

+156
-21
lines changed

dashboard.json

+143-11
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"gnetId": null,
3737
"graphTooltip": 1,
3838
"id": null,
39-
"iteration": 1535621129449,
39+
"iteration": 1535648268402,
4040
"links": [],
4141
"panels": [
4242
{
@@ -3613,8 +3613,8 @@
36133613
"fill": 0,
36143614
"grid": {},
36153615
"gridPos": {
3616-
"h": 7,
3617-
"w": 12,
3616+
"h": 6,
3617+
"w": 8,
36183618
"x": 0,
36193619
"y": 74
36203620
},
@@ -3712,6 +3712,138 @@
37123712
"alignLevel": null
37133713
}
37143714
},
3715+
{
3716+
"aliasColors": {
3717+
"add-latency-p90": "#c15c17",
3718+
"max": "#890F02",
3719+
"used": "#3F6833",
3720+
"utilisation": "#6ed0e0"
3721+
},
3722+
"bars": false,
3723+
"dashLength": 10,
3724+
"dashes": false,
3725+
"datasource": "$datasource",
3726+
"editable": true,
3727+
"error": false,
3728+
"fill": 0,
3729+
"grid": {},
3730+
"gridPos": {
3731+
"h": 6,
3732+
"w": 7,
3733+
"x": 8,
3734+
"y": 74
3735+
},
3736+
"id": 50,
3737+
"legend": {
3738+
"avg": false,
3739+
"current": false,
3740+
"max": false,
3741+
"min": false,
3742+
"show": true,
3743+
"total": false,
3744+
"values": false
3745+
},
3746+
"lines": true,
3747+
"linewidth": 2,
3748+
"links": [],
3749+
"nullPointMode": "connected",
3750+
"percentage": false,
3751+
"pointradius": 5,
3752+
"points": false,
3753+
"renderer": "flot",
3754+
"seriesOverrides": [
3755+
{
3756+
"alias": "used",
3757+
"lines": false,
3758+
"pointradius": 1,
3759+
"points": true,
3760+
"yaxis": 2
3761+
},
3762+
{
3763+
"alias": "max",
3764+
"lines": false,
3765+
"pointradius": 1,
3766+
"points": true,
3767+
"yaxis": 2
3768+
},
3769+
{
3770+
"alias": "/latency/",
3771+
"fill": 2,
3772+
"linewidth": 0
3773+
}
3774+
],
3775+
"spaceLength": 10,
3776+
"stack": false,
3777+
"steppedLine": false,
3778+
"targets": [
3779+
{
3780+
"refCount": 1,
3781+
"refId": "A",
3782+
"target": "alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.max.gauge64), 'max')",
3783+
"textEditor": false
3784+
},
3785+
{
3786+
"refCount": 1,
3787+
"refId": "B",
3788+
"target": "alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.used.max.gauge32), 'used')",
3789+
"textEditor": false
3790+
},
3791+
{
3792+
"refCount": 1,
3793+
"refId": "C",
3794+
"target": "alias(averageSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.add.latency.p90.gauge32), 'add-latency-p90')",
3795+
"textEditor": false
3796+
},
3797+
{
3798+
"hide": true,
3799+
"refCount": 0,
3800+
"refId": "D",
3801+
"target": "alias(divideSeries(#A,#B),'utilisation')",
3802+
"targetFull": "alias(divideSeries(alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.max.gauge64), 'max'),alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.used.max.gauge32), 'used')),'utilisation')",
3803+
"textEditor": true
3804+
}
3805+
],
3806+
"thresholds": [],
3807+
"timeFrom": null,
3808+
"timeShift": null,
3809+
"title": "accounting",
3810+
"tooltip": {
3811+
"msResolution": false,
3812+
"shared": true,
3813+
"sort": 0,
3814+
"value_type": "cumulative"
3815+
},
3816+
"type": "graph",
3817+
"xaxis": {
3818+
"buckets": null,
3819+
"mode": "time",
3820+
"name": null,
3821+
"show": true,
3822+
"values": []
3823+
},
3824+
"yaxes": [
3825+
{
3826+
"format": "ms",
3827+
"label": null,
3828+
"logBase": 1,
3829+
"max": null,
3830+
"min": null,
3831+
"show": true
3832+
},
3833+
{
3834+
"format": "none",
3835+
"label": null,
3836+
"logBase": 1,
3837+
"max": null,
3838+
"min": null,
3839+
"show": true
3840+
}
3841+
],
3842+
"yaxis": {
3843+
"align": false,
3844+
"alignLevel": null
3845+
}
3846+
},
37153847
{
37163848
"aliasColors": {
37173849
"add": "#629E51",
@@ -3731,9 +3863,9 @@
37313863
"fill": 10,
37323864
"grid": {},
37333865
"gridPos": {
3734-
"h": 7,
3735-
"w": 12,
3736-
"x": 12,
3866+
"h": 6,
3867+
"w": 9,
3868+
"x": 15,
37373869
"y": 74
37383870
},
37393871
"id": 28,
@@ -3844,7 +3976,7 @@
38443976
"h": 1,
38453977
"w": 24,
38463978
"x": 0,
3847-
"y": 81
3979+
"y": 80
38483980
},
38493981
"id": 48,
38503982
"panels": [],
@@ -3871,7 +4003,7 @@
38714003
"h": 7,
38724004
"w": 12,
38734005
"x": 0,
3874-
"y": 82
4006+
"y": 81
38754007
},
38764008
"id": 32,
38774009
"legend": {
@@ -3993,7 +4125,7 @@
39934125
"h": 7,
39944126
"w": 12,
39954127
"x": 12,
3996-
"y": 82
4128+
"y": 81
39974129
},
39984130
"id": 15,
39994131
"legend": {
@@ -4173,5 +4305,5 @@
41734305
"timezone": "browser",
41744306
"title": "Metrictank",
41754307
"uid": "tQW3QShiz",
4176-
"version": 1
4177-
}
4308+
"version": 5
4309+
}

docs/operations.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ If you expect consistent or predictable load, you may also want to monitor:
3434

3535
* `metrictank.stats.$environment.$instance.store.cassandra.chunk_operations.save_ok.counter32`: number of saved chunks (based on your chunkspan settings)
3636
* `metrictank.stats.$environment.$instance.api.request_handle.values.rate32` : rate per second of render requests
37-
* `metrictank.stats.$environment.$instance.input.*.*.received.counter32`: input counter (derive with perSecond(
37+
* `metrictank.stats.$environment.$instance.input.*.*.received.counter32`: input counter (derive with perSecond)
38+
* `metrictank.stats.$environment.$instance.cache.accounting.queue.size.used.max.gauge32`: accounting queue size; if this queue fills up, it will slow down requests (compare to `size.max`)
3839

3940

4041
## Crash

mdata/cache/accnt/flat_accnt.go

+6-7
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ package accnt
22

33
import (
44
"sort"
5+
"time"
56

67
"github.com/grafana/metrictank/mdata/chunk"
78
"github.com/raintank/schema"
8-
"github.com/raintank/worldping-api/pkg/log"
99
)
1010

1111
const evictQSize = 1000
@@ -110,6 +110,7 @@ func NewFlatAccnt(maxSize uint64) *FlatAccnt {
110110
eventQ: make(chan FlatAccntEvent, EventQSize),
111111
}
112112
cacheSizeMax.SetUint64(maxSize)
113+
accntEventQueueMax.SetUint64(uint64(EventQSize))
113114

114115
go accnt.eventLoop()
115116
return &accnt
@@ -157,12 +158,10 @@ func (a *FlatAccnt) act(eType eventType, payload interface{}) {
157158
pl: payload,
158159
}
159160

160-
select {
161-
// we never want to block for accounting, rather just let it miss some events and print an error
162-
case a.eventQ <- event:
163-
default:
164-
log.Error(3, "Failed to submit event to accounting, channel was blocked")
165-
}
161+
pre := time.Now()
162+
a.eventQ <- event
163+
accntEventAddDuration.Value(time.Now().Sub(pre))
164+
accntEventQueueUsed.Value(len(a.eventQ))
166165
}
167166

168167
func (a *FlatAccnt) eventLoop() {

mdata/cache/accnt/stats.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ var (
3131
// metric cache.ops.chunk.evict is how many chunks were evicted from the cache
3232
cacheChunkEvict = stats.NewCounter32("cache.ops.chunk.evict")
3333

34-
cacheSizeMax = stats.NewGauge64("cache.size.max")
35-
cacheSizeUsed = stats.NewGauge64("cache.size.used")
34+
cacheSizeMax = stats.NewGauge64("cache.size.max")
35+
cacheSizeUsed = stats.NewGauge64("cache.size.used")
36+
accntEventAddDuration = stats.NewLatencyHistogram15s32("cache.accounting.queue.add")
37+
accntEventQueueUsed = stats.NewRange32("cache.accounting.queue.size.used")
38+
accntEventQueueMax = stats.NewGauge64("cache.accounting.queue.size.max")
3639
)

0 commit comments

Comments
 (0)