@@ -35,7 +35,7 @@ type DefaultMultiTenantManager struct {
35
35
36
36
// Structs for holding per-user Prometheus rules Managers
37
37
// and a corresponding metrics struct
38
- userManagerMtx sync.Mutex
38
+ userManagerMtx sync.RWMutex
39
39
userManagers map [string ]RulesManager
40
40
userManagerMetrics * ManagerMetrics
41
41
@@ -50,6 +50,10 @@ type DefaultMultiTenantManager struct {
50
50
configUpdatesTotal * prometheus.CounterVec
51
51
registry prometheus.Registerer
52
52
logger log.Logger
53
+
54
+ ruleCache map [string ][]* promRules.Group
55
+ ruleCacheMtx sync.RWMutex
56
+ syncRuleMtx sync.Mutex
53
57
}
54
58
55
59
func NewDefaultMultiTenantManager (cfg Config , managerFactory ManagerFactory , evalMetrics * RuleEvalMetrics , reg prometheus.Registerer , logger log.Logger ) (* DefaultMultiTenantManager , error ) {
@@ -85,6 +89,7 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, eva
85
89
mapper : newMapper (cfg .RulePath , logger ),
86
90
userManagers : map [string ]RulesManager {},
87
91
userManagerMetrics : userManagerMetrics ,
92
+ ruleCache : map [string ][]* promRules.Group {},
88
93
managersTotal : promauto .With (reg ).NewGauge (prometheus.GaugeOpts {
89
94
Namespace : "cortex" ,
90
95
Name : "ruler_managers_total" ,
@@ -111,15 +116,17 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, eva
111
116
}
112
117
113
118
func (r * DefaultMultiTenantManager ) SyncRuleGroups (ctx context.Context , ruleGroups map [string ]rulespb.RuleGroupList ) {
114
- // A lock is taken to ensure if this function is called concurrently, then each call
115
- // returns after the call map files and check for updates
116
- r .userManagerMtx .Lock ()
117
- defer r .userManagerMtx .Unlock ()
119
+ // this is a safety lock to ensure this method is executed sequentially
120
+ r .syncRuleMtx .Lock ()
121
+ defer r .syncRuleMtx .Unlock ()
118
122
119
123
for userID , ruleGroup := range ruleGroups {
120
124
r .syncRulesToManager (ctx , userID , ruleGroup )
121
125
}
122
126
127
+ r .userManagerMtx .Lock ()
128
+ defer r .userManagerMtx .Unlock ()
129
+
123
130
// Check for deleted users and remove them
124
131
for userID , mngr := range r .userManagers {
125
132
if _ , exists := ruleGroups [userID ]; ! exists {
@@ -142,6 +149,18 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou
142
149
r .managersTotal .Set (float64 (len (r .userManagers )))
143
150
}
144
151
152
+ func (r * DefaultMultiTenantManager ) updateRuleCache (user string , rules []* promRules.Group ) {
153
+ r .ruleCacheMtx .Lock ()
154
+ defer r .ruleCacheMtx .Unlock ()
155
+ r .ruleCache [user ] = rules
156
+ }
157
+
158
+ func (r * DefaultMultiTenantManager ) deleteRuleCache (user string ) {
159
+ r .ruleCacheMtx .Lock ()
160
+ defer r .ruleCacheMtx .Unlock ()
161
+ delete (r .ruleCache , user )
162
+ }
163
+
145
164
// syncRulesToManager maps the rule files to disk, detects any changes and will create/update the
146
165
// user's Prometheus Rules Manager.
147
166
func (r * DefaultMultiTenantManager ) syncRulesToManager (ctx context.Context , user string , groups rulespb.RuleGroupList ) {
@@ -154,25 +173,25 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
154
173
return
155
174
}
156
175
157
- manager , exists := r .userManagers [user ]
158
- if ! exists || update {
176
+ existing := true
177
+ manager := r .getRulesManager (user , ctx )
178
+ if manager == nil {
179
+ existing = false
180
+ manager = r .createRulesManager (user , ctx )
181
+ }
182
+
183
+ if manager == nil {
184
+ return
185
+ }
186
+
187
+ if ! existing || update {
159
188
level .Debug (r .logger ).Log ("msg" , "updating rules" , "user" , user )
160
189
r .configUpdatesTotal .WithLabelValues (user ).Inc ()
161
- if ! exists {
162
- level .Debug (r .logger ).Log ("msg" , "creating rule manager for user" , "user" , user )
163
- manager , err = r .newManager (ctx , user )
164
- if err != nil {
165
- r .lastReloadSuccessful .WithLabelValues (user ).Set (0 )
166
- level .Error (r .logger ).Log ("msg" , "unable to create rule manager" , "user" , user , "err" , err )
167
- return
168
- }
169
- // manager.Run() starts running the manager and blocks until Stop() is called.
170
- // Hence run it as another goroutine.
171
- go manager .Run ()
172
- r .userManagers [user ] = manager
190
+ if update && existing {
191
+ r .updateRuleCache (user , manager .RuleGroups ())
173
192
}
174
-
175
193
err = manager .Update (r .cfg .EvaluationInterval , files , r .cfg .ExternalLabels , r .cfg .ExternalURL .String (), ruleGroupIterationFunc )
194
+ r .deleteRuleCache (user )
176
195
if err != nil {
177
196
r .lastReloadSuccessful .WithLabelValues (user ).Set (0 )
178
197
level .Error (r .logger ).Log ("msg" , "unable to update rule manager" , "user" , user , "err" , err )
@@ -184,6 +203,29 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
184
203
}
185
204
}
186
205
206
+ func (r * DefaultMultiTenantManager ) getRulesManager (user string , ctx context.Context ) RulesManager {
207
+ r .userManagerMtx .RLock ()
208
+ defer r .userManagerMtx .RUnlock ()
209
+ return r .userManagers [user ]
210
+ }
211
+
212
+ func (r * DefaultMultiTenantManager ) createRulesManager (user string , ctx context.Context ) RulesManager {
213
+ r .userManagerMtx .Lock ()
214
+ defer r .userManagerMtx .Unlock ()
215
+
216
+ manager , err := r .newManager (ctx , user )
217
+ if err != nil {
218
+ r .lastReloadSuccessful .WithLabelValues (user ).Set (0 )
219
+ level .Error (r .logger ).Log ("msg" , "unable to create rule manager" , "user" , user , "err" , err )
220
+ return nil
221
+ }
222
+ // manager.Run() starts running the manager and blocks until Stop() is called.
223
+ // Hence run it as another goroutine.
224
+ go manager .Run ()
225
+ r .userManagers [user ] = manager
226
+ return manager
227
+ }
228
+
187
229
func ruleGroupIterationFunc (ctx context.Context , g * promRules.Group , evalTimestamp time.Time ) {
188
230
logMessage := []interface {}{
189
231
"msg" , "evaluating rule group" ,
@@ -269,13 +311,25 @@ func (r *DefaultMultiTenantManager) getOrCreateNotifier(userID string, userManag
269
311
return n .notifier , nil
270
312
}
271
313
314
+ func (r * DefaultMultiTenantManager ) getCachedRules (userID string ) ([]* promRules.Group , bool ) {
315
+ r .ruleCacheMtx .RLock ()
316
+ defer r .ruleCacheMtx .RUnlock ()
317
+ groups , exists := r .ruleCache [userID ]
318
+ return groups , exists
319
+ }
320
+
272
321
func (r * DefaultMultiTenantManager ) GetRules (userID string ) []* promRules.Group {
273
322
var groups []* promRules.Group
274
- r .userManagerMtx .Lock ()
275
- if mngr , exists := r .userManagers [userID ]; exists {
323
+ groups , cached := r .getCachedRules (userID )
324
+ if cached {
325
+ return groups
326
+ }
327
+ r .userManagerMtx .RLock ()
328
+ mngr , exists := r .userManagers [userID ]
329
+ r .userManagerMtx .RUnlock ()
330
+ if exists {
276
331
groups = mngr .RuleGroups ()
277
332
}
278
- r .userManagerMtx .Unlock ()
279
333
return groups
280
334
}
281
335
0 commit comments