-
Notifications
You must be signed in to change notification settings - Fork 14
/
predict.c
363 lines (334 loc) · 12.1 KB
/
predict.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/*
* * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* * This code is free software; you can redistribute it and/or modify it
* * under the terms of the GNU General Public License version 2 only, as
* * published by the Free Software Foundation.
* *
* * This code is distributed in the hope that it will be useful, but WITHOUT
* * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* * version 2 for more details (a copy is included in the LICENSE file that
* * accompanied this code).
* *
* * You should have received a copy of the GNU General Public License version
* * 2 along with this work; if not, write to the Free Software Foundation,
* * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* *
* * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* * or visit www.oracle.com if you need additional information or have any
* * questions.
* */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "predict.h"
/*
* This function inserts the given value into the list of most recently seen
* data and returns the parameters, m and c, of a straight line of the form
* y = mx + c that, according to the the method of least squares, fits them
* best. The formulation is for the special case in which x_i = i + 1 - N;
* this reduces the need for storage and permits constant time updates.
*/
static int
lsq_fit(struct lsq_struct *lsq, long long new_y, long long new_x,
long long *m, long long *c)
{
long long sigma_x, sigma_y;
long sigma_xy, sigma_xx;
long long slope_divisor;
int i, next;
long x_offset;
next = lsq->next++;
lsq->x[next] = new_x;
lsq->y[next] = new_y;
if (lsq->next == LSQ_LOOKBACK) {
lsq->next = 0;
/*
* Lookback window is fill which means a reasonable
* best fit line can be computed. Flag enough data
* is available in lookback window now.
*/
lsq->ready = 1;
}
/*
* If lookback window is not full, do not continue with
* computing slope and intercept of best fit line.
*/
if (!lsq->ready)
return -1;
/*
* If lookback window is full, compute slope and intercept
* for the best fit line. In the process of computing those, we need
* to compute squares of values along x-axis. Sqaure values can be
* large enough to overflow 64-bits if they are large enough to
* begin with. To solve this problem, transform the line on
* x-axis so the first point falls at x=0. Since lsq->x is a
* circular buffer, lsq->next points to the oldest entry in this
* buffer.
*/
x_offset = lsq->x[lsq->next];
for (i = 0; i < LSQ_LOOKBACK; i++)
lsq->x[i] -= x_offset;
/*
* Lookback window is full. Compute slope and intercept
* for the best fit line
*/
sigma_x = sigma_y = sigma_xy = sigma_xx = 0;
for (i = 0; i < LSQ_LOOKBACK; i++) {
sigma_x += lsq->x[i];
sigma_y += lsq->y[i];
sigma_xy += (lsq->x[i] * lsq->y[i]);
sigma_xx += (lsq->x[i] * lsq->x[i]);
}
/*
* guard against divide-by-zero
*/
slope_divisor = LSQ_LOOKBACK * sigma_xx - sigma_x * sigma_x;
if (slope_divisor == 0)
return -1;
*m = ((LSQ_LOOKBACK * sigma_xy - sigma_x * sigma_y) * 100) / slope_divisor;
*c = (sigma_y - *m * sigma_x) / LSQ_LOOKBACK;
/*
* Restore original values for x-axis
*/
for (i = 0; i < LSQ_LOOKBACK; i++)
lsq->x[i] += x_offset;
return 0;
}
/*
* This function determines whether it is necessary to begin
* reclamation/compaction now in order to avert exhaustion of any of the
* free lists.
*
* Its basis is a simple model in which the total free memory, f_T, is
* consumed at a constant rate, R_T, i.e.
*
* f_T(t) = R_T * t + f_T(0)
*
* For any given order, o > 0, members of subordinate lists constitute
* fragmented free memory, f_f(o): the blocks are notionally free but
* they are unavailable for allocation. The fragmented free memory is
* also assumed to behave linearly and in the absence of compaction is
* given by
*
* f_f(o, t) = R_f(o) t + f_f(o, 0)
*
* Order 0 function represents current trend line for total free pages
* instead.
*
* It is assumed that all allocations will be made from contiguous
* memory meaning that, under net memory pressure and with no change in
* fragmentation, f_T will become equal to f_f and subsequent allocations
* will stall in either direct compaction or reclaim. Preemptive compaction
* will delay the onset of exhaustion but, to be useful, must begin early
* enough and must proceed at a sufficient rate.
*
* On each invocation, this function obtains estimates for the
* parameters f_T(0), R_T, f_f(o, 0) and R_f(o). Using the best fit
* line, it then determines if reclamation or compaction should be started
* now to avert free pages exhaustion or severe fragmentation. Return value
* is a set of bits which represent which condition has been observed -
* potential free memory exhaustion, and potential severe fragmentation.
*/
unsigned long
predict(struct frag_info *frag_vec, struct lsq_struct *lsq,
unsigned long high_wmark, unsigned long low_wmark, int nid)
{
int order;
long long m[MAX_ORDER];
long long c[MAX_ORDER];
int is_ready = 1;
unsigned long retval = 0;
unsigned long time_taken, time_to_catchup;
long long x_cross, current_time;
struct timespec tspec;
/*
* Compute the trend line for fragmentation on each order page.
* For order 0 pages, it will be a trend line showing rate
* of consumption of pages. For higher order pages, trend line
* shows loss/gain of pages of that order. When the trend line
* for example for order n pages intersects with trend line for
* total free pages, it means all available pages are of order
* (n-1) or lower and there is 100% fragmentation of order n
* pages. Kernel must compact pages at this point to gain
* new order n pages.
*/
for (order = 0; order < MAX_ORDER; order++) {
if (lsq_fit(&lsq[order], frag_vec[order].free_pages,
frag_vec[order].msecs, &m[order],
&c[order]) == -1)
is_ready = 0;
}
if (!is_ready)
return retval;
#if 0
if (frag_vec[0].free_pages < high_wmark) {
retval |= MEMPREDICT_RECLAIM;
return retval;
}
#endif
/*
* Trend line for each order page is available now. If the trend
* line for overall free pages is trending upwards (positive
* slope), there is no need to reclaim pages but there may be
* need to compact pages if system is running out of contiguous pages
* for higher orders.
*/
if (m[0] >= 0) {
/*
* Since number of free pages is going up, it is
* time to adjust watermarks down.
*/
retval |= MEMPREDICT_LOWER_WMARKS;
}
else {
/*
* Trend line for overall free pages is showing a
* negative trend. Check if we are approaching high
* watermark faster than pages are being reclaimed.
* If a reclaim rate has not been computed yet, do not
* compute if it is time to start reclamation
*/
if (reclaim_rate == 0)
return 0;
/*
* If number of free pages is already below high watermark,
* it is time to kick off reclamation. If not, compute
* time it will take to go below high_wmark.
*/
if (frag_vec[0].free_pages <= high_wmark) {
retval |= MEMPREDICT_RECLAIM;
log_info(2, "Reclamation recommended due to free pages being below high watermark");
log_info(2, "Consumption rate on node %d=%ld pages/msec, reclaim rate is %ld pages/msec, Free pages=%ld, low wmark=%ld, high wmark=%ld", nid, abs(m[0]), reclaim_rate, frag_vec[0].free_pages, low_wmark, high_wmark);
}
else {
time_taken = (frag_vec[0].free_pages - high_wmark)
/ abs(m[0]);
/*
* Time to reclaim frag_vec[0].free_pages - high_wmark
*/
time_to_catchup = (frag_vec[0].free_pages -
high_wmark) / reclaim_rate;
/*
* If time taken to go below high_wmark is fairly
* close to time it will take to reclaim enough
* then we need to start kswapd now. We will use
* 3 times of the time to catch up as threshold
* just to ensure reclamationprocess has enough time.
* This helps eliminate the possibility of forcing
* reclamation when time to go below high watermark
* is too far in the future.
*/
if (time_taken <= (3*time_to_catchup)) {
log_info(3, "Reclamation recommended due to high memory consumption rate");
log_info(3, "Consumption rate on node %d=%ld pages/msec, reclaim rate is %ld pages/msec, Free pages=%ld, low wmark=%ld, high wmark=%ld", nid, abs(m[0]), reclaim_rate, frag_vec[0].free_pages, low_wmark, high_wmark);
log_info(3, "Time to below high watermark= %ld msec, time to catch up=%ld msec", time_taken, time_to_catchup);
retval |= MEMPREDICT_RECLAIM;
}
}
}
/*
* Check if system is running low on higher order pages and needs
* compaction.
*/
for (order = max_compaction_order; order > 0; order--) {
/*
* If lines are parallel, then they never intersect.
*/
if (m[0] == m[order])
continue;
/*
* If number of high order pages is increasing, no
* need to take any action
*/
if (m[order] < 0)
continue;
/*
* If system is unable to comapct pages, no point
* in forcing compaction
*/
if (compaction_rate == 0)
return 0;
/*
* Find the point of intersection of the two lines.
* The point of intersection represents 100%
* fragmentation for this order.
*/
x_cross = ((c[0] - c[order]) * 100) /
(m[order] - m[0]);
#if 0
y_cross = ((m[order] * c[0]) - (m[0] * c[order])) /
(m[order] - m[0]);
#endif
/*
* If they intersect anytime soon in the future
* or intersected recently in the past, then it
* is time for compaction and there is no need
* to continue evaluating remaining order pages.
* If intersection was in the past, it can be
* outside of current lookback window which means
* x_cross can be negative.
*
* To check if intersection is in near future,
* get the current time relative to x=0 on our
* graph.
*/
clock_gettime(CLOCK_MONOTONIC_RAW, &tspec);
current_time = tspec.tv_sec*1000 + tspec.tv_nsec/1000 - lsq->x[lsq->next];;
if (current_time < 0)
current_time = 0;
if ((x_cross < 0) ||
(x_cross < current_time)) {
unsigned long higher_order_pages =
frag_vec[MAX_ORDER - 1].free_pages -
frag_vec[order].free_pages;
/*
* Check if there are enough higher order pages
* that could be broken into pages of current order
* given current rate of consumption and time
* remaining. If not, comapct now.
*/
if (higher_order_pages < (m[order] * x_cross)) {
log_info(2, "Compaction recommended on node %d. Running out of order %d pages", nid, order);
if (order < (MAX_ORDER -1))
log_info(3, "No. of free order %d pages = %ld base pages, consumption rate=%ld pages/msec", order, (frag_vec[order+1].free_pages - frag_vec[order].free_pages), m[order]);
log_info(3, "Current compaction rate=%ld pages/msec", compaction_rate);
retval |= MEMPREDICT_COMPACT;
break;
}
}
else {
/*
* How long before we run out of current order
* pages. We will constrain the size of window
* we look forward in since large window means
* the consumption trend can change in that time.
* It will be prudent to defer the decision to
* initiate compaction until the exhaustion period
* falls within this window.
*/
unsigned long largest_window;
largest_window = 5 * LSQ_LOOKBACK * periodicity * 1000;
time_taken = x_cross - current_time;
if (time_taken > largest_window)
continue;
/*
* How long will it take to compact as many pages as
* available current order pages
*/
time_to_catchup = (frag_vec[order+1].free_pages - frag_vec[order].free_pages) / compaction_rate;
if (time_taken >= time_to_catchup) {
log_info(3, "Compaction recommended on node %d. Order %d pages consumption rate is high", nid, order);
if (order < (MAX_ORDER -1))
log_info(3, "No. of free order %d pages = %ld base pages, consumption rate=%ld pages/msec", order, (frag_vec[order+1].free_pages - frag_vec[order].free_pages), m[order]);
log_info(3, "Current compaction rate=%ld pages/msec, Exhaustion in %ld msec", compaction_rate, time_taken);
retval |= MEMPREDICT_COMPACT;
break;
}
}
}
return retval;
}