forked from arcanecode/KQL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
m4-demo-advanced-aggregations.csl
471 lines (361 loc) · 15.4 KB
/
m4-demo-advanced-aggregations.csl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
//------------------------------------------------------------------------------
// Kusto Query Language (KQL) From Scratch
// Module 4 - Advanced Aggregations
//
// The demos in this module serve as a very basic introduction to the KQL
// language within the Azure Log Analytics environment.
//
// Copyright (c) 2018. Microsoft, Pluralsight, Robert C. Cain.
// All rights reserved. This code may be used in part within your own
// applications.
//
// This code may NOT be redistributed in it's entirely without permission
// of one of it's copyright holders.
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// arg_max / arg_min
//------------------------------------------------------------------------------
// arg_max finds the maximum value for the column being summarized on, and
// returns the row where that maximum value was found
Perf
| summarize arg_max(CounterValue, *) by CounterName
| sort by CounterName asc
// The second parameter indicates which columns to return. * means all.
// By default it always returns the "by" column and the maximized value.
//
Perf
| summarize arg_max(CounterValue, Computer, ObjectName) by CounterName
| sort by CounterName asc
// Showing all the rows sorted by value so you can see the max value
Perf
| where CounterName == "% Available Memory"
| sort by CounterValue desc
// arg_min does the same except, of course, finding the minimum value for
// the column being summarized on, and returning rows where that min
// value was found
Perf
| project CounterName, CounterValue
| summarize arg_min(CounterValue, *) by CounterName
| sort by CounterName asc
// Showing all the rows sorted by value so you can see the min value
Perf
| where CounterName == "% Available Memory"
| sort by CounterValue asc
//------------------------------------------------------------------------------
// Makeset / Makelist
//------------------------------------------------------------------------------
// Makeset - Creates a json dataset by flattening a heirarcy.
Perf
| summarize Counters = makeset(CounterName) by ObjectName
// Somthing more useful, getting a list of PCs low on disk space.
Perf
| where CounterName == "% Free Space"
and CounterValue <= 30
| summarize Computers = makeset(Computer)
// We'll see examples of how to use these sets in the next module,
// Working with Datasets.
// Makelist - Similar to makeset, but duplicates are not removed
Perf
| where CounterName == "% Free Space"
and CounterValue <= 30
| summarize Computers = makelist(Computer)
// Note that makeset and makelist have an optional second parameter that
// sets the max size of the returned list, which by default is 128
Perf
| where CounterName == "% Free Space"
and CounterValue <= 30
| summarize Computers = makelist(Computer, 256)
//------------------------------------------------------------------------------
// mvexpand
//------------------------------------------------------------------------------
// mvexpand takes a dynamic value (like a set or list) and converts it back
// into individual rows
Perf
| where CounterName == "% Free Space"
and CounterValue <= 30
| summarize Computers = makeset(Computer)
| mvexpand Computers
SecurityAlert
| extend ExtProps=todynamic(ExtendedProperties)
| mvexpand ExtProps
| project TimeGenerated
, DisplayName
, AlertName
, AlertSeverity
, ExtProps
//------------------------------------------------------------------------------
// Percentiles
//------------------------------------------------------------------------------
// Percentiles calculates the value that is greater than x% of the sampleset.
// Here, it shows the CounterValue that is higher than 5% of the other
// values in the group, then the value that is greater than 50% of the
// incoming data, and finally a value that is greater than 95% of the data.
Perf
| where CounterName == "Available MBytes"
| summarize percentiles(CounterValue, 5, 50, 95) by Computer
// if this is one output row:
// Computer percentile_CounterValue_5 percentile_CounterValue_50 percentile_CounterValue_95
// ContosoWebVM1 629.7383905746 2,605.4022856667 12,539.1563912150
// Of all of the performance counter records for computer ContosoWebVM1 in the time range,
// 5% of the records show available disk space of 629.7383905746 or less
// 50% of the records show available disk space of 2,605.4022856667 or less
// 95% of the records show available disk space of 12,539.1563912150 or less
// If, for example, we say that less than 1,000MB is a condition to be concerned
// over, then only 5% of our records show we fall below this amount. At least
// 50% of the records show we are OK (in other words > 1,000 MB)
// You can rename the columns if you need to using project-rename
Perf
| where CounterName == "Available MBytes"
| summarize percentiles(CounterValue, 5, 50, 95) by Computer
| project-rename Percent05 = percentile_CounterValue_5
, Percent50 = percentile_CounterValue_50
, Percent95 = percentile_CounterValue_95
// Or you can rename in the summarize call
Perf
| where CounterName == "Available MBytes"
| summarize (Percent05, Percent50, Percent95)
= percentiles(CounterValue, 5, 50, 95)
by Computer
// You can set your own percentile levels
Perf
| where CounterName == "Available MBytes"
| summarize percentiles(CounterValue, 10, 30, 50, 70, 90) by CounterName
| sort by CounterName asc
// You can also return the data as a single column array using percentiles_array
Perf
| summarize percentiles_array(CounterValue, 5, 50, 95) by CounterName
// And then pivot the data so instead of coming back as columns, they
// now appear as rows
Perf
| summarize percentarray = percentiles_array(CounterValue, 5, 50, 95) by CounterName
| mvexpand percentarray
//------------------------------------------------------------------------------
// dcount
//------------------------------------------------------------------------------
// dcount returnes an ESTIMATED number of DISTINCT rows, as opposed to counting
// on a distinct dataset, which is always accurate but slow. As a result dcount
// calculates faster especially over a very large dataset.
// Security events have an id that describes the type of event
SecurityEvent
| distinct EventID, Activity
// For comparison, count the different types of security events that occured
// to a computer in the recent past
SecurityEvent
| where TimeGenerated >= ago(90d)
| distinct Computer, EventID
| summarize EventTypeCount = count(EventID) by Computer
| sort by Computer asc
// Now we can use dcount to get an estimate of these
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcount(EventID) by Computer
| sort by Computer asc
// A second parameter lets you set the accuracy level
// 0 = Least accurate, 1.6% error
// 1 = Default, balances accuracy and time, 0.8% error level
// 2 = Accurate but slow, 0.4% error
// 3 = Extra accurate but slowest, 0.28% error level
// 0 = Least accurate, 1.6% error
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcount(EventID, 0) by Computer
| sort by Computer asc
// 1 = Default, balances accuracy and time, 0.8% error level
// (Same as not using the second parameter in the first example)
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcount(EventID, 1) by Computer
| sort by Computer asc
// 2 = Accurate but slow, 0.4% error
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcount(EventID, 2) by Computer
| sort by Computer asc
// 3 = Extra accurate but slowest, 0.28% error level
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcount(EventID, 3) by Computer
| sort by Computer asc
//------------------------------------------------------------------------------
// dcountif
//------------------------------------------------------------------------------
// dcountif is similar to dcount, except it allows you to embed an if condition
// within the function call
// Here we are only looking for EventIDs in a specific list
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcountif( EventID
, EventID in (4625, 4688, 4624, 4672, 4670, 4689, 4634, 4674)
)
by Computer
| sort by Computer asc
// dcountif has a similar accuracy switch as dcount, although it omits level 3
// 0 = Least accurate, 1.6% error
// 1 = Default, balances accuracy and time, 0.8% error level
// 2 = Accurate but slow, 0.4% error
// 0 = Least accurate, 1.6% error
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcountif( EventID
, EventID in (4625, 4688, 4624, 4672, 4670, 4689, 4634, 4674)
, 0
)
by Computer
| sort by Computer asc
// 1 = Default, balances accuracy and time, 0.8% error level
// (Same as not using it)
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcountif( EventID
, EventID in (4625, 4688, 4624, 4672, 4670, 4689, 4634, 4674)
, 1
)
by Computer
| sort by Computer asc
// 2 = Accurate but slow, 0.4% error
SecurityEvent
| where TimeGenerated >= ago(90d)
| summarize dcountif( EventID
, EventID in (4625, 4688, 4624, 4672, 4670, 4689, 4634, 4674)
, 2
)
by Computer
| sort by Computer asc
// For comparison, here is a similar version using count
SecurityEvent
| where TimeGenerated >= ago(90d)
| distinct Computer, EventID
| summarize EventTypeCount = countif( EventID in ( 4625, 4688, 4624, 4672
, 4670, 4689, 4634, 4674
)
) by Computer
| sort by Computer asc
//------------------------------------------------------------------------------
// countif
//------------------------------------------------------------------------------
// Like dcountif, countif allows you to add a filter condition.
Perf
| summarize RowCount = countif(CounterName contains "Bytes") by CounterName
| sort by CounterName asc
// Similar query without Countif, note that rows with a 0 count are suppressed
Perf
| where CounterName contains "Bytes"
| summarize count() by CounterName
| sort by CounterName asc
// We can add an extra where to remove the 0 valued rows
Perf
| summarize RowCount = countif(CounterName contains "Bytes") by CounterName
| where RowCount > 0
| sort by CounterName asc
//------------------------------------------------------------------------------
// pivot
//------------------------------------------------------------------------------
// Let's say you have a report of computers, the event, and how many they had
Event
| project Computer, EventLevelName
| summarize count() by Computer, EventLevelName
// A bit hard to read though, it'd make much more sense to have the event levels
// come in as columns, in a spreadsheet like fashion. That's what Pivot will
// do for you!
Event
| project Computer, EventLevelName
| evaluate pivot(EventLevelName)
| sort by Computer asc
//------------------------------------------------------------------------------
// top-nested
//------------------------------------------------------------------------------
// top-nested does nested measurements.
// In this example, it first gets the top 3 ObjectNames as measured by their
// count. For each one of those, it gets the top 3 CounterNames, as measured
// by their count
Perf
| top-nested 3 of ObjectName by ObjectCount = count()
, top-nested 3 of CounterName by CounterNameCount = count()
// You can have as many items as you want, and go to many levels
Perf
| top-nested 5 of ObjectName by ObjectCount = count()
, top-nested 5 of CounterName by CounterNameCount = count()
, top-nested 5 of InstanceName by InstanceCount = count()
| sort by ObjectName asc
, CounterName asc
, InstanceName asc
// top-nested supports many different aggregations, including:
// sum(), count(), max(), min(), dcount(), avg(), percentile(), percentilew(),
// or any algebric combination of these aggregation
Perf
| top-nested 5 of ObjectName by ObjectSum = sum(CounterValue)
, top-nested 5 of CounterName by CounterNameSum = sum(CounterValue)
, top-nested 5 of InstanceName by InstanceSum = sum(CounterValue)
| sort by ObjectName asc
, CounterName asc
, InstanceName asc
// top-nested can also include a row for others not included in the top X
Perf
| top-nested 3 of ObjectName with others = "All Other Objects"
by ObjectCount = count()
, top-nested 3 of CounterName
by CounterNameCount = count()
| sort by ObjectName asc
, CounterName asc
// Other can be at all levels
Perf
| top-nested 3 of ObjectName
with others = "All Other Objects"
by ObjectCount = count()
, top-nested 3 of CounterName
with others = "All Other Counters"
by CounterNameCount = count()
| sort by ObjectName asc
, CounterName asc
// or even just at the sub levels
Perf
| top-nested 3 of ObjectName
by ObjectCount = count()
, top-nested 3 of CounterName
with others = "All Other Counters"
by CounterNameCount = count()
| sort by ObjectName asc
, CounterName asc
//------------------------------------------------------------------------------
// max / min
//------------------------------------------------------------------------------
// max is simple, it just returns the maximum value for the column being
// passed in.
Perf
| where CounterName == "Free Megabytes"
| summarize max(CounterValue)
// min is almost identical
Perf
| where CounterName == "Free Megabytes"
| summarize min(CounterValue)
// If you want to return a more complex result set with additional columns,
// use arg_max/arg_min (shown at the top of this module's demos)
//------------------------------------------------------------------------------
// sum / sumif
//------------------------------------------------------------------------------
// Like max, sum is very simple, just returns a grand total for the indicated
// column
Perf
| where CounterName == "Free Megabytes"
| summarize sum(CounterValue)
// sumif allows you to add your condition to the parameter list
Perf
| summarize sumif(CounterValue, CounterName == "Free Megabytes")
//------------------------------------------------------------------------------
// any
//------------------------------------------------------------------------------
// any is a random item generator. When used with an *, it returns a random
// row from the input dataset
Perf
| summarize any(*)
// You can also pass in a specific column name to return just that column
Perf
| summarize any(Computer)
// You can specify a combination of columns
Perf
| summarize any(ObjectName, CounterName, CounterValue)
// When you use with by, it returns a random row for each distinct value
// in the column after the by clause
Perf
| summarize any(*) by CounterName
| sort by CounterName asc