diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27901b36..c1536c6e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@
 * Improved the handling of track log timestamps - these can now be supplied by the client and are no
   longer bound to insert time of DB record. Latest Hoist React uses *start* of the tracked activity.
 * Support for persisting of memory monitoring results
+* New built-in monitor `xhClientErrorsMonitor`
+* New methods `MonitorResult.getParam` and `MonitorResult.getRequiredParam`
 
 ### ⚙️ Technical
 
diff --git a/src/main/groovy/io/xh/hoist/monitor/MonitorResult.groovy b/src/main/groovy/io/xh/hoist/monitor/MonitorResult.groovy
index 8a78e8e9..d9ecf281 100644
--- a/src/main/groovy/io/xh/hoist/monitor/MonitorResult.groovy
+++ b/src/main/groovy/io/xh/hoist/monitor/MonitorResult.groovy
@@ -39,6 +39,19 @@ class MonitorResult implements JSONFormat {
         monitor.params ? JSONParser.parseObject(monitor.params) : [:]
     }
 
+    /** Value of the named param, or `defaultVal` if the key is absent (a key present with a null value yields null). */
+    <T> T getParam(String name, T defaultVal = null) {
+        params.containsKey(name) ? params[name] : defaultVal
+    }
+
+    /** Value of the named param. Throws a RuntimeException if the key is absent. */
+    <T> T getRequiredParam(String name) {
+        if (!params.containsKey(name)) {
+            throw new RuntimeException("Missing required parameter ${name}")
+        }
+        params[name]
+    }
+
     /** Combines the given string with 'message', separated by formatting */
     void prependMessage(String prependStr) {
         // Space character before the newlines is for fallback formatting in `hoist-react <= v51.0.0`
diff --git a/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy b/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy
index d213be9f..b467866f 100644
--- a/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy
+++ b/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy
@@ -7,13 +7,15 @@
 
 package io.xh.hoist.monitor.provided
 
+import grails.gorm.transactions.ReadOnly
 import grails.gorm.transactions.Transactional
 import groovy.sql.Sql
 import io.xh.hoist.BaseService
-import io.xh.hoist.data.filter.Filter
 import io.xh.hoist.monitor.Monitor
 import io.xh.hoist.monitor.MonitorResult
 import io.xh.hoist.util.Utils
+import io.xh.hoist.clienterror.ClientError
+import io.xh.hoist.track.TrackLog
 
 import static io.xh.hoist.monitor.MonitorStatus.FAIL
 import static io.xh.hoist.monitor.MonitorStatus.INACTIVE
@@ -51,18 +53,14 @@ class DefaultMonitorDefinitionService extends BaseService {
             return
         }
 
-        def aggregate = result.params.aggregate ?: 'avg'
-        if (!['avg', 'max'].contains(aggregate)) {
-            throw new RuntimeException("Invalid aggregate parameter: ${result.params.aggregate}")
+        def aggregate = result.getParam('aggregate', 'avg')
+        if (!(aggregate in ['avg', 'max'])) {
+            throw new RuntimeException("Invalid aggregate parameter: $aggregate")
         }
 
-        def lookbackMinutes = result.params.lookbackMinutes
-        if (!lookbackMinutes) {
-            throw new RuntimeException('No \"lookbackMinutes\" parameter provided')
-        }
-
-        def cutOffTime = currentTimeMillis() - lookbackMinutes * MINUTES
-        def snapshots = memoryMonitoringService.snapshots.findAll {it.key > cutOffTime}.values()
+        def lookback = result.getRequiredParam('lookbackMinutes') * MINUTES,
+            cutoffTime = currentTimeMillis() - lookback,
+            snapshots = memoryMonitoringService.snapshots.findAll {it.key > cutoffTime}.values()
 
         if (!snapshots) {
             result.metric = 0
@@ -74,42 +72,27 @@ class DefaultMonitorDefinitionService extends BaseService {
             : snapshots.max{it.usedPctMax}.usedPctMax
     }
 
+    /** Sets the metric to the number of ClientError records created within the last `lookbackMinutes`. */
+    @ReadOnly
+    def xhClientErrorsMonitor(MonitorResult result) {
+        def lookback = result.getRequiredParam('lookbackMinutes') * MINUTES,
+            cutoffDate = new Date(currentTimeMillis() - lookback)
+
+        result.metric = ClientError.countByDateCreatedGreaterThan(cutoffDate)
+    }
+
+    @ReadOnly
     def xhLoadTimeMonitor(MonitorResult result) {
         if (!trackLogAdminService.enabled) {
             result.status = INACTIVE
             return
         }
 
-        def lookbackMinutes = result.params.lookbackMinutes
-        if (!lookbackMinutes) {
-            throw new RuntimeException('No \"lookbackMinutes\" parameter provided.')
-        }
-
-        def cutOffTime = currentTimeMillis() - lookbackMinutes * MINUTES
-        def logs = trackLogAdminService.queryTrackLog(
-            Filter.parse([
-                filters: [
-                    [
-                        field: 'dateCreated',
-                        op: '>',
-                        value: new Date(cutOffTime)
-                    ],
-                    [
-                        field: 'elapsed',
-                        op: '!=',
-                        value: null
-                    ]
-                ],
-                op: "AND"
-            ])
-        )
-
-        if (!logs) {
-            result.metric = 0
-            return
-        }
+        def lookback = result.getRequiredParam('lookbackMinutes') * MINUTES,
+            cutoffDate = new Date(currentTimeMillis() - lookback),
+            logs = TrackLog.findAllByDateCreatedGreaterThanAndElapsedIsNotNull(cutoffDate)
 
-        result.metric = logs.max{it.elapsed}.elapsed / SECONDS
+        result.metric = logs ? logs.max{it.elapsed}.elapsed / SECONDS : 0
     }
 
     def xhDbConnectionMonitor(MonitorResult result) {
@@ -117,7 +100,7 @@ class DefaultMonitorDefinitionService extends BaseService {
         Sql sql = new Sql(dataSource)
         try {
             // Support configurable table name for edge case where XH tables are in a custom schema.
-            def tableName = result.params.tableName ?: 'xh_monitor'
+            def tableName = result.getParam('tableName', 'xh_monitor')
             sql.rows("SELECT * FROM ${Sql.expand(tableName)} WHERE code = 'xhDbConnectionMonitor'")
         } finally {
             sql.close()
@@ -132,12 +115,10 @@ class DefaultMonitorDefinitionService extends BaseService {
             return
         }
 
-        if (!result.params.queryUser) {
-            throw new RuntimeException("No \"queryUser\" parameter provided.")
-        }
-
-        def startTime = currentTimeMillis()
-        def user = ldapService.lookupUser(result.params.queryUser)
+        // Take startTime before the lookup (as the replaced code did) so any elapsed time derived from it covers the lookup.
+        def queryUser = result.getRequiredParam('queryUser'),
+            startTime = currentTimeMillis(),
+            user = ldapService.lookupUser(queryUser)
 
         if (!user) {
             result.message = "Failed to find expected user: ${result.params.queryUser}"
@@ -177,6 +158,18 @@ class DefaultMonitorDefinitionService extends BaseService {
                     + 'Set "aggregate" to "avg" to report average heap usage (default).\n'
                     + 'Set "aggregate" to "max" to report the largest heap usage.'
             ],
+            [
+                code: 'xhClientErrorsMonitor',
+                name: 'Client Errors (Last 30m)',
+                metricType: 'Ceil',
+                metricUnit: 'errors',
+                warnThreshold: 1,
+                failThreshold: 10,
+                active: true,
+                primaryOnly: true,
+                params: '{\n\t"lookbackMinutes": 30\n}',
+                notes: 'Reports the number of client errors in the last {lookbackMinutes} minutes.'
+            ],
             [
                 code: 'xhLoadTimeMonitor',
                 name: 'Max Load Time (Last 30m)',