diff --git a/.github/workflows/nightly-playground-deploy.yml b/.github/workflows/nightly-playground-deploy.yml
index 892726a..2da8314 100644
--- a/.github/workflows/nightly-playground-deploy.yml
+++ b/.github/workflows/nightly-playground-deploy.yml
@@ -133,25 +133,38 @@ jobs:
             echo "dist_version does not belong to 2x or 3x"
           fi
 
-  register-snapshot-repo-enable-alerts:
+  # Runs after validate-and-deploy: registers the Slack webhook as a
+  # notification channel, then creates one alerting monitor per JSON file in
+  # nightly-playground/resources/monitors-config/.
+  configure-alerts-notifications:
     needs: validate-and-deploy
     runs-on: ubuntu-latest
     steps:
-      - name: Register snapshot repo
+      # The monitor definitions are files in this repository, so the workspace
+      # must be checked out before the "Configure monitors" step can read them.
+      - uses: actions/checkout@v4
+      - name: Create notification channel
         run : |
-          curl -XPUT -f "https://${{needs.validate-and-deploy.outputs.ENDPOINT}}:8443/_snapshot/snapshots-repo" -H 'Content-Type: application/json' -d'
+          curl -XPOST -f "https://${{needs.validate-and-deploy.outputs.ENDPOINT}}:8443/_plugins/_notifications/configs" -H 'Content-Type: application/json' -d'
           {
-            "type": "s3",
-            "settings": {
-              "bucket": "nightly-playgrounds-snapshots-bucket",
-              "region": "us-west-2",
-              "base_path": "${{needs.validate-and-deploy.outputs.PLAYGROUND_ID}}"
-            }
-          }' -u ${{ secrets.OPENSEARCH_USER }}:${{ secrets.OPENSEARCH_PASSWORD }} --insecure
-      - name: Restore altering configs
+            "config_id": "slack-notification-channel",
+            "name": "slack-notification-channel",
+            "config": {
+              "name": "slack-notification-channel",
+              "description": "Slack notification channel for monitoring alerts",
+              "config_type": "webhook",
+              "is_enabled": true,
+              "webhook": {
+                "url": "${{ secrets.SLACK_WEBHOOK }}"
+              }
+            }
+          }' -u ${{ secrets.OPENSEARCH_USER }}:${{ secrets.OPENSEARCH_PASSWORD }} --insecure
+
+      - name: Configure monitors
         run: |
-          curl -XPOST "https://${{needs.validate-and-deploy.outputs.ENDPOINT}}:8443/_snapshot/snapshots-repo/alerts-config/_restore" -H 'Content-Type: application/json' -d'
-          {
-            "indices": ".opendistro-alerting-config,.opensearch-notifications-config",
-            "ignore_unavailable": false,
-          }' -u ${{ secrets.OPENSEARCH_USER }}:${{ secrets.OPENSEARCH_PASSWORD }} --insecure
+          # Iterate with a glob rather than parsing `ls` output; -f makes curl
+          # exit non-zero on an HTTP error so a rejected monitor fails the step.
+          for config in nightly-playground/resources/monitors-config/*.json; do
+            curl -XPOST -f "https://${{needs.validate-and-deploy.outputs.ENDPOINT}}:8443/_plugins/_alerting/monitors" -H 'Content-Type: application/json' -d "@${config}" -u ${{ secrets.OPENSEARCH_USER }}:${{ secrets.OPENSEARCH_PASSWORD }} --insecure
+          done
+
diff --git a/nightly-playground/resources/monitors-config/cluster_health_monitor.json b/nightly-playground/resources/monitors-config/cluster_health_monitor.json
new file mode 100644
index 0000000..bff0db4
--- /dev/null
+++ b/nightly-playground/resources/monitors-config/cluster_health_monitor.json
@@ -0,0 +1,54 @@
+{
+  "name": "Cluster Health Monitor",
+  "type": "monitor",
+  "monitor_type": "cluster_metrics_monitor",
+  "enabled": true,
+  "schedule": {
+    "period": {
+      "unit": "MINUTES",
+      "interval": 5
+    }
+  },
+  "inputs": [
+    {
+      "uri": {
+        "api_type": "CLUSTER_HEALTH",
+        "path": "_cluster/health",
+        "path_params": "",
+        "url": "http://localhost:9200/_cluster/health",
+        "clusters": []
+      }
+    }
+  ],
+  "triggers": [
+    {
+      "query_level_trigger": {
+        "id": "bK6a-I4BP-Tswg_on_U_",
+        "name": "Red Cluster",
+        "severity": "1",
+        "condition": {
+          "script": {
+            "source": "ctx.results[0].status != \"green\"",
+            "lang": "painless"
+          }
+        },
+        "actions": [
+          {
+            "id": "notification415896",
+            "name": "Red Cluster",
+            "destination_id": "slack-notification-channel",
+            "message_template": {
+              "source": "{\"Content\": \"\n:alert: Monitor {{ctx.monitor.name}} just entered alert status. Please investigate the issue.\n - Trigger: {{ctx.trigger.name}}\n - Severity: {{ctx.trigger.severity}}\n - Period start: {{ctx.periodStart}}\n - Period end: {{ctx.periodEnd}}\n - Cluster name: {{ctx.results.0.cluster_name}}\n\"}",
+              "lang": "mustache"
+            },
+            "throttle_enabled": false,
+            "subject_template": {
+              "source": "Alerting Notification action",
+              "lang": "mustache"
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
diff --git a/nightly-playground/resources/monitors-config/jvm_monitor.json b/nightly-playground/resources/monitors-config/jvm_monitor.json
new file mode 100644
index 0000000..b7d42c1
--- /dev/null
+++ b/nightly-playground/resources/monitors-config/jvm_monitor.json
@@ -0,0 +1,54 @@
+{
+  "name": "High JVM Monitor",
+  "type": "monitor",
+  "monitor_type": "cluster_metrics_monitor",
+  "enabled": true,
+  "schedule": {
+    "period": {
+      "unit": "MINUTES",
+      "interval": 10
+    }
+  },
+  "inputs": [
+    {
+      "uri": {
+        "api_type": "CLUSTER_STATS",
+        "path": "_cluster/stats",
+        "path_params": "",
+        "url": "http://localhost:9200/_cluster/stats",
+        "clusters": []
+      }
+    }
+  ],
+  "triggers": [
+    {
+      "query_level_trigger": {
+        "id": "5A2d-I4Bqk6CvEFkp7X5",
+        "name": "High JVM",
+        "severity": "2",
+        "condition": {
+          "script": {
+            "source": "ctx.results[0].nodes.jvm.mem.heap_used_in_bytes >= 0.75 * ctx.results[0].nodes.jvm.mem.heap_max_in_bytes",
+            "lang": "painless"
+          }
+        },
+        "actions": [
+          {
+            "id": "notification431267",
+            "name": "High JVM Action",
+            "destination_id": "slack-notification-channel",
+            "message_template": {
+              "source": "{\"Content\": \"\n:alert: {{ctx.monitor.name}} just entered alert status. Please investigate the issue.\n - Trigger: {{ctx.trigger.name}}\n - Severity: {{ctx.trigger.severity}}\n - Period start: {{ctx.periodStart}}\n - Period end: {{ctx.periodEnd}}\n - OpenSearch Version: {{ctx.results.0.nodes.versions.0}}\n - JVM used in bytes: {{ctx.results.0.nodes.jvm.mem.heap_used_in_bytes}}\n - Max JVM available in bytes: {{ctx.results.0.nodes.jvm.mem.heap_max_in_bytes}} \n\"}",
+              "lang": "mustache"
+            },
+            "throttle_enabled": false,
+            "subject_template": {
+              "source": "Alerting Notification action",
+              "lang": "mustache"
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
diff --git a/nightly-playground/resources/monitors-config/node_monitor.json b/nightly-playground/resources/monitors-config/node_monitor.json
new file mode 100644
index 0000000..ce6af08
--- /dev/null
+++ b/nightly-playground/resources/monitors-config/node_monitor.json
@@ -0,0 +1,54 @@
+{
+  "name": "Node Monitor",
+  "type": "monitor",
+  "monitor_type": "cluster_metrics_monitor",
+  "enabled": true,
+  "schedule": {
+    "period": {
+      "unit": "MINUTES",
+      "interval": 5
+    }
+  },
+  "inputs": [
+    {
+      "uri": {
+        "api_type": "CLUSTER_HEALTH",
+        "path": "_cluster/health",
+        "path_params": "",
+        "url": "http://localhost:9200/_cluster/health",
+        "clusters": []
+      }
+    }
+  ],
+  "triggers": [
+    {
+      "query_level_trigger": {
+        "id": "1w2c-I4Bqk6CvEFke7VB",
+        "name": "Node Drop",
+        "severity": "2",
+        "condition": {
+          "script": {
+            "source": "ctx.results[0].number_of_nodes < 5",
+            "lang": "painless"
+          }
+        },
+        "actions": [
+          {
+            "id": "notification149870",
+            "name": "Node Drop Action",
+            "destination_id": "slack-notification-channel",
+            "message_template": {
+              "source": "{\"Content\": \"\n:alert: {{ctx.monitor.name}} just entered alert status. Please investigate the issue.\n - Trigger: {{ctx.trigger.name}}\n - Severity: {{ctx.trigger.severity}}\n - Period start: {{ctx.periodStart}}\n - Period end: {{ctx.periodEnd}}\n - Cluster Name: {{ctx.results.0.cluster_name}}\n - Number of Nodes expected: 5\n - Number of Nodes currently: {{ctx.results.0.number_of_nodes}}\n\"}",
+              "lang": "mustache"
+            },
+            "throttle_enabled": false,
+            "subject_template": {
+              "source": "Alerting Notification action",
+              "lang": "mustache"
+            }
+          }
+        ]
+      }
+    }
+  ]
+}