Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Support Collector Alarm #2693

Open
wants to merge 41 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
38e409e
collector alarm
pwallk Sep 8, 2024
98ef2c7
fix eslint error
pwallk Sep 8, 2024
f77314a
Merge branch 'master' into feat-collector-alarm
Calvin979 Sep 9, 2024
201ca71
fix test error
pwallk Sep 9, 2024
f7a3928
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 9, 2024
2cafeea
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 10, 2024
d7e6868
Merge branch 'master' into feat-collector-alarm
yuluo-yx Sep 11, 2024
ffba083
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 11, 2024
84ffd0a
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 12, 2024
5e639b7
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 13, 2024
154a8d1
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 13, 2024
b51ed2b
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 13, 2024
f007fb9
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 15, 2024
5643900
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 15, 2024
d1aaeda
reuse AlarmCommonReduce
pwallk Sep 15, 2024
7a143c2
Merge branch 'feat-collector-alarm' of https://github.com/pwallk/hert…
pwallk Sep 15, 2024
2ecc13d
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 17, 2024
e79ccab
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 19, 2024
b28a7e4
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 21, 2024
1327ce6
Merge branch 'master' into feat-collector-alarm
yuluo-yx Sep 22, 2024
c71f602
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 23, 2024
61c784b
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 24, 2024
46a801f
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 25, 2024
4305259
Merge branch 'master' into feat-collector-alarm
Aias00 Sep 30, 2024
c2be070
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 8, 2024
a9bca1d
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 16, 2024
3ec4a79
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 26, 2024
e59e1f3
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 26, 2024
1caa633
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 27, 2024
67cb93c
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 29, 2024
86591dc
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 29, 2024
75196e7
Merge branch 'master' into feat-collector-alarm
Aias00 Oct 31, 2024
41d28bc
Merge branch 'master' into feat-collector-alarm
Aias00 Nov 1, 2024
f62c8e1
Merge branch 'master' into feat-collector-alarm
Aias00 Nov 5, 2024
4de628c
Merge remote-tracking branch 'refs/remotes/upstream/master' into feat…
pwallk Feb 15, 2025
ea546d8
fix conflict
pwallk Feb 26, 2025
2ee7917
Merge branch 'master' into feat-collector-alarm
pwallk Feb 26, 2025
87b7905
remove unused files
pwallk Feb 26, 2025
ad173ef
fix test
pwallk Mar 1, 2025
6cbe0b2
Merge branch 'master' into feat-collector-alarm
pwallk Mar 3, 2025
6990be2
Merge branch 'master' into feat-collector-alarm
pwallk Mar 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hertzbeat.alert.calculate;

import org.apache.hertzbeat.alert.dao.SingleAlertDao;
import org.apache.hertzbeat.alert.util.AlertUtil;
import org.apache.hertzbeat.common.constants.CommonConstants;
import org.apache.hertzbeat.common.entity.alerter.SingleAlert;
import org.springframework.stereotype.Component;

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Alert cache manager.
 *
 * <p>Holds two in-memory maps of {@link SingleAlert}, both keyed by the labels fingerprint:
 * one for pending alerts and one for firing (not yet recovered) alerts. Both maps are
 * {@link ConcurrentHashMap} instances.
 */
@Component
public class AlarmCacheManager {

    /**
     * Alerts that are in the process of being triggered.
     * key - labels fingerprint
     */
    private final Map<String, SingleAlert> pendingAlertMap = new ConcurrentHashMap<>(8);

    /**
     * Alerts that have not recovered yet.
     * key - labels fingerprint
     */
    private final Map<String, SingleAlert> firingAlertMap = new ConcurrentHashMap<>(8);

    /**
     * Creates the cache and preloads the firing map with every alert the DAO
     * returns for the firing status.
     *
     * @param singleAlertDao DAO used to query the alerts currently in firing status
     */
    public AlarmCacheManager(SingleAlertDao singleAlertDao) {
        List<SingleAlert> storedFiringAlerts =
                singleAlertDao.querySingleAlertsByStatus(CommonConstants.ALERT_STATUS_FIRING);
        storedFiringAlerts.forEach(restored -> {
            String key = AlertUtil.calculateFingerprint(restored.getLabels());
            // The id is cleared on the cached copy before it is stored.
            restored.setId(null);
            firingAlertMap.put(key, restored);
        });
    }

    /**
     * Stores (or replaces) the pending alert for the given fingerprint.
     *
     * @param fingerPrint labels fingerprint
     * @param alert       alert to cache as pending
     */
    public void putPending(String fingerPrint, SingleAlert alert) {
        pendingAlertMap.put(fingerPrint, alert);
    }

    /**
     * Looks up the pending alert for the given fingerprint.
     *
     * @param fingerPrint labels fingerprint
     * @return the cached pending alert, or {@code null} when none is cached
     */
    public SingleAlert getPending(String fingerPrint) {
        return pendingAlertMap.get(fingerPrint);
    }

    /**
     * Removes the pending alert for the given fingerprint.
     *
     * @param fingerPrint labels fingerprint
     * @return the alert that was removed, or {@code null} when none was cached
     */
    public SingleAlert removePending(String fingerPrint) {
        return pendingAlertMap.remove(fingerPrint);
    }

    /**
     * Stores (or replaces) the firing alert for the given fingerprint.
     *
     * @param fingerPrint labels fingerprint
     * @param alert       alert to cache as firing
     */
    public void putFiring(String fingerPrint, SingleAlert alert) {
        firingAlertMap.put(fingerPrint, alert);
    }

    /**
     * Looks up the firing alert for the given fingerprint.
     *
     * @param fingerPrint labels fingerprint
     * @return the cached firing alert, or {@code null} when none is cached
     */
    public SingleAlert getFiring(String fingerPrint) {
        return firingAlertMap.get(fingerPrint);
    }

    /**
     * Removes the firing alert for the given fingerprint.
     *
     * @param fingerPrint labels fingerprint
     * @return the alert that was removed, or {@code null} when none was cached
     */
    public SingleAlert removeFiring(String fingerPrint) {
        return firingAlertMap.remove(fingerPrint);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hertzbeat.alert.calculate;

import lombok.extern.slf4j.Slf4j;
import org.apache.hertzbeat.alert.dao.AlertCollectorDao;
import org.apache.hertzbeat.alert.reduce.AlarmCommonReduce;
import org.apache.hertzbeat.alert.util.AlertUtil;
import org.apache.hertzbeat.common.constants.CommonConstants;
import org.apache.hertzbeat.common.entity.alerter.SingleAlert;
import org.apache.hertzbeat.common.entity.manager.Collector;
import org.apache.hertzbeat.common.support.event.SystemConfigChangeEvent;
import org.apache.hertzbeat.common.util.ResourceBundleUtil;
import org.springframework.context.event.EventListener;
import org.springframework.stereotype.Component;

import java.util.HashMap;
import java.util.Map;
import java.util.ResourceBundle;

/**
 * Handles collector alarms.
 *
 * <p>{@link #offline(String)} raises a firing alert for a collector and caches it in the
 * {@link AlarmCacheManager}; {@link #online(String)} resolves that alert and evicts it from
 * the cache. Alerts are identified by a fingerprint computed from the collector's name,
 * version and host.
 */
@Component
@Slf4j
public class CollectorAlertHandler {

    private static final String KEY_COLLECTOR_NAME = "collectorName";
    private static final String KEY_COLLECTOR_VERSION = "collectorVersion";
    private static final String KEY_COLLECTOR_HOST = "collectorHost";

    private final AlertCollectorDao alertCollectorDao;

    private final AlarmCommonReduce alarmCommonReduce;

    private final AlarmCacheManager alarmCacheManager;

    /**
     * Message bundle used for the alert content; reloaded on every system config change event.
     */
    private ResourceBundle bundle;


    public CollectorAlertHandler(AlarmCommonReduce alarmCommonReduce, AlertCollectorDao alertCollectorDao,
                                 AlarmCacheManager alarmCacheManager) {
        this.alarmCommonReduce = alarmCommonReduce;
        this.alertCollectorDao = alertCollectorDao;
        this.alarmCacheManager = alarmCacheManager;
        this.bundle = ResourceBundleUtil.getBundle("alerter");
    }

    /**
     * handle collector online
     *
     * <p>If a firing alert is cached for this collector, it is removed from the cache,
     * marked as resolved and sent. Does nothing when the collector is unknown or no
     * firing alert is cached.
     *
     * @param identity collector name
     */
    public void online(final String identity) {
        Collector collector = alertCollectorDao.findCollectorByName(identity);
        if (collector == null) {
            return;
        }
        String fingerprint = AlertUtil.calculateFingerprint(buildCollectorLabels(collector));
        // Remove (not just read) the firing alert: if the resolved alert stayed in the
        // firing cache, the next offline() for this collector would find it and never
        // raise a new alert.
        // NOTE(review): version and host are part of the fingerprint, so a collector whose
        // stored version/IP changed since it went offline would presumably not match its
        // cached alert here - confirm whether that can happen.
        SingleAlert firingAlert = alarmCacheManager.removeFiring(fingerprint);
        if (firingAlert != null) {
            firingAlert.setTriggerTimes(1);
            firingAlert.setEndAt(System.currentTimeMillis());
            firingAlert.setStatus(CommonConstants.ALERT_STATUS_RESOLVED);
            alarmCommonReduce.reduceAndSendAlarm(firingAlert.clone());
        }
    }


    /**
     * handle collector offline
     *
     * <p>Creates a firing alert for the collector, caches it and sends it. Does nothing
     * when the collector is unknown or a firing alert is already cached for it.
     *
     * @param identity collector name
     */
    public void offline(final String identity) {
        Collector collector = alertCollectorDao.findCollectorByName(identity);
        if (collector == null) {
            return;
        }
        Map<String, String> labels = buildCollectorLabels(collector);
        String fingerprint = AlertUtil.calculateFingerprint(labels);
        if (alarmCacheManager.getFiring(fingerprint) != null) {
            // Already firing for this collector; do not raise a duplicate alert.
            return;
        }
        long currentTimeMill = System.currentTimeMillis();
        SingleAlert newAlert = SingleAlert.builder()
                .labels(labels)
                // Annotations get their own copy so that a later change to the labels map
                // does not silently change the annotations as well (and vice versa).
                .annotations(new HashMap<>(labels))
                .content(this.bundle.getString("alerter.availability.collector.offline"))
                .status(CommonConstants.ALERT_STATUS_FIRING)
                .triggerTimes(1)
                .startAt(currentTimeMill)
                .activeAt(currentTimeMill)
                .build();
        alarmCacheManager.putFiring(fingerprint, newAlert);
        alarmCommonReduce.reduceAndSendAlarm(newAlert.clone());
    }


    /**
     * Reloads the message bundle when the system configuration changes.
     *
     * @param event system config change event
     */
    @EventListener(SystemConfigChangeEvent.class)
    public void onSystemConfigChangeEvent(SystemConfigChangeEvent event) {
        log.info("calculate alarm receive system config change event: {}.", event.getSource());
        this.bundle = ResourceBundleUtil.getBundle("alerter");
    }

    /**
     * Builds the label map (name, version, host) that identifies a collector alert.
     *
     * @param collector collector to describe
     * @return a new mutable map holding the collector's name, version and IP
     */
    private Map<String, String> buildCollectorLabels(Collector collector) {
        Map<String, String> labels = new HashMap<>(8);
        labels.put(KEY_COLLECTOR_NAME, collector.getName());
        labels.put(KEY_COLLECTOR_VERSION, collector.getVersion());
        labels.put(KEY_COLLECTOR_HOST, collector.getIp());
        return labels;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,17 @@

package org.apache.hertzbeat.alert.calculate;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.apache.hertzbeat.alert.reduce.AlarmCommonReduce;
import org.apache.hertzbeat.alert.service.DataSourceService;
import org.apache.hertzbeat.alert.util.AlertTemplateUtil;
import org.apache.hertzbeat.alert.util.AlertUtil;
import org.apache.hertzbeat.common.constants.CommonConstants;
import org.apache.hertzbeat.common.entity.alerter.AlertDefine;
import org.apache.hertzbeat.common.entity.alerter.SingleAlert;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Component;
Expand All @@ -47,22 +45,13 @@ public class PeriodicAlertCalculator {

private final DataSourceService dataSourceService;
private final AlarmCommonReduce alarmCommonReduce;
/**
* The alarm in the process is triggered
* key - labels fingerprint
*/
private final Map<String, SingleAlert> pendingAlertMap;
/**
* The not recover alert
* key - labels fingerprint
*/
private final Map<String, SingleAlert> firingAlertMap;

public PeriodicAlertCalculator(DataSourceService dataSourceService, AlarmCommonReduce alarmCommonReduce) {
private final AlarmCacheManager alarmCacheManager;

public PeriodicAlertCalculator(DataSourceService dataSourceService, AlarmCommonReduce alarmCommonReduce,
AlarmCacheManager alarmCacheManager) {
this.dataSourceService = dataSourceService;
this.alarmCommonReduce = alarmCommonReduce;
this.pendingAlertMap = new ConcurrentHashMap<>(8);
this.firingAlertMap = new ConcurrentHashMap<>(8);
this.alarmCacheManager = alarmCacheManager;
}

public void calculate(AlertDefine rule) {
Expand Down Expand Up @@ -122,8 +111,8 @@ public void calculate(AlertDefine rule) {

private void afterThresholdRuleMatch(long currentTimeMilli, Map<String, String> fingerPrints,
Map<String, Object> fieldValueMap, AlertDefine define) {
String fingerprint = calculateFingerprint(fingerPrints);
SingleAlert existingAlert = pendingAlertMap.get(fingerprint);
String fingerprint = AlertUtil.calculateFingerprint(fingerPrints);
SingleAlert existingAlert = alarmCacheManager.getPending(fingerprint);
Map<String, String> labels = new HashMap<>(8);
fieldValueMap.putAll(define.getLabels());
labels.putAll(fingerPrints);
Expand All @@ -144,11 +133,11 @@ private void afterThresholdRuleMatch(long currentTimeMilli, Map<String, String>
// If required trigger times is 1, set to firing status directly
if (requiredTimes <= 1) {
newAlert.setStatus(CommonConstants.ALERT_STATUS_FIRING);
firingAlertMap.put(fingerprint, newAlert);
alarmCacheManager.putFiring(fingerprint, newAlert);
alarmCommonReduce.reduceAndSendAlarm(newAlert.clone());
} else {
// Otherwise put into pending queue first
pendingAlertMap.put(fingerprint, newAlert);
alarmCacheManager.putPending(fingerprint, newAlert);
}
} else {
// Update existing alert
Expand All @@ -158,31 +147,25 @@ private void afterThresholdRuleMatch(long currentTimeMilli, Map<String, String>
// Check if required trigger times reached
if (existingAlert.getStatus().equals(CommonConstants.ALERT_STATUS_PENDING) && existingAlert.getTriggerTimes() >= requiredTimes) {
// Reached trigger times threshold, change to firing status
pendingAlertMap.remove(fingerprint);
alarmCacheManager.removePending(fingerprint);
existingAlert.setStatus(CommonConstants.ALERT_STATUS_FIRING);
firingAlertMap.put(fingerprint, existingAlert);
alarmCacheManager.putFiring(fingerprint, existingAlert);
alarmCommonReduce.reduceAndSendAlarm(existingAlert.clone());
}
}
}

private void handleRecoveredAlert(Map<String, String> fingerprints) {
String fingerprint = calculateFingerprint(fingerprints);
SingleAlert firingAlert = firingAlertMap.remove(fingerprint);
String fingerprint = AlertUtil.calculateFingerprint(fingerprints);
SingleAlert firingAlert = alarmCacheManager.removeFiring(fingerprint);
if (firingAlert != null) {
// todo consider multiple trigger times for resolved alert
firingAlert.setTriggerTimes(1);
firingAlert.setEndAt(System.currentTimeMillis());
firingAlert.setStatus(CommonConstants.ALERT_STATUS_RESOLVED);
alarmCommonReduce.reduceAndSendAlarm(firingAlert.clone());
}
pendingAlertMap.remove(fingerprint);
alarmCacheManager.removePending(fingerprint);
}

private String calculateFingerprint(Map<String, String> fingerPrints) {
List<String> keyList = fingerPrints.keySet().stream().filter(Objects::nonNull).sorted().toList();
List<String> valueList = fingerPrints.values().stream().filter(Objects::nonNull).sorted().toList();
return Arrays.hashCode(keyList.toArray(new String[0])) + "-"
+ Arrays.hashCode(valueList.toArray(new String[0]));
}
}
Loading