Skip to content

Commit

Permalink
Merge branch 'master' into deprecate-ofn
Browse files Browse the repository at this point in the history
  • Loading branch information
wu-sheng authored Nov 27, 2023
2 parents 5841bff + 173c290 commit f722348
Show file tree
Hide file tree
Showing 9 changed files with 274 additions and 182 deletions.
8 changes: 7 additions & 1 deletion docs/en/changes/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
* Replace Metrics v2 protocol with MQE in UI templates and E2E Test.
* Fix incorrect apisix metrics otel rules.
* Support `Scratch The OAP Config Dump`.
* Group service endpoints into `_abandoned` when endpoints have high
cardinality.

#### UI

Expand All @@ -70,7 +72,11 @@
* Add Nginx menu i18n.
* Fix the height for trace widget.
* Polish list style.
* Fixes Log associate with Trace.
* Fix Log association with Trace.
* Enhance layout for broken Topology widget.
* Fix calls metric with call type for Topology widget.
* Fix changing metrics config for Topology widget.
* Fix routes for Tab widget.
* Remove OpenFunction (FAAS layer) related UI templates and menu item.

#### Documentation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,18 @@

package org.apache.skywalking.oap.server.core.config.group;

import io.vavr.Tuple2;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.skywalking.oap.server.ai.pipeline.services.api.HttpUriPattern;
import org.apache.skywalking.oap.server.ai.pipeline.services.api.HttpUriRecognition;
import org.apache.skywalking.oap.server.core.config.group.openapi.EndpointGroupingRule4Openapi;
Expand All @@ -37,9 +38,17 @@
import org.apache.skywalking.oap.server.library.util.CollectionUtils;
import org.apache.skywalking.oap.server.library.util.RunnableWithExceptionProtection;
import org.apache.skywalking.oap.server.library.util.StringFormatGroup;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import io.vavr.Tuple2;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;

@Slf4j
public class EndpointNameGrouping {
public static final String ABANDONED_ENDPOINT_NAME = "_abandoned";

/**
* Endpoint grouping according to local endpoint-name-grouping.yml or associated dynamic configuration.
*/
Expand All @@ -61,7 +70,14 @@ public class EndpointNameGrouping {
* If the URI is formatted by the rules, the value would be the first 10 formatted names.
* If the URI is unformatted, the value would be an empty queue.
*/
private ConcurrentHashMap<String, ConcurrentHashMap<String, ArrayBlockingQueue<String>>> cachedHttpUris = new ConcurrentHashMap<>();
private final Map<String/* service */, Map<String/* uri */, Queue<String>/* candidate patterns */>> cachedHttpUris = new ConcurrentHashMap<>();
/**
 * Per-service record of the endpoint names (URIs) that no grouping rule has formatted.
 * {@code format} adds a name here while the service's set is below maxHttpUrisNumberPerService,
 * and removes it once the name gets formatted.
 *
 * Entries use Guava's expire-after-write policy with a 10 minute duration. Per Guava's contract the
 * timer starts when the set is created (or replaced); adding to or removing from the set itself is
 * not a cache write, so it presumably does not extend the entry's lifetime. The whole set of a
 * service is therefore rebuilt from scratch after expiration.
 */
private final LoadingCache<String/* service */, Set<String>/* unformatted uris */> unformattedHttpUrisCache =
CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10)).build(new CacheLoader<>() {
// Lazily creates an empty, concurrent set the first time a service is looked up
// (and again after its previous entry has expired).
@Override
public Set<String> load(String service) {
return ConcurrentHashMap.newKeySet();
}
});
private final AtomicInteger aiPipelineExecutionCounter = new AtomicInteger(0);
/**
* The max number of HTTP URIs per service for further URI pattern recognition.
Expand Down Expand Up @@ -90,7 +106,7 @@ public Tuple2<String, Boolean> format(String serviceName, String endpointName) {
if (!formattedName._2() && quickUriGroupingRule != null) {
formattedName = formatByQuickUriPattern(serviceName, endpointName);

ConcurrentHashMap<String, ArrayBlockingQueue<String>> svrHttpUris =
Map<String, Queue<String>> svrHttpUris =
cachedHttpUris.computeIfAbsent(serviceName, k -> new ConcurrentHashMap<>());

// Only cache the first N (determined by maxHttpUrisNumberPerService) URIs per 30 mins.
Expand All @@ -99,18 +115,29 @@ public Tuple2<String, Boolean> format(String serviceName, String endpointName) {
// The algorithm side should not return a pattern that has no {var} in it; otherwise this
// code may accidentally retrieve the size-1 queue created for an unformatted endpoint.
// The queue size is 10, which means only the first 10 raw URIs are cached per formatted name.
final ArrayBlockingQueue<String> formattedURIs = svrHttpUris.computeIfAbsent(
final Queue<String> formattedURIs = svrHttpUris.computeIfAbsent(
formattedName._1(), k -> new ArrayBlockingQueue<>(10));
if (formattedURIs.size() < 10) {
// Try to push the raw URI as a candidate of formatted name.
formattedURIs.offer(endpointName);
}
// Try to push the raw URI as a candidate of formatted name.
formattedURIs.offer(endpointName);
} else {
svrHttpUris.computeIfAbsent(endpointName, k -> new ArrayBlockingQueue<>(1));
}
}
}

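// Once a service has reached maxHttpUrisNumberPerService tracked unformatted URIs, every
// unformatted URI of that service is reported as ABANDONED_ENDPOINT_NAME instead of its raw
// name, to reduce the load on OAP and storage.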
final var unformattedUrisOfService = unformattedHttpUrisCache.getUnchecked(serviceName);
if (!formattedName._2()) {
if (unformattedUrisOfService.size() < maxHttpUrisNumberPerService) {
unformattedUrisOfService.add(endpointName);
} else {
formattedName = new Tuple2<>(ABANDONED_ENDPOINT_NAME, true);
}
} else {
unformattedUrisOfService.remove(endpointName);
}

return formattedName;
}

Expand Down Expand Up @@ -204,8 +231,6 @@ public void startHttpUriRecognitionSvr(final HttpUriRecognition httpUriRecogniti
final List<HttpUriPattern> patterns
= httpUriRecognitionSvr.fetchAllPatterns(service.getName());
if (CollectionUtils.isNotEmpty(patterns)) {
StringFormatGroup group = new StringFormatGroup(
patterns.size());
patterns.forEach(
p -> quickUriGroupingRule.addRule(
service.getName(), p.getPattern()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,47 +287,67 @@
"linkDashboard": "General-Endpoint-Relation",
"nodeDashboard": "General-Endpoint",
"linkServerMetrics": [
"endpoint_relation_sla",
"endpoint_relation_resp_time",
"endpoint_relation_cpm"

],
"linkClientMetrics": [

],
"linkClientMetrics": [],
"nodeMetrics": [
"endpoint_sla",
"endpoint_resp_time",
"endpoint_cpm"

],
"legend": [],
"description": {},
"legend": [

],
"description": {

},
"linkServerMetricConfig": [
{
"unit": "%",
"label": "Success Rate",
"calculation": "percentage"
"unit": "%"
},
{
"unit": "ms",
"label": "Latency"
"label": "Latency",
"unit": "ms"
},
{
"unit": "calls / min",
"label": "Load"
"label": "Load",
"unit": "calls / min"
}
],
"nodeMetricConfig": [
{
"unit": "%",
"label": "Success Rate",
"calculation": "percentage"
"unit": "%"
},
{
"unit": "ms",
"label": "Latency"
"label": "Latency",
"unit": "ms"
},
{
"unit": "calls / min ",
"label": "Load"
"label": "Load",
"unit": "calls / min"
}
],
"linkServerExpressions": [
"avg(endpoint_relation_sla/100)",
"avg(endpoint_relation_resp_time)",
"avg(endpoint_relation_cpm)"
],
"linkClientExpressions": [

],
"nodeExpressions": [
"avg(endpoint_sla/100)",
"avg(endpoint_resp_time)",
"avg(endpoint_cpm)"
],
"metricMode": "Expression",
"legendMQE": {
"expression": ""
},
"linkClientMetricConfig": [

]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,69 +108,72 @@
}
],
"linkServerMetrics": [
"service_relation_server_resp_time",
"service_relation_server_cpm"

],
"linkClientMetrics": [
"service_relation_client_cpm",
"service_relation_client_resp_time"

],
"nodeMetrics": [
"service_cpm",
"service_sla",
"service_resp_time"

],
"legend": [
{
"name": "service_sla",
"condition": "<",
"value": "9500"
},
{
"name": "service_cpm",
"condition": ">",
"value": "1"
}

],
"description": {
"healthy": "Healthy",
"unhealthy": "Success Rate < 95% and Traffic > 1 calls / min"
},
"nodeMetricConfig": [
{
"unit": "calls / min",
"label": "Load"
"label": "Load",
"unit": "calls / min"
},
{
"calculation": "percentage",
"unit": "%",
"label": "Success Rate"
},
{
"unit": "ms",
"label": "Latency"
"label": "Latency",
"unit": "ms"
}
],
"linkServerMetricConfig": [
{
"unit": "ms",
"label": "Server Latency"
"label": "Server Latency",
"unit": "ms"
},
{
"unit": "calls / min",
"label": "Server Load"
"label": "Server Load",
"unit": "calls / min"
}
],
"linkClientMetricConfig": [
{
"unit": "calls / min",
"label": "Client Load"
"label": "Client Latency",
"unit": "ms"
},
{
"unit": "ms",
"label": "Client Latency"
"label": "Client Load",
"unit": "calls / min"
}
]
],
"linkServerExpressions": [
"avg(service_relation_server_resp_time)",
"avg(service_relation_server_cpm)"
],
"linkClientExpressions": [
"avg(service_relation_client_resp_time)",
"avg(service_relation_client_cpm)"
],
"nodeExpressions": [
"avg(service_cpm)",
"avg(service_sla/100)",
"avg(service_resp_time)"
],
"metricMode": "Expression",
"legendMQE": {
"expression": "avg(service_sla < 9500) and avg(service_cpm) > 1"
}
}
]
},
Expand Down
Loading

0 comments on commit f722348

Please sign in to comment.