Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: 修复日志相关 Sentry 异常 #1579

Merged
merged 4 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apiserver/paasng/paasng/accessories/log/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
# 默认查询日志的分片大小
DEFAULT_LOG_BATCH_SIZE = 200

# ES 查询的最大窗口,可在 ES 中配置,但不建议调大,容易导致 ES oom
# 日志平台最多也只返回 10,000 条数据,且不可修改
MAX_RESULT_WINDOW = 10000


class LogTimeChoices(str, StructuredEnum):
"""日志搜索-日期范围可选值"""
Expand Down
14 changes: 12 additions & 2 deletions apiserver/paasng/paasng/accessories/log/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from rest_framework.exceptions import ValidationError
from rest_framework.fields import get_attribute

from paasng.accessories.log.constants import LogTimeChoices
from paasng.accessories.log.constants import MAX_RESULT_WINDOW, LogTimeChoices
from paasng.infras.bk_log.constatns import BkLogType
from paasng.utils.es_log.time_range import SmartTimeRange

Expand Down Expand Up @@ -115,7 +115,13 @@ class LogFieldFilterSLZ(serializers.Serializer):


class LogQueryParamsSLZ(serializers.Serializer):
"""查询日志的 query 参数"""
"""查询日志的 query 参数,包含:
- 结构化日志:需要分页
- 访问日志:需要分页
- 标准输出日志:不需要分页
- 日志事件直方图:不需要分页
- 日志字段统计:不需要分页
jiayuan929 marked this conversation as resolved.
Show resolved Hide resolved
"""

time_range = serializers.ChoiceField(choices=LogTimeChoices.get_choices(), required=True)
start_time = serializers.DateTimeField(help_text="format %Y-%m-%d %H:%M:%S", allow_null=True, required=False)
Expand Down Expand Up @@ -143,6 +149,10 @@ def validate(self, attrs):
attrs["limit"] = attrs["page_size"]
if "page" in attrs:
attrs["offset"] = (attrs["page"] - 1) * attrs["limit"]

# 限制最大分页条数
if attrs["offset"] + attrs["limit"] > MAX_RESULT_WINDOW:
raise ValidationError(_(f"最多仅能查看前 {MAX_RESULT_WINDOW} 条日志"))
return attrs


Expand Down
26 changes: 20 additions & 6 deletions apiserver/paasng/paasng/accessories/log/views/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

from paasng.accessories.log import serializers
from paasng.accessories.log.client import instantiate_log_client
from paasng.accessories.log.constants import DEFAULT_LOG_BATCH_SIZE, LogType
from paasng.accessories.log.constants import DEFAULT_LOG_BATCH_SIZE, MAX_RESULT_WINDOW, LogType
from paasng.accessories.log.dsl import SearchRequestSchema
from paasng.accessories.log.exceptions import NoIndexError
from paasng.accessories.log.filters import EnvFilter, ModuleFilter
Expand Down Expand Up @@ -218,7 +218,9 @@ def query_logs(self, request, code, module_name, environment):
search=search,
timeout=settings.DEFAULT_ES_SEARCH_TIMEOUT,
)
except RequestError:
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("Request error when querying logs: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to get logs")
Expand All @@ -229,6 +231,8 @@ def query_logs(self, request, code, module_name, environment):
"logs": clean_logs(list(response), log_config.search_params),
"total": total,
"dsl": json.dumps(search.to_dict()),
# 前端使用该配置控制页面上展示的日志分页的最大页数
"max_result_window": MAX_RESULT_WINDOW,
},
Logs[self.line_model], # type: ignore
)
Expand Down Expand Up @@ -264,7 +268,9 @@ def query_logs_scroll(self, request, code, module_name, environment):
# scan 失败大概率是 scroll_id 失效
logger.exception("scroll_id 失效, 日志查询失败")
raise error_codes.QUERY_LOG_FAILED.f(_("日志查询快照失效, 请刷新后重试。"))
except RequestError:
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("request error when querying logs: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to get logs")
Expand Down Expand Up @@ -298,10 +304,18 @@ def aggregate_date_histogram(self, request, code, module_name, environment):
),
time_field=log_config.search_params.timeField,
)
try:
response = log_client.aggregate_date_histogram(
index=log_config.search_params.indexPattern, search=search, timeout=settings.DEFAULT_ES_SEARCH_TIMEOUT
)
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("request error when aggregate time-based histogram: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to aggregate time-based histogram")
raise error_codes.QUERY_LOG_FAILED.f(_("聚合时间直方图失败,请稍后再试。"))

response = log_client.aggregate_date_histogram(
index=log_config.search_params.indexPattern, search=search, timeout=settings.DEFAULT_ES_SEARCH_TIMEOUT
)
date_histogram = cattr.structure(
{
**clean_histogram_buckets(response),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class ErrorCodes:
QUERY_REPO_OVERVIEW_DATA_ERROR = ErrorCode(_("查询代码仓库概览数据异常"))
# 日志查询异常
QUERY_ES_ERROR = ErrorCode(_("日志系统异常, 请稍后重试"))
QUERY_LOG_FAILED = ErrorCode(_("查询日志失败"))
QUERY_REQUEST_ERROR = ErrorCode(_("查询日志失败,请检查查询条件"))

# 可见范围修改失败
VISIBLE_RANGE_UPDATE_FAIELD = ErrorCode(_("可见范围修改失败"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from paasng.accessories.log.client import instantiate_log_client as instantiate_bksaas_log_client
from paasng.accessories.log.constants import LogType
from paasng.accessories.log.models import ProcessLogQueryConfig
from paasng.accessories.log.shim import setup_env_log_model
from paasng.bk_plugins.pluginscenter.definitions import ElasticSearchParams
from paasng.bk_plugins.pluginscenter.log.client import (
FieldBucketData,
Expand Down Expand Up @@ -249,7 +250,10 @@ def _instantiate_log_client(
search_params = cast(ElasticSearchParams, pd.log_config.ingress)
return log_client, search_params
# 由于 bk-saas 接入了日志平台, 每个应用独立的日志查询配置, 因此需要访问 PaaS 的数据库获取配置信息
env = Application.objects.get(code=instance.id).get_app_envs("prod")
# 插件开发中心只部署主模块的生产环境
env = Application.objects.get(code=instance.id).envs.get(environment="prod", module__is_default=True)
# 初始化 env log 模型, 保证数据库对象存在且是 settings 中的最新配置
setup_env_log_model(env)
if log_type == LogType.INGRESS:
log_config = ProcessLogQueryConfig.objects.select_process_irrelevant(env).ingress
search_params = log_config.search_params
Expand Down
10 changes: 8 additions & 2 deletions apiserver/paasng/paasng/bk_plugins/pluginscenter/log/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
# We undertake not to change the open source license (MIT license) applicable
# to the current version of the project delivered to anyone in the future.

import logging
from functools import reduce
from operator import add
from typing import Dict, List, Protocol, Tuple
Expand All @@ -39,6 +39,8 @@
from paasng.utils.es_log.models import FieldFilter
from paasng.utils.es_log.search import SmartSearch

logger = logging.getLogger(__name__)


class LogClientProtocol(Protocol):
"""LogClient protocol, all log search backend should abide this protocol"""
Expand Down Expand Up @@ -115,7 +117,11 @@ def _call_api(self, data, timeout: int):
data["bkdata_authentication_method"] = self.config.bkdataAuthenticationMethod
if self.config.bkdataDataToken:
data["bkdata_data_token"] = self.config.bkdataDataToken
return self.client.call(data=data, timeout=timeout)
resp = self.client.call(data=data, timeout=timeout)
if not resp.get("result"):
logger.error(f"query bk log error: {resp.get('message')}")
raise error_codes.QUERY_REQUEST_ERROR
SheepSheepChen marked this conversation as resolved.
Show resolved Hide resolved
return resp


class ESLogClient:
Expand Down
137 changes: 89 additions & 48 deletions apiserver/paasng/paasng/bk_plugins/pluginscenter/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from django.utils.translation import get_language
from django.utils.translation import gettext_lazy as _
from drf_yasg.utils import swagger_auto_schema
from elasticsearch.exceptions import RequestError
from rest_framework import mixins, status
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.pagination import LimitOffsetPagination
Expand Down Expand Up @@ -993,15 +994,23 @@ def query_standard_output_logs(self, request, pd_id, plugin_id):
slz.is_valid(raise_exception=True)
query_params = slz.validated_data

logs = log_api.query_standard_output_logs(
pd=plugin.pd,
instance=plugin,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
limit=query_params["limit"],
offset=query_params["offset"],
)
try:
logs = log_api.query_standard_output_logs(
pd=plugin.pd,
instance=plugin,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
limit=query_params["limit"],
offset=query_params["offset"],
)
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("request error when querying stdout log: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("Failed to query stdout log")
raise error_codes.QUERY_LOG_FAILED.f(_("查询标准输出日志失败,请稍后再试。"))
return Response(data=serializers.StandardOutputLogsSLZ(logs).data)

@swagger_auto_schema(
Expand All @@ -1021,17 +1030,25 @@ def query_structure_logs(self, request, pd_id, plugin_id):
slz.is_valid(raise_exception=True)
query_params = slz.validated_data

logs = log_api.query_structure_logs(
pd=plugin.pd,
instance=plugin,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
terms=data["query"]["terms"],
exclude=data["query"]["exclude"],
limit=query_params["limit"],
offset=query_params["offset"],
)
try:
logs = log_api.query_structure_logs(
pd=plugin.pd,
instance=plugin,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
terms=data["query"]["terms"],
exclude=data["query"]["exclude"],
limit=query_params["limit"],
offset=query_params["offset"],
)
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("request error when querying structure log: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("Failed to query structure log")
raise error_codes.QUERY_LOG_FAILED.f(_("查询结构化日志失败,请稍后再试。"))
return Response(data=serializers.StructureLogsSLZ(logs).data)

@swagger_auto_schema(
Expand All @@ -1051,15 +1068,23 @@ def query_ingress_logs(self, request, pd_id, plugin_id):
slz.is_valid(raise_exception=True)
query_params = slz.validated_data

logs = log_api.query_ingress_logs(
pd=plugin.pd,
instance=plugin,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
limit=query_params["limit"],
offset=query_params["offset"],
)
try:
logs = log_api.query_ingress_logs(
pd=plugin.pd,
instance=plugin,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
limit=query_params["limit"],
offset=query_params["offset"],
)
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("request error when querying ingress log: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("Failed to query ingress log")
raise error_codes.QUERY_LOG_FAILED.f(_("查询访问日志失败,请稍后再试。"))
return Response(data=serializers.IngressLogSLZ(logs).data)

@swagger_auto_schema(
Expand All @@ -1081,14 +1106,22 @@ def aggregate_date_histogram(
slz.is_valid(raise_exception=True)
query_params = slz.validated_data

date_histogram = log_api.aggregate_date_histogram(
pd=plugin.pd,
instance=plugin,
log_type=log_type,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
)
try:
date_histogram = log_api.aggregate_date_histogram(
pd=plugin.pd,
instance=plugin,
log_type=log_type,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
)
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("failed to aggregate time-based histogram: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to aggregate time-based histogram")
raise error_codes.QUERY_LOG_FAILED.f(_("聚合时间直方图失败,请稍后再试。"))
return Response(data=serializers.DateHistogramSLZ(date_histogram).data)

@swagger_auto_schema(
Expand All @@ -1099,7 +1132,7 @@ def aggregate_date_histogram(
def aggregate_fields_filters(
self, request, pd_id, plugin_id, log_type: Literal["standard_output", "structure", "ingress"]
):
"""查询日志基于时间分布的直方图"""
"""统计日志的字段分布"""
plugin = self.get_plugin_instance()

slz = serializers.PluginLogQueryBodySLZ(data=request.data)
Expand All @@ -1110,16 +1143,24 @@ def aggregate_fields_filters(
slz.is_valid(raise_exception=True)
query_params = slz.validated_data

fields_filters = log_api.aggregate_fields_filters(
pd=plugin.pd,
instance=plugin,
log_type=log_type,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
terms=data["query"]["terms"],
exclude=data["query"]["exclude"],
)
try:
fields_filters = log_api.aggregate_fields_filters(
pd=plugin.pd,
instance=plugin,
log_type=log_type,
operator=request.user.username,
time_range=query_params["smart_time_range"],
query_string=data["query"]["query_string"],
terms=data["query"]["terms"],
exclude=data["query"]["exclude"],
)
except RequestError as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,故仅打 error 日志
logger.error("request error when aggregating log fields: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("Failed to aggregate log fields")
raise error_codes.QUERY_LOG_FAILED.f(_("统计日志的字段失败,请稍后再试。"))
return Response(data=serializers.LogFieldFilterSLZ(fields_filters, many=True).data)


Expand Down