Skip to content

Commit

Permalink
fix: 修复日志相关 Sentry 异常 (TencentBlueKing#1579)
Browse files Browse the repository at this point in the history
  • Loading branch information
jiayuan929 authored and songzxc789 committed Sep 24, 2024
1 parent 1f5bb39 commit ff89578
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 63 deletions.
12 changes: 9 additions & 3 deletions apiserver/paasng/paasng/accessories/log/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
# We undertake not to change the open source license (MIT license) applicable
# to the current version of the project delivered to anyone in the future.

import logging
from operator import attrgetter
from typing import Dict, List, Optional, Protocol, Tuple, Union

Expand All @@ -27,7 +27,7 @@
from elasticsearch_dsl.response.aggs import FieldBucketData

from paasng.accessories.log.constants import DEFAULT_LOG_BATCH_SIZE
from paasng.accessories.log.exceptions import LogQueryError, NoIndexError
from paasng.accessories.log.exceptions import BkLogApiError, LogQueryError, NoIndexError
from paasng.accessories.log.filters import (
FieldFilter,
agg_builtin_filters,
Expand All @@ -40,6 +40,8 @@
from paasng.utils.es_log.misc import filter_indexes_by_time_range
from paasng.utils.es_log.search import SmartSearch, SmartTimeRange

logger = logging.getLogger(__name__)


class LogClientProtocol(Protocol):
"""LogClient protocol, all log search backend should abide this protocol"""
Expand Down Expand Up @@ -178,7 +180,11 @@ def _call_api(self, method: _APIGWOperationStub, data: Dict, timeout: int):
data["bkdata_authentication_method"] = self.config.bkdataAuthenticationMethod
if self.config.bkdataDataToken:
data["bkdata_data_token"] = self.config.bkdataDataToken
return method(data=data, timeout=timeout)
resp = method(data=data, timeout=timeout)
if not resp.get("result"):
logger.error(f"query bk log error: {resp['message']}")
raise BkLogApiError(resp["message"])
return resp


class ESLogClient:
Expand Down
4 changes: 4 additions & 0 deletions apiserver/paasng/paasng/accessories/log/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
# 默认查询日志的分片大小
DEFAULT_LOG_BATCH_SIZE = 200

# ES 查询的最大窗口,可在 ES 中配置,但不建议调大,容易导致 ES oom
# 日志平台最多也只返回 10,000 条数据,且不可修改
MAX_RESULT_WINDOW = 10000


class LogTimeChoices(str, StructuredEnum):
"""日志搜索-日期范围可选值"""
Expand Down
16 changes: 16 additions & 0 deletions apiserver/paasng/paasng/accessories/log/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,19 @@ def __init__(self, lacking_key: str):

class NoIndexError(Exception):
"""无可用 index"""


class BkLogGatewayServiceError(Exception):
"""This error indicates that there's something wrong when operating bk_log's
API Gateway resource. It's a wrapper class of API SDK's original exceptions
"""

def __init__(self, message: str):
super().__init__(message)
self.message = message


class BkLogApiError(BkLogGatewayServiceError):
"""When calling the bk_log api, bk_log returns an error message,
which needs to be captured and displayed to the user on the page
"""
14 changes: 12 additions & 2 deletions apiserver/paasng/paasng/accessories/log/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from rest_framework.exceptions import ValidationError
from rest_framework.fields import get_attribute

from paasng.accessories.log.constants import LogTimeChoices
from paasng.accessories.log.constants import MAX_RESULT_WINDOW, LogTimeChoices
from paasng.infras.bk_log.constatns import BkLogType
from paasng.utils.es_log.time_range import SmartTimeRange

Expand Down Expand Up @@ -115,7 +115,13 @@ class LogFieldFilterSLZ(serializers.Serializer):


class LogQueryParamsSLZ(serializers.Serializer):
"""查询日志的 query 参数"""
"""查询日志的 query 参数,包含:
- 结构化日志
- 访问日志
- 标准输出日志
- 日志事件直方图
- 日志字段统计
"""

time_range = serializers.ChoiceField(choices=LogTimeChoices.get_choices(), required=True)
start_time = serializers.DateTimeField(help_text="format %Y-%m-%d %H:%M:%S", allow_null=True, required=False)
Expand Down Expand Up @@ -143,6 +149,10 @@ def validate(self, attrs):
attrs["limit"] = attrs["page_size"]
if "page" in attrs:
attrs["offset"] = (attrs["page"] - 1) * attrs["limit"]

# 限制最大分页条数
if attrs["offset"] + attrs["limit"] > MAX_RESULT_WINDOW:
raise ValidationError(_(f"最多仅能查看前 {MAX_RESULT_WINDOW} 条日志"))
return attrs


Expand Down
28 changes: 21 additions & 7 deletions apiserver/paasng/paasng/accessories/log/views/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@

from paasng.accessories.log import serializers
from paasng.accessories.log.client import instantiate_log_client
from paasng.accessories.log.constants import DEFAULT_LOG_BATCH_SIZE, LogType
from paasng.accessories.log.constants import DEFAULT_LOG_BATCH_SIZE, MAX_RESULT_WINDOW, LogType
from paasng.accessories.log.dsl import SearchRequestSchema
from paasng.accessories.log.exceptions import NoIndexError
from paasng.accessories.log.exceptions import BkLogApiError, NoIndexError
from paasng.accessories.log.filters import EnvFilter, ModuleFilter
from paasng.accessories.log.models import ElasticSearchParams, ProcessLogQueryConfig
from paasng.accessories.log.responses import IngressLogLine, StandardOutputLogLine, StructureLogLine
Expand Down Expand Up @@ -218,7 +218,9 @@ def query_logs(self, request, code, module_name, environment):
search=search,
timeout=settings.DEFAULT_ES_SEARCH_TIMEOUT,
)
except RequestError:
except (RequestError, BkLogApiError) as e:
# 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,仅打 error 日志即可
logger.error("Request error when querying logs: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to get logs")
Expand All @@ -229,6 +231,8 @@ def query_logs(self, request, code, module_name, environment):
"logs": clean_logs(list(response), log_config.search_params),
"total": total,
"dsl": json.dumps(search.to_dict()),
# 前端使用该配置控制页面上展示的日志分页的最大页数
"max_result_window": MAX_RESULT_WINDOW,
},
Logs[self.line_model], # type: ignore
)
Expand Down Expand Up @@ -264,7 +268,9 @@ def query_logs_scroll(self, request, code, module_name, environment):
# scan 失败大概率是 scroll_id 失效
logger.exception("scroll_id 失效, 日志查询失败")
raise error_codes.QUERY_LOG_FAILED.f(_("日志查询快照失效, 请刷新后重试。"))
except RequestError:
except (RequestError, BkLogApiError) as e:
# # 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,仅打 error 日志即可
logger.error("request error when querying logs: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to get logs")
Expand Down Expand Up @@ -298,10 +304,18 @@ def aggregate_date_histogram(self, request, code, module_name, environment):
),
time_field=log_config.search_params.timeField,
)
try:
response = log_client.aggregate_date_histogram(
index=log_config.search_params.indexPattern, search=search, timeout=settings.DEFAULT_ES_SEARCH_TIMEOUT
)
except (RequestError, BkLogApiError) as e:
# # 用户输入数据不符合 ES 语法等报错,不需要记录到 Sentry,仅打 error 日志即可
logger.error("request error when aggregate time-based histogram: %s", e) # noqa: TRY400
raise error_codes.QUERY_REQUEST_ERROR
except Exception:
logger.exception("failed to aggregate time-based histogram")
raise error_codes.QUERY_LOG_FAILED.f(_("聚合时间直方图失败,请稍后再试。"))

response = log_client.aggregate_date_histogram(
index=log_config.search_params.indexPattern, search=search, timeout=settings.DEFAULT_ES_SEARCH_TIMEOUT
)
date_histogram = cattr.structure(
{
**clean_histogram_buckets(response),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class ErrorCodes:
QUERY_REPO_OVERVIEW_DATA_ERROR = ErrorCode(_("查询代码仓库概览数据异常"))
# 日志查询异常
QUERY_ES_ERROR = ErrorCode(_("日志系统异常, 请稍后重试"))
QUERY_LOG_FAILED = ErrorCode(_("查询日志失败"))
QUERY_REQUEST_ERROR = ErrorCode(_("查询日志失败,请检查查询条件"))

# 可见范围修改失败
VISIBLE_RANGE_UPDATE_FAIELD = ErrorCode(_("可见范围修改失败"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from paasng.accessories.log.client import instantiate_log_client as instantiate_bksaas_log_client
from paasng.accessories.log.constants import LogType
from paasng.accessories.log.models import ProcessLogQueryConfig
from paasng.accessories.log.shim import setup_env_log_model
from paasng.bk_plugins.pluginscenter.definitions import ElasticSearchParams
from paasng.bk_plugins.pluginscenter.log.client import (
FieldBucketData,
Expand Down Expand Up @@ -249,7 +250,10 @@ def _instantiate_log_client(
search_params = cast(ElasticSearchParams, pd.log_config.ingress)
return log_client, search_params
# 由于 bk-saas 接入了日志平台, 每个应用独立的日志查询配置, 因此需要访问 PaaS 的数据库获取配置信息
env = Application.objects.get(code=instance.id).get_app_envs("prod")
# 插件开发中心只部署主模块的生产环境
env = Application.objects.get(code=instance.id).envs.get(environment="prod", module__is_default=True)
# 初始化 env log 模型, 保证数据库对象存在且是 settings 中的最新配置
setup_env_log_model(env)
if log_type == LogType.INGRESS:
log_config = ProcessLogQueryConfig.objects.select_process_irrelevant(env).ingress
search_params = log_config.search_params
Expand Down
11 changes: 9 additions & 2 deletions apiserver/paasng/paasng/bk_plugins/pluginscenter/log/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
# We undertake not to change the open source license (MIT license) applicable
# to the current version of the project delivered to anyone in the future.

import logging
from functools import reduce
from operator import add
from typing import Dict, List, Protocol, Tuple
Expand All @@ -34,11 +34,14 @@
)
from paasng.bk_plugins.pluginscenter.exceptions import error_codes
from paasng.bk_plugins.pluginscenter.log.constants import DEFAULT_LOG_BATCH_SIZE
from paasng.bk_plugins.pluginscenter.log.exceptions import BkLogApiError
from paasng.bk_plugins.pluginscenter.thirdparty.utils import make_client
from paasng.utils.es_log.misc import count_filters_options
from paasng.utils.es_log.models import FieldFilter
from paasng.utils.es_log.search import SmartSearch

logger = logging.getLogger(__name__)


class LogClientProtocol(Protocol):
"""LogClient protocol, all log search backend should abide this protocol"""
Expand Down Expand Up @@ -115,7 +118,11 @@ def _call_api(self, data, timeout: int):
data["bkdata_authentication_method"] = self.config.bkdataAuthenticationMethod
if self.config.bkdataDataToken:
data["bkdata_data_token"] = self.config.bkdataDataToken
return self.client.call(data=data, timeout=timeout)
resp = self.client.call(data=data, timeout=timeout)
if not resp.get("result"):
logger.error(f"query bk log error: {resp['message']}")
raise BkLogApiError(resp["message"])
return resp


class ESLogClient:
Expand Down
32 changes: 32 additions & 0 deletions apiserver/paasng/paasng/bk_plugins/pluginscenter/log/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# TencentBlueKing is pleased to support the open source community by making
# 蓝鲸智云 - PaaS 平台 (BlueKing - PaaS System) available.
# Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
# Licensed under the MIT License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://opensource.org/licenses/MIT
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.
#
# We undertake not to change the open source license (MIT license) applicable
# to the current version of the project delivered to anyone in the future.


class BkLogGatewayServiceError(Exception):
"""This error indicates that there's something wrong when operating bk_log's
API Gateway resource. It's a wrapper class of API SDK's original exceptions
"""

def __init__(self, message: str):
super().__init__(message)
self.message = message


class BkLogApiError(BkLogGatewayServiceError):
"""When calling the bk_log api, bk_log returns an error message,
which needs to be captured and displayed to the user on the page
"""
Loading

0 comments on commit ff89578

Please sign in to comment.