From 6a6229771de32aa0ba8ad35d40ffdac0c26079f7 Mon Sep 17 00:00:00 2001 From: Jason Davis <32852580+JasonD28@users.noreply.github.com> Date: Tue, 1 Sep 2020 13:36:02 -0700 Subject: [PATCH] feat: refractored SQL-based alerting framework (#10605) * added new tables for alerting refractor * reformatted inheritance structure * added workflow for updated framework * added suggested changes * cleaned up changes * added obervations to alert table to enable view * added comments * added requested changes * fix tests * added styling changes * mypy * added requested changes * updated operator logic * requested changes, 1 validator, styling changes * refactored tests * fix test alert workflow * fixed create_alert in test Co-authored-by: Jason Davis <@dropbox.com> --- superset/app.py | 11 +- .../2e5a0ee25ed4_refractor_alerting.py | 125 +++++++ superset/models/alerts.py | 127 ++++++- superset/tasks/alerts/__init__.py | 17 + superset/tasks/alerts/observer.py | 101 +++++ superset/tasks/alerts/validator.py | 113 ++++++ superset/tasks/schedules.py | 121 +++--- superset/templates/email/alert.txt | 3 +- superset/templates/slack/alert.txt | 3 +- .../templates/slack/alert_no_screenshot.txt | 3 +- superset/views/alerts.py | 150 +++++++- tests/alerts_tests.py | 346 +++++++++++++----- 12 files changed, 944 insertions(+), 176 deletions(-) create mode 100644 superset/migrations/versions/2e5a0ee25ed4_refractor_alerting.py create mode 100644 superset/tasks/alerts/__init__.py create mode 100644 superset/tasks/alerts/observer.py create mode 100644 superset/tasks/alerts/validator.py diff --git a/superset/app.py b/superset/app.py index 43089326edc5c..4878a83d6afe2 100644 --- a/superset/app.py +++ b/superset/app.py @@ -144,7 +144,13 @@ def init_views(self) -> None: from superset.datasets.api import DatasetRestApi from superset.queries.api import QueryRestApi from superset.views.access_requests import AccessRequestsModelView - from superset.views.alerts import AlertLogModelView, AlertModelView + from superset.views.alerts import ( + AlertLogModelView, + AlertModelView, + AlertObservationModelView, + ValidatorInlineView, + SQLObserverInlineView, + ) from superset.views.annotations import ( AnnotationLayerModelView, AnnotationModelView, @@ -399,6 +405,9 @@ def init_views(self) -> None: category_label=__("Manage"), icon="fa-exclamation-triangle", ) + appbuilder.add_view_no_menu(SQLObserverInlineView) + appbuilder.add_view_no_menu(ValidatorInlineView) + appbuilder.add_view_no_menu(AlertObservationModelView) appbuilder.add_view_no_menu(AlertLogModelView) # diff --git a/superset/migrations/versions/2e5a0ee25ed4_refractor_alerting.py b/superset/migrations/versions/2e5a0ee25ed4_refractor_alerting.py new file mode 100644 index 0000000000000..98bd8a4f54443 --- /dev/null +++ b/superset/migrations/versions/2e5a0ee25ed4_refractor_alerting.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""refractor_alerting + +Revision ID: 2e5a0ee25ed4 +Revises: f80a3b88324b +Create Date: 2020-08-31 20:30:30.781478 + +""" + +# revision identifiers, used by Alembic. +revision = "2e5a0ee25ed4" +down_revision = "f80a3b88324b" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import mysql + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "alert_validators", + sa.Column("created_on", sa.DateTime(), nullable=True), + sa.Column("changed_on", sa.DateTime(), nullable=True), + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("validator_type", sa.String(length=100), nullable=False), + sa.Column("config", sa.Text(), nullable=True), + sa.Column("created_by_fk", sa.Integer(), nullable=True), + sa.Column("changed_by_fk", sa.Integer(), nullable=True), + sa.Column("alert_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(["alert_id"], ["alerts.id"],), + sa.ForeignKeyConstraint(["changed_by_fk"], ["ab_user.id"],), + sa.ForeignKeyConstraint(["created_by_fk"], ["ab_user.id"],), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "sql_observers", + sa.Column("created_on", sa.DateTime(), nullable=True), + sa.Column("changed_on", sa.DateTime(), nullable=True), + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("sql", sa.Text(), nullable=False), + sa.Column("created_by_fk", sa.Integer(), nullable=True), + sa.Column("changed_by_fk", sa.Integer(), nullable=True), + sa.Column("alert_id", sa.Integer(), nullable=False), + sa.Column("database_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(["alert_id"], ["alerts.id"],), + sa.ForeignKeyConstraint(["changed_by_fk"], ["ab_user.id"],), + sa.ForeignKeyConstraint(["created_by_fk"], ["ab_user.id"],), + sa.ForeignKeyConstraint(["database_id"], ["dbs.id"],), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "sql_observations", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("dttm", sa.DateTime(), nullable=True), + sa.Column("observer_id", sa.Integer(), nullable=False), + sa.Column("alert_id", sa.Integer(), nullable=True), + sa.Column("value", sa.Float(), nullable=True), + sa.Column("error_msg", sa.String(length=500), nullable=True), + sa.ForeignKeyConstraint(["alert_id"], ["alerts.id"],), + sa.ForeignKeyConstraint(["observer_id"], ["sql_observers.id"],), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_sql_observations_dttm"), "sql_observations", ["dttm"], unique=False + ) + + with op.batch_alter_table("alerts") as batch_op: + batch_op.add_column(sa.Column("changed_by_fk", sa.Integer(), nullable=True)) + batch_op.add_column(sa.Column("changed_on", sa.DateTime(), nullable=True)) + batch_op.add_column(sa.Column("created_by_fk", sa.Integer(), nullable=True)) + batch_op.add_column(sa.Column("created_on", sa.DateTime(), nullable=True)) + batch_op.alter_column( + "crontab", existing_type=mysql.VARCHAR(length=50), nullable=False + ) + batch_op.create_foreign_key( + "alerts_ibfk_3", "ab_user", ["changed_by_fk"], ["id"] + ) + batch_op.create_foreign_key( + "alerts_ibfk_4", "ab_user", ["created_by_fk"], ["id"] + ) + batch_op.drop_column("sql") + batch_op.drop_column("database_id") + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("alerts") as batch_op: + batch_op.add_column( + sa.Column( + "database_id", mysql.INTEGER(), autoincrement=False, nullable=False + ) + ) + batch_op.add_column(sa.Column("sql", mysql.TEXT(), nullable=True)) + batch_op.drop_constraint("alerts_ibfk_3", type_="foreignkey") + batch_op.drop_constraint("alerts_ibfk_4", type_="foreignkey") + batch_op.alter_column( + "crontab", existing_type=mysql.VARCHAR(length=50), nullable=True + ) + batch_op.drop_column("created_on") + batch_op.drop_column("created_by_fk") + batch_op.drop_column("changed_on") + batch_op.drop_column("changed_by_fk") + + op.drop_index(op.f("ix_sql_observations_dttm"), table_name="sql_observations") + op.drop_table("sql_observations") + op.drop_table("sql_observers") + op.drop_table("alert_validators") + # ### end Alembic commands ### diff --git a/superset/models/alerts.py b/superset/models/alerts.py index cbea6571c588c..a62aacc419fe5 100644 --- a/superset/models/alerts.py +++ b/superset/models/alerts.py @@ -15,22 +15,27 @@ # specific language governing permissions and limitations # under the License. """Models for scheduled execution of jobs""" +import textwrap from datetime import datetime +from typing import Any, Optional from flask_appbuilder import Model from sqlalchemy import ( Boolean, Column, DateTime, + Float, ForeignKey, Integer, String, Table, Text, ) -from sqlalchemy.orm import backref, relationship +from sqlalchemy.ext.declarative import declared_attr +from sqlalchemy.orm import backref, relationship, RelationshipProperty -from superset import security_manager +from superset import db, security_manager +from superset.models.helpers import AuditMixinNullable metadata = Model.metadata # pylint: disable=no-member @@ -44,23 +49,23 @@ ) -class Alert(Model): +class Alert(Model, AuditMixinNullable): """Schedules for emailing slices / dashboards""" __tablename__ = "alerts" id = Column(Integer, primary_key=True) - label = Column(String(150)) + label = Column(String(150), nullable=False) active = Column(Boolean, default=True, index=True) - crontab = Column(String(50)) - sql = Column(Text) + crontab = Column(String(50), nullable=False) alert_type = Column(String(50)) owners = relationship(security_manager.user_model, secondary=alert_owner) recipients = Column(Text) slack_channel = Column(Text) + # TODO: implement log_retention log_retention = Column(Integer, default=90) grace_period = Column(Integer, default=60 * 60 * 24) @@ -70,13 +75,6 @@ class Alert(Model): dashboard_id = Column(Integer, ForeignKey("dashboards.id")) dashboard = relationship("Dashboard", backref="alert", foreign_keys=[dashboard_id]) - database_id = Column(Integer, ForeignKey("dbs.id"), nullable=False) - database = relationship( - "Database", - foreign_keys=[database_id], - backref=backref("alerts", cascade="all, delete-orphan"), - ) - last_eval_dttm = Column(DateTime, default=datetime.utcnow) last_state = Column(String(10)) @@ -100,3 +98,106 @@ class AlertLog(Model): @property def duration(self) -> int: return (self.dttm_end - self.dttm_start).total_seconds() + + +# TODO: Currently SQLObservation table will constantly grow with no limit, +# add some retention restriction or more to a more scalable db e.g. +# https://github.com/apache/incubator-superset/blob/master/superset/utils/log.py#L32 +class SQLObserver(Model, AuditMixinNullable): + """Runs SQL-based queries for alerts""" + + __tablename__ = "sql_observers" + + id = Column(Integer, primary_key=True) + sql = Column(Text, nullable=False) + + @declared_attr + def alert_id(self) -> int: + return Column(Integer, ForeignKey("alerts.id"), nullable=False) + + @declared_attr + def alert(self) -> RelationshipProperty: + return relationship( + "Alert", + foreign_keys=[self.alert_id], + backref=backref("sql_observer", cascade="all, delete-orphan"), + ) + + @declared_attr + def database_id(self) -> int: + return Column(Integer, ForeignKey("dbs.id"), nullable=False) + + @declared_attr + def database(self) -> RelationshipProperty: + return relationship( + "Database", + foreign_keys=[self.database_id], + backref=backref("sql_observers", cascade="all, delete-orphan"), + ) + + def get_last_observation(self) -> Optional[Any]: + observations = list( + db.session.query(SQLObservation) + .filter_by(observer_id=self.id) + .order_by(SQLObservation.dttm.desc()) + .limit(1) + ) + + if observations: + return observations[0] + + return None + + +class SQLObservation(Model): # pylint: disable=too-few-public-methods + """Keeps track of values retrieved from SQLObservers""" + + __tablename__ = "sql_observations" + + id = Column(Integer, primary_key=True) + dttm = Column(DateTime, default=datetime.utcnow, index=True) + observer_id = Column(Integer, ForeignKey("sql_observers.id"), nullable=False) + observer = relationship( + "SQLObserver", + foreign_keys=[observer_id], + backref=backref("observations", cascade="all, delete-orphan"), + ) + alert_id = Column(Integer, ForeignKey("alerts.id")) + alert = relationship( + "Alert", + foreign_keys=[alert_id], + backref=backref("observations", cascade="all, delete-orphan"), + ) + value = Column(Float) + error_msg = Column(String(500)) + + +class Validator(Model, AuditMixinNullable): + """Used to determine how an alert and its observations should be validated""" + + __tablename__ = "alert_validators" + + id = Column(Integer, primary_key=True) + validator_type = Column(String(100), nullable=False) + config = Column( + Text, + default=textwrap.dedent( + """ + { + + } + """ + ), + ) + + @declared_attr + def alert_id(self) -> int: + return Column(Integer, ForeignKey("alerts.id"), nullable=False) + + @declared_attr + def alert(self) -> RelationshipProperty: + return relationship( + "Alert", + foreign_keys=[self.alert_id], + backref=backref("validators", cascade="all, delete-orphan"), + ) diff --git a/superset/tasks/alerts/__init__.py b/superset/tasks/alerts/__init__.py new file mode 100644 index 0000000000000..fd9417fe5c1e9 --- /dev/null +++ b/superset/tasks/alerts/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/superset/tasks/alerts/observer.py b/superset/tasks/alerts/observer.py new file mode 100644 index 0000000000000..f7c5373fd72da --- /dev/null +++ b/superset/tasks/alerts/observer.py @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import logging +from datetime import datetime +from typing import Optional + +import pandas as pd + +from superset import db +from superset.models.alerts import Alert, SQLObservation +from superset.sql_parse import ParsedQuery + +logger = logging.getLogger("tasks.email_reports") + + +def observe(alert_id: int) -> Optional[str]: + """ + Runs the SQL query in an alert's SQLObserver and then + stores the result in a SQLObservation. + Returns an error message if the observer value was not valid + """ + + alert = db.session.query(Alert).filter_by(id=alert_id).one() + sql_observer = alert.sql_observer[0] + + value = None + + parsed_query = ParsedQuery(sql_observer.sql) + sql = parsed_query.stripped() + df = sql_observer.database.get_df(sql) + + error_msg = validate_observer_result(df, alert.id, alert.label) + + if not error_msg and df.to_records()[0][1] is not None: + value = float(df.to_records()[0][1]) + + observation = SQLObservation( + observer_id=sql_observer.id, + alert_id=alert_id, + dttm=datetime.utcnow(), + value=value, + error_msg=error_msg, + ) + + db.session.add(observation) + db.session.commit() + + return error_msg + + +def validate_observer_result( + sql_result: pd.DataFrame, alert_id: int, alert_label: str +) -> Optional[str]: + """ + Verifies if a DataFrame SQL query result to see if + it contains a valid value for a SQLObservation. + Returns an error message if the result is invalid. + """ + try: + assert ( + not sql_result.empty + ), f"Observer for alert <{alert_id}:{alert_label}> returned no rows" + + rows = sql_result.to_records() + + assert ( + len(rows) == 1 + ), f"Observer for alert <{alert_id}:{alert_label}> returned more than 1 row" + + assert ( + len(rows[0]) == 2 + ), f"Observer for alert <{alert_id}:{alert_label}> returned more than 1 column" + + if rows[0][1] is None: + return None + + float(rows[0][1]) + + except AssertionError as error: + return str(error) + except (TypeError, ValueError): + return ( + f"Observer for alert <{alert_id}:{alert_label}> returned a non-number value" + ) + + return None diff --git a/superset/tasks/alerts/validator.py b/superset/tasks/alerts/validator.py new file mode 100644 index 0000000000000..56dfad4dd63b0 --- /dev/null +++ b/superset/tasks/alerts/validator.py @@ -0,0 +1,113 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import enum +import json +from operator import eq, ge, gt, le, lt, ne +from typing import Callable, Optional + +import numpy as np + +from superset.exceptions import SupersetException +from superset.models.alerts import SQLObserver + +OPERATOR_FUNCTIONS = {">=": ge, ">": gt, "<=": le, "<": lt, "==": eq, "!=": ne} + + +class AlertValidatorType(enum.Enum): + not_null = "not null" + operator = "operator" + + @classmethod + def valid_type(cls, validator_type: str) -> bool: + return any(val_type.value == validator_type for val_type in cls) + + +def check_validator(validator_type: str, config: str) -> None: + if not AlertValidatorType.valid_type(validator_type): + raise SupersetException( + f"Error: {validator_type} is not a valid validator type." + ) + + config_dict = json.loads(config) + + if validator_type == AlertValidatorType.operator.value: + + if not (config_dict.get("op") and config_dict.get("threshold")): + raise SupersetException( + "Error: Operator Validator needs specified operator and threshold " + 'values. Add "op" and "threshold" to config.' + ) + + if not config_dict["op"] in OPERATOR_FUNCTIONS.keys(): + raise SupersetException( + f'Error: {config_dict["op"]} is an invalid operator type. Change ' + f'the "op" value in the config to one of ' + f'["<", "<=", ">", ">=", "==", "!="]' + ) + + if not isinstance(config_dict["threshold"], (int, float)): + raise SupersetException( + f'Error: {config_dict["threshold"]} is an invalid threshold value.' + f' Change the "threshold" value in the config.' + ) + + +def not_null_validator( + observer: SQLObserver, validator_config: str # pylint: disable=unused-argument +) -> bool: + """Returns True if a SQLObserver's recent observation is not NULL""" + + observation = observer.get_last_observation() + # TODO: Validate malformed observations/observations with errors separately + if ( + not observation + or observation.error_msg + or observation.value in (0, None, np.nan) + ): + return False + return True + + +def operator_validator(observer: SQLObserver, validator_config: str) -> bool: + """ + Returns True if a SQLObserver's recent observation is greater than or equal to + the value given in the validator config + """ + + observation = observer.get_last_observation() + if observation and observation.value not in (None, np.nan): + operator = json.loads(validator_config)["op"] + threshold = json.loads(validator_config)["threshold"] + if OPERATOR_FUNCTIONS[operator](observation.value, threshold): + return True + + return False + + +def get_validator_function( + validator_type: str, +) -> Optional[Callable[[SQLObserver, str], bool]]: + """Returns a validation function based on validator_type""" + + alert_validators = { + AlertValidatorType.not_null.value: not_null_validator, + AlertValidatorType.operator.value: operator_validator, + } + if alert_validators.get(validator_type.lower()): + return alert_validators[validator_type.lower()] + + return None diff --git a/superset/tasks/schedules.py b/superset/tasks/schedules.py index 2969bb6eac7c9..a4a7b5d87be7b 100644 --- a/superset/tasks/schedules.py +++ b/superset/tasks/schedules.py @@ -37,7 +37,6 @@ from urllib.error import URLError # pylint: disable=ungrouped-imports import croniter -import pandas as pd import simplejson as json from celery.app.task import Task from dateutil.tz import tzlocal @@ -52,7 +51,6 @@ from superset import app, db, security_manager, thumbnail_cache from superset.extensions import celery_app, machine_auth_provider_factory from superset.models.alerts import Alert, AlertLog -from superset.models.core import Database from superset.models.dashboard import Dashboard from superset.models.schedules import ( EmailDeliveryType, @@ -61,7 +59,8 @@ SliceEmailReportFormat, ) from superset.models.slice import Slice -from superset.sql_parse import ParsedQuery +from superset.tasks.alerts.observer import observe +from superset.tasks.alerts.validator import get_validator_function from superset.tasks.slack_util import deliver_slack_msg from superset.utils.core import get_email_address_list, send_email_smtp from superset.utils.screenshots import ChartScreenshot, WebDriverProxy @@ -74,7 +73,6 @@ from werkzeug.datastructures import TypeConversionDict from flask_appbuilder.security.sqla.models import User - # Globals config = app.config logger = logging.getLogger("tasks.email_reports") @@ -106,6 +104,7 @@ class ScreenshotData(NamedTuple): class AlertContent(NamedTuple): label: str # alert name sql: str # sql statement for alert + observation_value: str # value from observation that triggered the alert alert_url: str # url to alert details image_data: Optional[ScreenshotData] # data for the alert screenshot @@ -539,15 +538,7 @@ def schedule_alert_query( # pylint: disable=unused-argument return if report_type == ScheduleType.alert: - if recipients or slack_channel: - deliver_alert(schedule.id, recipients, slack_channel) - return - - if run_alert_query( - schedule.id, schedule.database_id, schedule.sql, schedule.label - ): - # deliver_dashboard OR deliver_slice - return + evaluate_alert(schedule.id, schedule.label, recipients, slack_channel) else: raise RuntimeError("Unknown report type") except NoSuchColumnError as column_error: @@ -565,18 +556,35 @@ class AlertState: def deliver_alert( - alert_id: int, recipients: Optional[str] = None, slack_channel: Optional[str] = None + alert_id: int, + recipients: Optional[str] = None, + slack_channel: Optional[str] = None, ) -> None: + """ + Gathers alert information and sends out the alert + to its respective email and slack recipients + """ + alert = db.session.query(Alert).get(alert_id) logging.info("Triggering alert: %s", alert) + + # Set all the values for the alert report + # Alternate values are used in the case of a test alert + # where an alert has no observations yet recipients = recipients or alert.recipients slack_channel = slack_channel or alert.slack_channel + sql = alert.sql_observer[0].sql if alert.sql_observer else "" + observation_value = ( + str(alert.observations[-1].value) if alert.observations else "Value" + ) + # TODO: add sql query results and validator information to alert content if alert.slice: alert_content = AlertContent( alert.label, - alert.sql, + sql, + observation_value, _get_url_path("AlertModelView.show", user_friendly=True, pk=alert_id), _get_slice_screenshot(alert.slice.id), ) @@ -584,7 +592,8 @@ def deliver_alert( # TODO: dashboard delivery! alert_content = AlertContent( alert.label, - alert.sql, + sql, + observation_value, _get_url_path("AlertModelView.show", user_friendly=True, pk=alert_id), None, ) @@ -596,7 +605,7 @@ def deliver_alert( def deliver_email_alert(alert_content: AlertContent, recipients: str) -> None: - # TODO add sql query results to email + """Delivers an email alert to the given email recipients""" subject = f"[Superset] Triggered alert: {alert_content.label}" deliver_as_group = False data = None @@ -613,6 +622,7 @@ def deliver_email_alert(alert_content: AlertContent, recipients: str) -> None: alert_url=alert_content.alert_url, label=alert_content.label, sql=alert_content.sql, + observation_value=alert_content.observation_value, image_url=image_url, ) @@ -620,6 +630,8 @@ def deliver_email_alert(alert_content: AlertContent, recipients: str) -> None: def deliver_slack_alert(alert_content: AlertContent, slack_channel: str) -> None: + """Delivers a slack alert to the given slack channel""" + subject = __("[Alert] %(label)s", label=alert_content.label) image = None @@ -628,6 +640,7 @@ def deliver_slack_alert(alert_content: AlertContent, slack_channel: str) -> None "slack/alert.txt", label=alert_content.label, sql=alert_content.sql, + observation_value=alert_content.observation_value, url=alert_content.image_data.url, alert_url=alert_content.alert_url, ) @@ -637,6 +650,7 @@ def deliver_slack_alert(alert_content: AlertContent, slack_channel: str) -> None "slack/alert_no_screenshot.txt", label=alert_content.label, sql=alert_content.sql, + observation_value=alert_content.observation_value, alert_url=alert_content.alert_url, ) @@ -645,55 +659,48 @@ def deliver_slack_alert(alert_content: AlertContent, slack_channel: str) -> None ) -def run_alert_query( - alert_id: int, database_id: int, sql: str, label: str -) -> Optional[bool]: - """ - Execute alert.sql and return value if any rows are returned - """ - logger.info("Processing alert ID: %i", alert_id) - database = db.session.query(Database).get(database_id) - if not database: - logger.error("Alert database not preset") - return None - - if not sql: - logger.error("Alert SQL not preset") - return None +def evaluate_alert( + alert_id: int, + label: str, + recipients: Optional[str] = None, + slack_channel: Optional[str] = None, +) -> None: + """Processes an alert to see if it should be triggered""" - parsed_query = ParsedQuery(sql) - sql = parsed_query.stripped() + logger.info("Processing alert ID: %i", alert_id) state = None dttm_start = datetime.utcnow() - df = pd.DataFrame() try: - logger.info("Evaluating SQL for alert <%s:%s>", alert_id, label) - df = database.get_df(sql) + logger.info("Querying observers for alert <%s:%s>", alert_id, label) + error_msg = observe(alert_id) + if error_msg: + state = AlertState.ERROR + logging.error(error_msg) except Exception as exc: # pylint: disable=broad-except state = AlertState.ERROR logging.exception(exc) - logging.error("Failed at evaluating alert: %s (%s)", label, alert_id) + logging.error("Failed at query observers for alert: %s (%s)", label, alert_id) dttm_end = datetime.utcnow() - last_eval_dttm = datetime.utcnow() if state != AlertState.ERROR: - if not df.empty: - # Looking for truthy cells - for row in df.to_records(): - if any(row): - state = AlertState.TRIGGER - deliver_alert(alert_id) - break - if not state: + # Don't validate alert on test runs since it may not be triggered + if recipients or slack_channel: + deliver_alert(alert_id, recipients, slack_channel) + state = AlertState.TRIGGER + # Validate during regular workflow and deliver only if triggered + elif validate_observations(alert_id, label): + deliver_alert(alert_id, recipients, slack_channel) + state = AlertState.TRIGGER + else: state = AlertState.PASS db.session.commit() alert = db.session.query(Alert).get(alert_id) if state != AlertState.ERROR: - alert.last_eval_dttm = last_eval_dttm + alert.last_eval_dttm = dttm_end alert.last_state = state alert.logs.append( AlertLog( @@ -705,7 +712,23 @@ def run_alert_query( ) db.session.commit() - return None + +def validate_observations(alert_id: int, label: str) -> bool: + """ + Runs an alert's validators to check if it should be triggered or not + If so, return the name of the validator that returned true + """ + + logger.info("Validating observations for alert <%s:%s>", alert_id, label) + + alert = db.session.query(Alert).get(alert_id) + if alert.validators: + validator = alert.validators[0] + validate = get_validator_function(validator.validator_type) + if validate and validate(alert.sql_observer[0], validator.config): + return True + + return False def next_schedules( diff --git a/superset/templates/email/alert.txt b/superset/templates/email/alert.txt index 0a2c623b91535..50ca6aa9cd491 100644 --- a/superset/templates/email/alert.txt +++ b/superset/templates/email/alert.txt @@ -18,7 +18,8 @@ -->

Alert: {{label}} ⚠

SQL Statement:

-{{sql}} +{{sql}}

+

SQL Result: {{observation_value}}

View Alert Details

Click here or the image below to view the chart related to this alert.

diff --git a/superset/templates/slack/alert.txt b/superset/templates/slack/alert.txt index 2264eea77cedf..80cfaa9c2c1ab 100644 --- a/superset/templates/slack/alert.txt +++ b/superset/templates/slack/alert.txt @@ -17,6 +17,7 @@ under the License. #} *Triggered Alert: {{label}} :redalert:* -SQL Statement:```{{sql}}``` +*SQL* *Statement*:```{{sql}}``` +*SQL* *Result*: {{observation_value}} <{{alert_url}}|View Alert Details> <{{url}}|*Explore in Superset*> diff --git a/superset/templates/slack/alert_no_screenshot.txt b/superset/templates/slack/alert_no_screenshot.txt index 84b74dafcb184..4e31f36201754 100644 --- a/superset/templates/slack/alert_no_screenshot.txt +++ b/superset/templates/slack/alert_no_screenshot.txt @@ -17,5 +17,6 @@ under the License. #} *Triggered Alert: {{label}} :redalert:* -SQL Statement:```{{sql}}``` +*SQL* *Statement*:```{{sql}}``` +*SQL* *Result*: {{observation_value}} <{{alert_url}}|View Alert Details> diff --git a/superset/views/alerts.py b/superset/views/alerts.py index 70573cbff3428..c550cc3e1d07b 100644 --- a/superset/views/alerts.py +++ b/superset/views/alerts.py @@ -23,9 +23,17 @@ from wtforms import BooleanField, Form, StringField from superset.constants import RouteMethod -from superset.models.alerts import Alert, AlertLog +from superset.models.alerts import ( + Alert, + AlertLog, + SQLObservation, + SQLObserver, + Validator, +) from superset.models.schedules import ScheduleType +from superset.tasks.alerts.validator import check_validator from superset.tasks.schedules import schedule_alert_query +from superset.utils import core as utils from superset.utils.core import get_email_address_str, markdown from ..exceptions import SupersetException @@ -47,6 +55,127 @@ class AlertLogModelView( ) +class AlertObservationModelView( + CompactCRUDMixin, SupersetModelView +): # pylint: disable=too-many-ancestors + datamodel = SQLAInterface(SQLObservation) + include_route_methods = {RouteMethod.LIST} | {"show"} + list_title = _("List Observations") + show_title = _("Show Observation") + list_columns = ( + "dttm", + "value", + "error_msg", + ) + label_columns = { + "error_msg": _("Error Message"), + } + + +# TODO: add a button to the form to test if the SQL statment can run with no errors +class SQLObserverInlineView( # pylint: disable=too-many-ancestors + CompactCRUDMixin, SupersetModelView +): + datamodel = SQLAInterface(SQLObserver) + include_route_methods = RouteMethod.RELATED_VIEW_SET | RouteMethod.API_SET + list_title = _("SQL Observers") + show_title = _("Show SQL Observer") + add_title = _("Add SQL Observer") + edit_title = _("Edit SQL Observer") + + edit_columns = [ + "alert", + "database", + "sql", + ] + + add_columns = edit_columns + + list_columns = ["alert.label", "database", "sql"] + + label_columns = { + "alert": _("Alert"), + "database": _("Database"), + "sql": _("SQL"), + } + + description_columns = { + "sql": _( + "A SQL statement that defines whether the alert should get triggered or " + "not. The query is expected to return either NULL or a number value." + ) + } + + def pre_add(self, item: "SQLObserverInlineView") -> None: + if item.alert.sql_observer and item.alert.sql_observer[0].id != item.id: + raise SupersetException("Error: An alert should only have one observer.") + + +class ValidatorInlineView( # pylint: disable=too-many-ancestors + CompactCRUDMixin, SupersetModelView +): + datamodel = SQLAInterface(Validator) + include_route_methods = RouteMethod.RELATED_VIEW_SET | RouteMethod.API_SET + list_title = _("Validators") + show_title = _("Show Validator") + add_title = _("Add Validator") + edit_title = _("Edit Validator") + + edit_columns = [ + "alert", + "validator_type", + "config", + ] + + add_columns = edit_columns + + list_columns = [ + "validator_type", + "alert.label", + ] + + label_columns = { + "validator_type": _("Validator Type"), + "alert": _("Alert"), + } + + description_columns = { + "validator_type": utils.markdown( + "Determines when to trigger alert based off value from SQLObserver query. " + "Alerts will be triggered with these validator types:" + "", + True, + ), + "config": utils.markdown( + "JSON string containing values the validator will compare against. " + "Each validator needs the following values:" + "", + True, + ), + } + + def pre_add(self, item: "ValidatorInlineView") -> None: + if item.alert.validators and item.alert.validators[0].id != item.id: + raise SupersetException( + "Error: Alerts currently only support 1 validator per alert." + ) + + item.validator_type = item.validator_type.lower() + check_validator(item.validator_type, item.config) + + def pre_update(self, item: "ValidatorInlineView") -> None: + item.validator_type = item.validator_type.lower() + check_validator(item.validator_type, item.config) + + class AlertModelView(SupersetModelView): # pylint: disable=too-many-ancestors datamodel = SQLAInterface(Alert) route_base = "/alert" @@ -58,7 +187,6 @@ class AlertModelView(SupersetModelView): # pylint: disable=too-many-ancestors list_columns = ( "label", - "database", "crontab", "last_eval_dttm", "last_state", @@ -68,8 +196,6 @@ class AlertModelView(SupersetModelView): # pylint: disable=too-many-ancestors "label", "active", "crontab", - "database", - "sql", # TODO: implement different types of alerts # "alert_type", "owners", @@ -85,18 +211,9 @@ class AlertModelView(SupersetModelView): # pylint: disable=too-many-ancestors "test_slack_channel", ) label_columns = { - "sql": "SQL", "log_retention": _("Log Retentions (days)"), } description_columns = { - "sql": _( - "A SQL statement that defines whether the alert should get " - "triggered or not. If the statement return no row, the alert " - "is not triggered. If the statement returns one or many rows, " - "the cells will be evaluated to see if they are 'truthy' " - "if any cell is truthy, the alert will fire. Truthy values " - "are non zero, non null, non empty strings." - ), "crontab": markdown( "A CRON-like expression. " "[Crontab Guru](https://crontab.guru/) is " @@ -134,7 +251,12 @@ class AlertModelView(SupersetModelView): # pylint: disable=too-many-ancestors } edit_form_extra_fields = add_form_extra_fields edit_columns = add_columns - related_views = [AlertLogModelView] + related_views = [ + AlertObservationModelView, + AlertLogModelView, + ValidatorInlineView, + SQLObserverInlineView, + ] def process_form(self, form: Form, is_created: bool) -> None: email_recipients = None diff --git a/tests/alerts_tests.py b/tests/alerts_tests.py index f8c7da9435bb2..6b5c503edc7e0 100644 --- a/tests/alerts_tests.py +++ b/tests/alerts_tests.py @@ -16,18 +16,32 @@ # under the License. """Unit tests for alerting in Superset""" import logging +from typing import Optional from unittest.mock import patch import pytest from superset import db -from superset.models.alerts import Alert, AlertLog -from superset.models.schedules import ScheduleType +from superset.exceptions import SupersetException +from superset.models.alerts import ( + Alert, + AlertLog, + SQLObservation, + SQLObserver, + Validator, +) from superset.models.slice import Slice +from superset.tasks.alerts.observer import observe +from superset.tasks.alerts.validator import ( + check_validator, + not_null_validator, + operator_validator, +) from superset.tasks.schedules import ( + AlertState, deliver_alert, - run_alert_query, - schedule_alert_query, + evaluate_alert, + validate_observations, ) from superset.utils import core as utils from tests.test_app import app @@ -40,115 +54,252 @@ @pytest.yield_fixture(scope="module") def setup_database(): with app.app_context(): - slice_id = db.session.query(Slice).all()[0].id - database_id = utils.get_example_database().id - - alerts = [ - Alert( - id=1, - label="alert_1", - active=True, - crontab="*/1 * * * *", - sql="SELECT 0", - alert_type="email", - slice_id=slice_id, - database_id=database_id, - ), - Alert( - id=2, - label="alert_2", - active=True, - crontab="*/1 * * * *", - sql="SELECT 55", - alert_type="email", - slice_id=slice_id, - recipients="recipient1@superset.com", - slack_channel="#test_channel", - database_id=database_id, - ), - Alert( - id=3, - label="alert_3", - active=False, - crontab="*/1 * * * *", - sql="UPDATE 55", - alert_type="email", - slice_id=slice_id, - database_id=database_id, - ), - Alert(id=4, active=False, label="alert_4", database_id=-1), - Alert(id=5, active=False, label="alert_5", database_id=database_id), - ] - - db.session.bulk_save_objects(alerts) + example_database = utils.get_example_database() + example_database.get_sqla_engine().execute( + "CREATE TABLE test_table AS SELECT 1 as first, 2 as second" + ) + example_database.get_sqla_engine().execute( + "INSERT INTO test_table (first, second) VALUES (3, 4)" + ) + + no_observer_alert = Alert(crontab="* * * * *", label="No Observer") + db.session.add(no_observer_alert) db.session.commit() yield db.session + db.session.query(SQLObservation).delete() + db.session.query(SQLObserver).delete() + db.session.query(Validator).delete() db.session.query(AlertLog).delete() db.session.query(Alert).delete() +def create_alert( + dbsession, + sql: str, + validator_type: Optional[str] = None, + validator_config: Optional[str] = None, +) -> Alert: + alert = Alert( + label="test_alert", + active=True, + crontab="* * * * *", + slice_id=dbsession.query(Slice).all()[0].id, + recipients="recipient1@superset.com", + slack_channel="#test_channel", + ) + dbsession.add(alert) + dbsession.commit() + + sql_observer = SQLObserver( + sql=sql, alert_id=alert.id, database_id=utils.get_example_database().id, + ) + + if validator_type and validator_config: + validator = Validator( + validator_type=validator_type, config=validator_config, alert_id=alert.id, + ) + + dbsession.add(validator) + + dbsession.add(sql_observer) + dbsession.commit() + return alert + + +def test_alert_observer(setup_database): + dbsession = setup_database + + # Test SQLObserver with int SQL return + alert1 = create_alert(dbsession, "SELECT 55") + observe(alert1.id) + assert alert1.sql_observer[0].observations[-1].value == 55.0 + assert alert1.sql_observer[0].observations[-1].error_msg is None + + # Test SQLObserver with double SQL return + alert2 = create_alert(dbsession, "SELECT 30.0 as wage") + observe(alert2.id) + assert alert2.sql_observer[0].observations[-1].value == 30.0 + assert alert2.sql_observer[0].observations[-1].error_msg is None + + # Test SQLObserver with NULL result + alert3 = create_alert(dbsession, "SELECT null as null_result") + observe(alert3.id) + assert alert3.sql_observer[0].observations[-1].value is None + assert alert3.sql_observer[0].observations[-1].error_msg is None + + # Test SQLObserver with empty SQL return + alert4 = create_alert(dbsession, "SELECT first FROM test_table WHERE first = -1") + observe(alert4.id) + assert alert4.sql_observer[0].observations[-1].value is None + assert alert4.sql_observer[0].observations[-1].error_msg is not None + + # Test SQLObserver with str result + alert5 = create_alert(dbsession, "SELECT 'test_string' as string_value") + observe(alert5.id) + assert alert5.sql_observer[0].observations[-1].value is None + assert alert5.sql_observer[0].observations[-1].error_msg is not None + + # Test SQLObserver with two row result + alert6 = create_alert(dbsession, "SELECT first FROM test_table") + observe(alert6.id) + assert alert6.sql_observer[0].observations[-1].value is None + assert alert6.sql_observer[0].observations[-1].error_msg is not None + + # Test SQLObserver with two column result + alert7 = create_alert( + dbsession, "SELECT first, second FROM test_table WHERE first = 1" + ) + observe(alert7.id) + assert alert7.sql_observer[0].observations[-1].value is None + assert alert7.sql_observer[0].observations[-1].error_msg is not None + + @patch("superset.tasks.schedules.deliver_alert") -@patch("superset.tasks.schedules.logging.Logger.error") -def test_run_alert_query(mock_error, mock_deliver_alert, setup_database): +def test_evaluate_alert(mock_deliver_alert, setup_database): dbsession = setup_database - # Test passing alert with null SQL result - alert1 = dbsession.query(Alert).filter_by(id=1).one() - run_alert_query(alert1.id, alert1.database_id, alert1.sql, alert1.label) - assert mock_deliver_alert.call_count == 0 - assert mock_error.call_count == 0 + # Test error with Observer SQL statement + alert1 = create_alert(dbsession, "$%^&") + evaluate_alert(alert1.id, alert1.label) + assert alert1.logs[-1].state == AlertState.ERROR - # Test passing alert with True SQL result - alert2 = dbsession.query(Alert).filter_by(id=2).one() - run_alert_query(alert2.id, alert2.database_id, alert2.sql, alert2.label) - assert mock_deliver_alert.call_count == 1 - assert mock_error.call_count == 0 + # Test error with alert lacking observer + alert2 = dbsession.query(Alert).filter_by(label="No Observer").one() + evaluate_alert(alert2.id, alert2.label) + assert alert2.logs[-1].state == AlertState.ERROR - # Test passing alert with error in SQL query - alert3 = dbsession.query(Alert).filter_by(id=3).one() - run_alert_query(alert3.id, alert3.database_id, alert3.sql, alert3.label) - assert mock_deliver_alert.call_count == 1 - assert mock_error.call_count == 2 + # Test pass on alert lacking validator + alert3 = create_alert(dbsession, "SELECT 55") + evaluate_alert(alert3.id, alert3.label) + assert alert3.logs[-1].state == AlertState.PASS - # Test passing alert with invalid database - alert4 = dbsession.query(Alert).filter_by(id=4).one() - run_alert_query(alert4.id, alert4.database_id, alert4.sql, alert4.label) + # Test triggering successful alert + alert4 = create_alert(dbsession, "SELECT 55", "not null", "{}") + evaluate_alert(alert4.id, alert4.label) assert mock_deliver_alert.call_count == 1 - assert mock_error.call_count == 3 + assert alert4.logs[-1].state == AlertState.TRIGGER - # Test passing alert with no SQL statement - alert5 = dbsession.query(Alert).filter_by(id=5).one() - run_alert_query(alert5.id, alert5.database_id, alert5.sql, alert5.label) - assert mock_deliver_alert.call_count == 1 - assert mock_error.call_count == 4 +def test_check_validator(): + # Test with invalid operator type + with pytest.raises(SupersetException): + check_validator("greater than", "{}") -@patch("superset.tasks.schedules.deliver_alert") -@patch("superset.tasks.schedules.run_alert_query") -def test_schedule_alert_query(mock_run_alert, mock_deliver_alert, setup_database): + # Test with empty config + with pytest.raises(SupersetException): + check_validator("operator", "{}") + + # Test with invalid operator + with pytest.raises(SupersetException): + check_validator("operator", '{"op": "is", "threshold":50.0}') + + # Test with invalid operator + with pytest.raises(SupersetException): + check_validator("operator", '{"op": "is", "threshold":50.0}') + + # Test with invalid threshold + with pytest.raises(SupersetException): + check_validator("operator", '{"op": "is", "threshold":"hello"}') + + # Test with float threshold and no errors + assert check_validator("operator", '{"op": ">=", "threshold": 50.0}') is None + + # Test with int threshold and no errors + assert check_validator("operator", '{"op": "==", "threshold": 50}') is None + + +def test_not_null_validator(setup_database): + dbsession = setup_database + + # Test passing SQLObserver with 'null' SQL result + alert1 = create_alert(dbsession, "SELECT 0") + observe(alert1.id) + assert not_null_validator(alert1.sql_observer[0], "{}") is False + + # Test passing SQLObserver with empty SQL result + alert2 = create_alert(dbsession, "SELECT first FROM test_table WHERE first = -1") + observe(alert2.id) + assert not_null_validator(alert2.sql_observer[0], "{}") is False + + # Test triggering alert with non-null SQL result + alert3 = create_alert(dbsession, "SELECT 55") + observe(alert3.id) + assert not_null_validator(alert3.sql_observer[0], "{}") is True + + +def test_operator_validator(setup_database): dbsession = setup_database - active_alert = dbsession.query(Alert).filter_by(id=1).one() - inactive_alert = dbsession.query(Alert).filter_by(id=3).one() - - # Test that inactive alerts are no processed - schedule_alert_query(report_type=ScheduleType.alert, schedule_id=inactive_alert.id) - assert mock_run_alert.call_count == 0 - assert mock_deliver_alert.call_count == 0 - - # Test that active alerts with no recipients passed in are processed regularly - schedule_alert_query(report_type=ScheduleType.alert, schedule_id=active_alert.id) - assert mock_run_alert.call_count == 1 - assert mock_deliver_alert.call_count == 0 - - # Test that active alerts sent as a test are delivered immediately - schedule_alert_query( - report_type=ScheduleType.alert, - schedule_id=active_alert.id, - recipients="testing@email.com", + + # Test passing SQLObserver with empty SQL result + alert1 = create_alert(dbsession, "SELECT first FROM test_table WHERE first = -1") + observe(alert1.id) + assert ( + operator_validator(alert1.sql_observer[0], '{"op": ">=", "threshold": 60}') + is False ) - assert mock_run_alert.call_count == 1 - assert mock_deliver_alert.call_count == 1 + + # Test passing SQLObserver with result that doesn't pass a greater than threshold + alert2 = create_alert(dbsession, "SELECT 55") + observe(alert2.id) + assert ( + operator_validator(alert2.sql_observer[0], '{"op": ">=", "threshold": 60}') + is False + ) + + # Test passing SQLObserver with result that passes a greater than threshold + assert ( + operator_validator(alert2.sql_observer[0], '{"op": ">=", "threshold": 40}') + is True + ) + + # Test passing SQLObserver with result that doesn't pass a less than threshold + assert ( + operator_validator(alert2.sql_observer[0], '{"op": "<=", "threshold": 40}') + is False + ) + + # Test passing SQLObserver with result that passes threshold + assert ( + operator_validator(alert2.sql_observer[0], '{"op": "<=", "threshold": 60}') + is True + ) + + # Test passing SQLObserver with result that doesn't equal threshold + assert ( + operator_validator(alert2.sql_observer[0], '{"op": "==", "threshold": 60}') + is False + ) + + # Test passing SQLObserver with result that equals threshold + assert ( + operator_validator(alert2.sql_observer[0], '{"op": "==", "threshold": 55}') + is True + ) + + +def test_validate_observations(setup_database): + dbsession = setup_database + + # Test False on alert with no validator + alert1 = create_alert(dbsession, "SELECT 55") + assert validate_observations(alert1.id, alert1.label) is False + + # Test False on alert with no observations + alert2 = create_alert(dbsession, "SELECT 55", "not null", "{}") + assert validate_observations(alert2.id, alert2.label) is False + + # Test False on alert that shouldnt be triggered + alert3 = create_alert(dbsession, "SELECT 0", "not null", "{}") + observe(alert3.id) + assert validate_observations(alert3.id, alert3.label) is False + + # Test True on alert that should be triggered + alert4 = create_alert( + dbsession, "SELECT 55", "operator", '{"op": "<=", "threshold": 60}' + ) + observe(alert4.id) + assert validate_observations(alert4.id, alert4.label) is True @patch("superset.tasks.slack_util.WebClient.files_upload") @@ -159,7 +310,8 @@ def test_deliver_alert_screenshot( screenshot_mock, url_mock, email_mock, file_upload_mock, setup_database ): dbsession = setup_database - alert = dbsession.query(Alert).filter_by(id=2).one() + alert = create_alert(dbsession, "SELECT 55") + observe(alert.id) screenshot = read_fixture("sample.png") screenshot_mock.return_value = screenshot @@ -176,7 +328,9 @@ def test_deliver_alert_screenshot( "channels": alert.slack_channel, "file": screenshot, "initial_comment": f"\n*Triggered Alert: {alert.label} :redalert:*\n" - f"SQL Statement:```{alert.sql}```\n\n", "title": f"[Alert] {alert.label}",