From 648b262ed79c8aaf857580615d82d4d6dbe4e7ca Mon Sep 17 00:00:00 2001 From: voetberg Date: Wed, 6 Mar 2024 08:28:28 -0600 Subject: [PATCH] Common: Update probe to use prometheus pusher, sqla 2.0 syntax and data model. #127 * Changes: - Change text-only queries to poll the data model (rucio.db.sqla.models) - Push results to a remote (See documentation of probes for descriptions) Names: locked_expired_rules.(rse), locked_expired_rules.dids.(rse), locked_expired_rules.rules_for_dids.(rse) --- common/check_expired_locked_rules | 131 +++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 37 deletions(-) diff --git a/common/check_expired_locked_rules b/common/check_expired_locked_rules index 55e139a1..7d35d910 100755 --- a/common/check_expired_locked_rules +++ b/common/check_expired_locked_rules @@ -1,56 +1,113 @@ -#!/usr/bin/env python -# Copyright European Organization for Nuclear Research (CERN) 2013 +#!/usr/bin/env python3 +# Copyright 2012-2024 CERN # # Licensed under the Apache License, Version 2.0 (the "License"); -# You may not use this file except in compliance with the License. -# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# # Authors: # - Cedric Serfon, , 2015 +# - Donata Mielaikaite, , 2020 +# - Eric Vaandering, , 2020 +# - Maggie Voetberg , 2024 + ''' Probe to check the locked expired rules or datasets with locked rules ''' import sys -from rucio.db.sqla.session import get_session +import traceback + +from rucio.db.sqla import models, session +from sqlalchemy import func, select, functions, and_ + +from utils.common import PrometheusPusher # Exit statuses OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3 +if __name__ == '__main__': -def main(): - ''' - Probe to check the locked expired rules or datasets with locked rules - ''' status = OK - session = get_session() - try: - query = "select rawtohex(id), scope, name, rse_expression from atlas_rucio.rules where locked=1 and expires_at 0: + status = CRITICAL -if __name__ == "__main__": - main() + except Exception as error: + print(traceback.format_exc()) + sys.exit(UNKNOWN) + + try: + statement = select( + func.count(models.ReplicationRule.id), + func.count(models.DataIdentifier.name), + models.ReplicationRule.rse_expression + ).join( + models.DataIdentifier, + (models.ReplicationRule.scope == models.DataIdentifier.scope) & (models.ReplicationRule.name == models.DataIdentifier.name) + ).where( + and_( + models.ReplicationRule.locked == True, + models.DataIdentifier.expired_at != None, + models.DataIdentifier.expired_at < functions.current_timestamp() + ) + ). 
group_by( + models.ReplicationRule.rse_expression + ) + + query = session.execute(statement).scalars() + if len(query.all()) == 0: + query = [(0, 0, 'null')] + + for rules, dids, rse_expression in query: + manager.gauge('locked_expired_rules.dids.{rse_expression}', + documentation='Number of expired DIDs with locked rules, by RSE' + ).labels( + rse_expression=rse_expression + ).set(dids) + manager.gauge('locked_expired_rules.rules_for_dids.{rse_expression}', + documentation='Number of locked rules for expired DIDs, by RSE' + ).labels( + rse_expression=rse_expression + ).set(rules) + + if (rules>0) or (dids>0): + status = CRITICAL + + except: + print(traceback.format_exc()) + sys.exit(UNKNOWN) + + sys.exit(status)