From 7ef186f56cdedbd2d7d07cc07cd907038a5584f7 Mon Sep 17 00:00:00 2001 From: Valay Dave Date: Tue, 21 Jun 2022 05:52:05 +0000 Subject: [PATCH] refactor : -`RUN_ID_LEN` to `RUN_HASH_ID_LEN` - `TASK_ID_LEN` to `TASK_ID_HASH_LEN` --- metaflow/plugins/airflow/airflow.py | 4 ++-- metaflow/plugins/airflow/airflow_utils.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/metaflow/plugins/airflow/airflow.py b/metaflow/plugins/airflow/airflow.py index 49213112ca8..4b6b070cdda 100644 --- a/metaflow/plugins/airflow/airflow.py +++ b/metaflow/plugins/airflow/airflow.py @@ -32,7 +32,7 @@ from .sensors import SUPPORTED_SENSORS from .airflow_utils import ( AIRFLOW_TASK_ID, - RUN_ID_LEN, + RUN_HASH_ID_LEN, RUN_ID_PREFIX, TASK_ID_XCOM_KEY, AirflowTask, @@ -53,7 +53,7 @@ class Airflow(object): # Such run-ids break the `metaflow.util.decompress_list`; this is why we hash the runid run_id = ( "%s-$(echo -n {{ run_id }}-{{ dag_run.dag_id }} | md5sum | awk '{print $1}' | awk '{print substr ($0, 0, %s)}')" - % (RUN_ID_PREFIX, str(RUN_ID_LEN)) + % (RUN_ID_PREFIX, str(RUN_HASH_ID_LEN)) ) # We do echo -n because emits line breaks and we dont want to consider that since it we want same hash value when retrieved in python. run_id_arg = "--run-id %s" % run_id diff --git a/metaflow/plugins/airflow/airflow_utils.py b/metaflow/plugins/airflow/airflow_utils.py index 4bc27b2bc0d..05707b82d19 100644 --- a/metaflow/plugins/airflow/airflow_utils.py +++ b/metaflow/plugins/airflow/airflow_utils.py @@ -19,8 +19,8 @@ class AirflowSensorNotFound(Exception): TASK_ID_XCOM_KEY = "metaflow_task_id" -RUN_ID_LEN = 12 -TASK_ID_LEN = 8 +RUN_HASH_ID_LEN = 12 +TASK_ID_HASH_LEN = 8 RUN_ID_PREFIX = "airflow" # AIRFLOW_TASK_ID will work for linear/branched workflows. @@ -48,14 +48,14 @@ def get_supported_sensors(cls): def run_id_creator(val): # join `[dag-id,run-id]` of airflow dag. - return hashlib.md5("-".join(val).encode("utf-8")).hexdigest()[:RUN_ID_LEN] + return hashlib.md5("-".join(val).encode("utf-8")).hexdigest()[:RUN_HASH_ID_LEN] def task_id_creator(lst): # This is a filter which creates a hash of the run_id/step_name string. # Since run_ids in airflow are constants, they don't create an issue with the # - return hashlib.md5("/".join(lst).encode("utf-8")).hexdigest()[:TASK_ID_LEN] + return hashlib.md5("/".join(lst).encode("utf-8")).hexdigest()[:TASK_ID_HASH_LEN] def json_dump(val):