From 73b5646dc597f50a7e2e8132f2f026ac8922b1fc Mon Sep 17 00:00:00 2001 From: Reuven Gonzales Date: Thu, 16 Jan 2025 17:15:05 -0800 Subject: [PATCH] Fix openrank model execution (#2794) --- .../openrank/int_openrank_developer.py | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/warehouse/metrics_mesh/models/intermediate/openrank/int_openrank_developer.py b/warehouse/metrics_mesh/models/intermediate/openrank/int_openrank_developer.py index eb2f09bf9..2baf780be 100644 --- a/warehouse/metrics_mesh/models/intermediate/openrank/int_openrank_developer.py +++ b/warehouse/metrics_mesh/models/intermediate/openrank/int_openrank_developer.py @@ -4,19 +4,22 @@ import pandas as pd from openrank_sdk import EigenTrust from sqlmesh import ExecutionContext, model +from sqlmesh.core.dialect import parse_one # simple illustration of a weighting function def weight_contributions(bucket_month, amount): year = bucket_month.year - weight = ((2018-year)/4) * (amount ** 0.5) + weight = ((2018 - year) / 4) * (amount**0.5) return weight + # another simple illustration of a weighting function def weight_events(event_type, amount): - weight = (amount ** .5) * (0.5 if event_type == 'STARRED' else 1.0) + weight = (amount**0.5) * (0.5 if event_type == "STARRED" else 1.0) return weight + @model( "metrics.int_openrank_developer", kind="full", @@ -25,7 +28,7 @@ def weight_events(event_type, amount): "metrics.int_events_monthly_to_project", ], columns={ - #"id": "int", + # "id": "int", "i": "text", "v": "float", }, @@ -48,17 +51,18 @@ def execute( events_table = context.resolve_table("metrics.int_events_monthly_to_project") # Get trusted developers - seeded_developers_query = f""" + seeded_developers_query = parse_one( + f""" with targeted_repos as ( select artifact_id, artifact_namespace, artifact_name from {artifacts_table} - where (artifact_source, artifact_namespace, artifact_name) in ( - ('GITHUB', 'ethereum', 'eips'), - ('GITHUB', 'ethereum', 'solidity'), - ('GITHUB', 'ethereum', 'go-ethereum'), + where CONCAT(artifact_source, artifact_namespace, artifact_name) in ( + CONCAT('GITHUB', 'ethereum', 'eips'), + CONCAT('GITHUB', 'ethereum', 'solidity'), + CONCAT('GITHUB', 'ethereum', 'go-ethereum'), ) ) @@ -75,27 +79,31 @@ def execute( on e.to_artifact_id = targeted_repos.artifact_id where e.event_type = 'COMMIT_CODE' - and e.bucket_month > '2014-01-01' + and e.bucket_month > date '2014-01-01' and e.to_artifact_id in ( select artifact_id from targeted_repos ) - """ - seeded_developers = context.fetchdf(seeded_developers_query) + """, + dialect="duckdb", + ) + seeded_developers = context.fetchdf(seeded_developers_query.sql(dialect="trino")) if seeded_developers.empty: yield from () return # Generate pretrust values - seeded_developers['v'] = seeded_developers.apply(lambda x: weight_contributions(x['bucket_month'], x['amount']), axis=1) + seeded_developers["v"] = seeded_developers.apply( + lambda x: weight_contributions(x["bucket_month"], x["amount"]), axis=1 + ) pretrust = [] developers = [] - for (i,v) in seeded_developers.groupby('user')['v'].sum().items(): - pretrust.append({'i': i, 'v': v}) + for i, v in seeded_developers.groupby("user")["v"].sum().items(): + pretrust.append({"i": i, "v": v}) developers.append(i) # Get trusted repos - developers_str = "'"+"','".join(developers)+"'" + developers_str = "'" + "','".join(developers) + "'" trusted_repos_query = f""" with trusted_repos as ( select @@ -137,19 +145,25 @@ def execute( return # Construct a graph from the event data - trusted_repos['v'] = trusted_repos.apply(lambda x: weight_events(x['event_type'], x['amount']), axis=1) + trusted_repos["v"] = trusted_repos.apply( + lambda x: weight_events(x["event_type"], x["amount"]), axis=1 + ) localtrust = [] - for item in trusted_repos.groupby(['trusted_user', 'github_user'])['v'].sum().items(): - #((i, j), v) = item + for item in ( + trusted_repos.groupby(["trusted_user", "github_user"])["v"].sum().items() + ): + # ((i, j), v) = item k = t.cast(t.Tuple[str, str], item[0]) i, j = k v = float(item[1]) - if i == j or '[bot]' in j: + if i == j or "[bot]" in j: continue - localtrust.append({'i': i, 'j': j, 'v': v}) + localtrust.append({"i": i, "j": j, "v": v}) # Run EigenTrust over the graph - pretrust_updated = [x for x in pretrust if x['i'] in trusted_repos['trusted_user'].unique()] + pretrust_updated = [ + x for x in pretrust if x["i"] in trusted_repos["trusted_user"].unique() + ] a = EigenTrust() dev_rank = a.run_eigentrust(localtrust, pretrust_updated)