From 46de20c5e17a0064589cdd367db2d5b24be09a3d Mon Sep 17 00:00:00 2001 From: schultztimothy Date: Wed, 14 Aug 2024 07:47:44 -0600 Subject: [PATCH] feat: batch size as env variable and return metadata --- api/passport/api.py | 6 +++--- api/passport/test/test_analysis.py | 2 +- .../commands/process_batch_model_address_upload.py | 10 ++++++++-- api/scorer/settings/base.py | 1 + 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/api/passport/api.py b/api/passport/api.py index eaf05fadb..eb4e13ef8 100644 --- a/api/passport/api.py +++ b/api/passport/api.py @@ -34,7 +34,7 @@ class ScoreModel(Schema): class DetailedScoreModel(Schema): score: int - n_transactions: Optional[int] + num_transactions: Optional[int] first_funder: Optional[str] first_funder_amount: Optional[int] @@ -103,7 +103,7 @@ async def fetch(session, url, data): "data": { "human_probability": -1, "n_transactions": -1, - "error": "Error fetching model response", + "error": str(e), } } @@ -185,7 +185,7 @@ async def handle_get_analysis( ret.details.models[model] = DetailedScoreModel( score=score, - n_transactions=num_transactions, + num_transactions=num_transactions, first_funder=first_funder, first_funder_amount=first_funder_amount, ) diff --git a/api/passport/test/test_analysis.py b/api/passport/test/test_analysis.py index c8f716cdb..4bc7ea28e 100644 --- a/api/passport/test/test_analysis.py +++ b/api/passport/test/test_analysis.py @@ -194,7 +194,7 @@ def test_handle_get_analysis_returns_additional_data(self, mock_fetch): ) assert analysis.details.models["zksync"].score == 95 - assert analysis.details.models["zksync"].n_transactions == 10 + assert analysis.details.models["zksync"].num_transactions == 10 assert analysis.details.models["zksync"].first_funder == "funder" assert analysis.details.models["zksync"].first_funder_amount == 1000 diff --git a/api/registry/management/commands/process_batch_model_address_upload.py b/api/registry/management/commands/process_batch_model_address_upload.py index 9b89344c4..889493d7f 100644 --- a/api/registry/management/commands/process_batch_model_address_upload.py +++ b/api/registry/management/commands/process_batch_model_address_upload.py @@ -16,6 +16,7 @@ from registry.admin import get_s3_client from registry.models import BatchModelScoringRequest, BatchRequestStatus from scorer.settings import ( + BULK_MODEL_SCORE_BATCH_SIZE, BULK_MODEL_SCORE_REQUESTS_RESULTS_FOLDER, BULK_SCORE_REQUESTS_ADDRESS_LIST_FOLDER, BULK_SCORE_REQUESTS_BUCKET_NAME, @@ -99,7 +100,7 @@ def download_from_s3(self, s3_filename): except Exception as e: raise CommandError(f"Failed to download file from S3: {str(e)}") - def process_csv_in_batches(self, csv_data, batch_size=300): + def process_csv_in_batches(self, csv_data, batch_size=BULK_MODEL_SCORE_BATCH_SIZE): while True: batch = list(islice(csv_data, batch_size)) if not batch: @@ -137,7 +138,12 @@ async def process_address(self, address, model_list): details_dict = { "models": { - model: {"score": score.score} + model: { + "score": score.score, + "num_transactions": score.num_transactions, + "first_funder": score.first_funder, + "first_funder_amount": score.first_funder_amount, + } for model, score in analysis.details.models.items() } } diff --git a/api/scorer/settings/base.py b/api/scorer/settings/base.py index a80317286..c3db86abc 100644 --- a/api/scorer/settings/base.py +++ b/api/scorer/settings/base.py @@ -463,6 +463,7 @@ BULK_MODEL_SCORE_REQUESTS_RESULTS_FOLDER = env( "BULK_MODEL_SCORE_REQUESTS_RESULTS_FOLDER", default="model-score-results" ) +BULK_MODEL_SCORE_BATCH_SIZE = env("BULK_MODEL_SCORE_BATCH_SIZE", default=50) DATA_SCIENCE_API_KEY = env("DATA_SCIENCE_API_KEY", default="abc") VERIFIER_URL = env("VERIFIER_URL", default="http://localhost:8001/verifier/verify")