From 46de20c5e17a0064589cdd367db2d5b24be09a3d Mon Sep 17 00:00:00 2001
From: schultztimothy <schultz.timothy52@gmail.com>
Date: Wed, 14 Aug 2024 07:47:44 -0600
Subject: [PATCH] feat: read bulk model-score batch size from env and return model metadata

---
 api/passport/api.py                                    |  6 +++---
 api/passport/test/test_analysis.py                     |  2 +-
 .../commands/process_batch_model_address_upload.py     | 10 ++++++++--
 api/scorer/settings/base.py                            |  1 +
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/api/passport/api.py b/api/passport/api.py
index eaf05fadb..eb4e13ef8 100644
--- a/api/passport/api.py
+++ b/api/passport/api.py
@@ -34,7 +34,7 @@ class ScoreModel(Schema):
 
 class DetailedScoreModel(Schema):
     score: int
-    n_transactions: Optional[int]
+    num_transactions: Optional[int]
     first_funder: Optional[str]
     first_funder_amount: Optional[int]
 
@@ -103,7 +103,7 @@ async def fetch(session, url, data):
             "data": {
                 "human_probability": -1,
                 "n_transactions": -1,
-                "error": "Error fetching model response",
+                "error": str(e),
             }
         }
 
@@ -185,7 +185,7 @@ async def handle_get_analysis(
 
                 ret.details.models[model] = DetailedScoreModel(
                     score=score,
-                    n_transactions=num_transactions,
+                    num_transactions=num_transactions,
                     first_funder=first_funder,
                     first_funder_amount=first_funder_amount,
                 )
diff --git a/api/passport/test/test_analysis.py b/api/passport/test/test_analysis.py
index c8f716cdb..4bc7ea28e 100644
--- a/api/passport/test/test_analysis.py
+++ b/api/passport/test/test_analysis.py
@@ -194,7 +194,7 @@ def test_handle_get_analysis_returns_additional_data(self, mock_fetch):
         )
 
         assert analysis.details.models["zksync"].score == 95
-        assert analysis.details.models["zksync"].n_transactions == 10
+        assert analysis.details.models["zksync"].num_transactions == 10
         assert analysis.details.models["zksync"].first_funder == "funder"
         assert analysis.details.models["zksync"].first_funder_amount == 1000
 
diff --git a/api/registry/management/commands/process_batch_model_address_upload.py b/api/registry/management/commands/process_batch_model_address_upload.py
index 9b89344c4..889493d7f 100644
--- a/api/registry/management/commands/process_batch_model_address_upload.py
+++ b/api/registry/management/commands/process_batch_model_address_upload.py
@@ -16,6 +16,7 @@
 from registry.admin import get_s3_client
 from registry.models import BatchModelScoringRequest, BatchRequestStatus
 from scorer.settings import (
+    BULK_MODEL_SCORE_BATCH_SIZE,
     BULK_MODEL_SCORE_REQUESTS_RESULTS_FOLDER,
     BULK_SCORE_REQUESTS_ADDRESS_LIST_FOLDER,
     BULK_SCORE_REQUESTS_BUCKET_NAME,
@@ -99,7 +100,7 @@ def download_from_s3(self, s3_filename):
         except Exception as e:
             raise CommandError(f"Failed to download file from S3: {str(e)}")
 
-    def process_csv_in_batches(self, csv_data, batch_size=300):
+    def process_csv_in_batches(self, csv_data, batch_size=BULK_MODEL_SCORE_BATCH_SIZE):
         while True:
             batch = list(islice(csv_data, batch_size))
             if not batch:
@@ -137,7 +138,12 @@ async def process_address(self, address, model_list):
 
             details_dict = {
                 "models": {
-                    model: {"score": score.score}
+                    model: {
+                        "score": score.score,
+                        "num_transactions": score.num_transactions,
+                        "first_funder": score.first_funder,
+                        "first_funder_amount": score.first_funder_amount,
+                    }
                     for model, score in analysis.details.models.items()
                 }
             }
diff --git a/api/scorer/settings/base.py b/api/scorer/settings/base.py
index a80317286..c3db86abc 100644
--- a/api/scorer/settings/base.py
+++ b/api/scorer/settings/base.py
@@ -463,6 +463,7 @@
 BULK_MODEL_SCORE_REQUESTS_RESULTS_FOLDER = env(
     "BULK_MODEL_SCORE_REQUESTS_RESULTS_FOLDER", default="model-score-results"
 )
+BULK_MODEL_SCORE_BATCH_SIZE = env("BULK_MODEL_SCORE_BATCH_SIZE", default=50)
 DATA_SCIENCE_API_KEY = env("DATA_SCIENCE_API_KEY", default="abc")
 
 VERIFIER_URL = env("VERIFIER_URL", default="http://localhost:8001/verifier/verify")