-
Notifications
You must be signed in to change notification settings - Fork 108
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(api): creating the data_models app to manage the data models use… (
#564) * feat(api): creating the data_models app to manage the data models used for caching of the data-models API * feat(api): have reworked 'scorer_dump_data_parquet' and added new command 'scorer_dump_data_eth_model_score'. Have also updated docker-compose * feat(api): adding test for 'scorer_dump_data_eth_model_score' * fix(api): fix broken command 'scorer_dump_data_parquet' * feat(api): infra changes, fixes, also tagging as not managed by django * fix(api): fix test 'test_cmd_scorer_dump_data_eth_model_score', and move it to 'data_model' app --------- Co-authored-by: Gerald Iakobinyi-Pich <gerald@gitcoin.co>
- Loading branch information
Showing
20 changed files
with
677 additions
and
335 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,6 @@ api/*.sqlite3 | |
**/node_modules/** | ||
|
||
postgres_db_data | ||
postgres_db_passport_data | ||
|
||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
130 changes: 130 additions & 0 deletions
130
api/ceramic_cache/management/commands/scorer_dump_data_eth_model_score.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import json | ||
import traceback | ||
from urllib.parse import urlparse | ||
|
||
|
||
from django.core.management.base import BaseCommand | ||
from scorer.export_utils import ( | ||
export_data_for_model, | ||
upload_to_s3, | ||
) | ||
from data_model.models import Cache | ||
from contextlib import contextmanager | ||
from django.core.serializers.json import DjangoJSONEncoder | ||
from logging import getLogger | ||
|
||
log = getLogger(__name__) | ||
|
||
|
||
def get_writer(output_file):
    """Return a context-manager factory that writes score rows as JSON lines.

    The returned callable takes a ``queryset`` argument (accepted but not
    used here), opens ``output_file`` for writing as UTF-8 text and yields
    an object exposing ``write_batch(data)``. The file is closed when the
    context exits.

    NOTE(review): the factory is handed to ``export_data_for_model`` by the
    command below; presumably that helper enters the context and calls
    ``write_batch`` once per batch -- confirm against ``scorer.export_utils``.
    """

    class _JsonlScoreWriter:
        """Serialises cache rows to ``file``, one JSON document per line."""

        def __init__(self, file):
            self.file = file

        def write_batch(self, data):
            """Write every row of ``data``; a failing row is logged and skipped.

            Each row is read as a mapping with ``key`` (a JSON-encoded
            sequence whose second element is the address), ``value`` (holding
            ``data.human_probability``) and ``updated_at``.
            """
            for record in data:
                try:
                    cache_key = json.loads(record["key"])
                    payload = record["value"]
                    address = cache_key[1].lower()
                    row = {
                        "address": address,
                        "data": {
                            "score": str(payload["data"]["human_probability"])
                        },
                        "updated_at": record["updated_at"],
                    }
                    # The line is fully serialised before anything is written,
                    # so a bad record never leaves a partial line in the file.
                    line = json.dumps(row, cls=DjangoJSONEncoder) + "\n"
                    self.file.write(line)
                except Exception:
                    # Best effort: one malformed record must not abort the export.
                    log.error(f"Error when writing record '{record}'", exc_info=True)

    @contextmanager
    def eth_stamp_writer_context_manager(queryset):
        with open(output_file, "w", encoding="utf-8") as handle:
            yield _JsonlScoreWriter(handle)

    return eth_stamp_writer_context_manager
|
||
|
||
class Command(BaseCommand):
    """Export the eth-model scores held in ``Cache`` to a JSONL file and upload it to S3."""

    help = "Export eth-model score to jsonl"

    def add_arguments(self, parser):
        """Register the command-line options of the export.

        ``--s3-uri`` and ``--filename`` are required: ``handle`` uses both
        unconditionally, and omitting either one previously surfaced as an
        unrelated ``TypeError`` instead of a usage error.
        """
        parser.add_argument(
            "--batch-size",
            type=int,
            default=1000,
            help="""Size of record batches.
            If present, this will read the records in batches. The result list is ordered by pk (id), to get
            to the next batch we query by id__gt=last_id.
            """,
        )
        parser.add_argument(
            "--s3-uri",
            type=str,
            required=True,
            help="The S3 URI target location for the files",
        )

        parser.add_argument(
            "--filename", type=str, required=True, help="The output filename"
        )

        parser.add_argument(
            "--s3-extra-args",
            type=str,
            help="""JSON object, that contains extra args for the files uploaded to S3.
            This will be passed in as the `ExtraArgs` parameter to boto3's upload_file method.""",
        )

    def handle(self, *args, **options):
        """Write all ``Cache`` rows to ``--filename`` and upload that file to S3.

        The bucket name and folder are taken from the network-location and
        path parts of ``--s3-uri``. ``--s3-extra-args``, when given, is
        decoded as JSON and forwarded to ``upload_to_s3``.

        Failures during the export or the upload are reported on stdout
        together with the traceback and are not re-raised. Invalid JSON in
        ``--s3-extra-args`` is raised before that guarded section and
        therefore propagates to the caller.
        """
        batch_size = options["batch_size"]
        s3_uri = options["s3_uri"]
        filename = options["filename"]

        raw_extra_args = options["s3_extra_args"]
        extra_args = json.loads(raw_extra_args) if raw_extra_args else None

        self.stdout.write(f"EXPORT - s3_uri      : '{s3_uri}'")
        self.stdout.write(f"EXPORT - batch_size  : '{batch_size}'")
        self.stdout.write(f"EXPORT - filename    : '{filename}'")

        parsed_uri = urlparse(s3_uri)
        s3_bucket_name = parsed_uri.netloc
        s3_folder = parsed_uri.path.strip("/")

        try:
            export_data_for_model(
                Cache.objects.all(),
                "key",
                batch_size,
                get_writer(filename),
                jsonfields_as_str=False,
            )

            self.stdout.write(
                self.style.SUCCESS(f"EXPORT - Data exported to '{filename}'")
            )

            upload_to_s3(filename, s3_folder, s3_bucket_name, extra_args)

            self.stdout.write(
                self.style.SUCCESS(
                    f"EXPORT - Data uploaded to '{s3_bucket_name}/{s3_folder}/{filename}'"
                )
            )

        except Exception as e:
            # Deliberately not re-raised: the error and its traceback are only
            # reported, so the command still returns normally.
            self.stdout.write(
                self.style.ERROR(f"EXPORT - Error when exporting data '{e}'")
            )
            self.stdout.write(self.style.ERROR(traceback.format_exc()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from django.contrib import admin | ||
|
||
from scorer.scorer_admin import ScorerModelAdmin | ||
from data_model.models import Cache | ||
|
||
|
||
@admin.register(Cache)
class CacheAdmin(ScorerModelAdmin):
    """Admin configuration for ``Cache`` entries, built on ``ScorerModelAdmin``."""

    # Columns shown in the change list.
    list_display = ["key", "value", "updated_at"]

    # No list filters are configured for this model.
    list_filter = []

    # Fields matched by the admin search box.
    search_fields = ["key", "value"]
    # Help text derived from the search fields so the two cannot drift apart.
    search_help_text = f"Search by: {', '.join(search_fields)}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from django.apps import AppConfig | ||
|
||
|
||
class DataModelConfig(AppConfig):
    """Django application configuration for the ``data_model`` app."""

    # Python path of the application this configuration belongs to.
    name = "data_model"
    # Primary-key field type for models that do not declare one explicitly.
    default_auto_field = "django.db.models.BigAutoField"
Oops, something went wrong.