From 1f55273265f42bb0894227ec555bc0b2fe60ec74 Mon Sep 17 00:00:00 2001 From: larisa17 Date: Thu, 17 Oct 2024 10:49:50 +0300 Subject: [PATCH 1/5] feat(infra): WIP data exports to grants account --- .../commands/scorer_dump_data_model_score.py | 17 ++- infra/aws/index.ts | 111 ++++++++++++++++-- infra/lib/scorer/scheduledTasks.ts | 4 +- 3 files changed, 118 insertions(+), 14 deletions(-) diff --git a/api/data_model/management/commands/scorer_dump_data_model_score.py b/api/data_model/management/commands/scorer_dump_data_model_score.py index da693f9e8..73c013ad0 100644 --- a/api/data_model/management/commands/scorer_dump_data_model_score.py +++ b/api/data_model/management/commands/scorer_dump_data_model_score.py @@ -13,6 +13,7 @@ from data_model.models import Cache from scorer.export_utils import ( + AWSOverrideCredentials, export_data_for_model, get_pa_schema, upload_to_s3, @@ -108,6 +109,8 @@ def add_arguments(self, parser): ) parser.add_argument("--filename", type=str, help="The output filename") + parser.add_argument("--s3-access-key", type=str, default="", help="The S3 access key for dedicated S3 download (like digital ocean)") + parser.add_argument("--s3-secret-access-key", type=str, default="", help="The S3 secret access key for dedicated S3 download (like digital ocean)") parser.add_argument( "--s3-extra-args", type=str, @@ -132,6 +135,8 @@ def handle(self, *args, **options): batch_size = options["batch_size"] s3_uri = options["s3_uri"] filename = options["filename"] + s3_access_key = options["s3_access_key"] + s3_secret_access_key = options["s3_secret_access_key"] format = options["format"] data_model_names = ( [n.strip() for n in options["data_model"].split(",")] @@ -170,7 +175,17 @@ def handle(self, *args, **options): self.style.SUCCESS(f"EXPORT - Data exported to '{filename}'") ) - upload_to_s3(filename, s3_folder, s3_bucket_name, extra_args) + if s3_access_key and s3_secret_access_key: + aws_override_credentials = AWSOverrideCredentials( + aws_access_key_id=s3_access_key, + aws_secret_access_key=s3_secret_access_key, + aws_endpoint_url="", + ) + upload_to_s3( + filename, s3_folder, s3_bucket_name, extra_args, aws_override_credentials + ) + else: + upload_to_s3(filename, s3_folder, s3_bucket_name, extra_args) if cloudfront_distribution_id: client = boto3.client("cloudfront") diff --git a/infra/aws/index.ts b/infra/aws/index.ts index 0e89337e8..b5fcff455 100644 --- a/infra/aws/index.ts +++ b/infra/aws/index.ts @@ -1428,8 +1428,8 @@ const exportVals = createScoreExportBucketAndDomain( ); // The following scorer dumps the Allo scorer scores to a public S3 bucket // for the Allo team to easily pull the data - -export const frequentAlloScorerDataDumpTaskDefinition = pulumi +// This will be removed after the confirmation that the new exports are working properly. +const frequentAlloScorerDataDumpTaskDefinition = pulumi .all([exportVals]) .apply(([_exportedVals]) => { return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { @@ -1456,8 +1456,6 @@ export const frequentAlloScorerDataDumpTaskDefinition = pulumi ]) + "'", `--s3-uri=s3://${publicDataDomain}/passport_scores/`, - // "--summary-extra-args", - // JSON.stringify({ ACL: "public-read" }), ].join(" "), scheduleExpression: "cron(*/30 * ? * * *)", // Run the task every 30 min alertTopic: pagerdutyTopic, @@ -1471,7 +1469,64 @@ export const frequentAlloScorerDataDumpTaskDefinition = pulumi }); }); -export const frequentScorerDataDumpTaskDefinitionForScorer_335 = pulumi +// Only for production +if (stack === "production") { + const frequentAlloScorerDataDumpTaskDefinitionDigitalOcean = pulumi + .all([exportVals, apiSecrets]) + .apply(([_exportedVals, _apiSecrets]) => { + console.log("apiSecrets", _apiSecrets); + const digitalOceanAccessKey = _apiSecrets.find(secret => secret.name === "GRANTS_DIGITAL_OCEAN_ACCESS_KEY")?.valueFrom; + const digitalOceanSecretAccessKey = _apiSecrets.find(secret => secret.name === "GRANTS_DIGITAL_OCEAN_SECRET_ACCESS_KEY")?.valueFrom; + const digitalOceanS3Endpoint = op.read.parse( + `op://DevOps/passport-scorer-${stack}-env/api/GRANTS_DIGITAL_OCEAN_S3_ENDPOINT` + ); + return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { + createScheduledTask({ + name: "frequent-allo-scorer-data-dump-digital-cean", + config: { + ...baseScorerServiceConfig, + securityGroup: secgrp, + command: [ + "python", + "manage.py", + "scorer_dump_data", + "--batch-size=1000", + "--database=read_replica_analytics", + "--config", + "'" + + JSON.stringify([ + { + name: "registry.Score", + filter: { passport__community_id: 335 }, + select_related: ["passport"], + }, + ]) + + "'", + `--s3-uri=s3://${digitalOceanS3Endpoint}`, + ].join(" "), + scheduleExpression: "cron(*/30 * ? * * *)", // Run the task every 30 min + alertTopic: pagerdutyTopic, + }, + environment:apiEnvironment, + + secrets: _apiSecrets.map(secret => { + if (secret.name === "S3_DATA_AWS_SECRET_ACCESS_KEY") { + return { ...secret, value: digitalOceanAccessKey}; // Replace for data dump with digital ocean credentials + } + if (secret.name === "S3_DATA_AWS_SECRET_KEY_ID") { + return { ...secret, value: digitalOceanSecretAccessKey }; // Replace for data dump with digital ocean credentials + } + return secret; + }), + alarmPeriodSeconds: 3600, // 1h in seconds + enableInvocationAlerts: true, + scorerSecretManagerArn: scorerSecret.arn, + }); + }); + }); +} + +const frequentScorerDataDumpTaskDefinitionForScorer_335 = pulumi .all([exportVals]) .apply(([_exportedVals]) => { return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { @@ -1498,8 +1553,6 @@ export const frequentScorerDataDumpTaskDefinitionForScorer_335 = pulumi ]) + "'", `--s3-uri=s3://${publicDataDomain}/passport_scores/335/`, - // "--summary-extra-args", - // JSON.stringify({ ACL: "public-read" }), ].join(" "), scheduleExpression: "cron(*/30 * ? * * *)", // Run the task every 30 min alertTopic: pagerdutyTopic, @@ -1513,7 +1566,7 @@ export const frequentScorerDataDumpTaskDefinitionForScorer_335 = pulumi }); }); -export const frequentScorerDataDumpTaskDefinitionForScorer_6608 = pulumi +const frequentScorerDataDumpTaskDefinitionForScorer_6608 = pulumi .all([exportVals]) .apply(([_exportedVals]) => { return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { @@ -1540,8 +1593,6 @@ export const frequentScorerDataDumpTaskDefinitionForScorer_6608 = pulumi ]) + "'", `--s3-uri=s3://${publicDataDomain}/passport_scores/6608/`, - // "--summary-extra-args", - // JSON.stringify({ ACL: "public-read" }), ].join(" "), scheduleExpression: "cron(*/30 * ? * * *)", // Run the task every 30 min alertTopic: pagerdutyTopic, @@ -1558,7 +1609,8 @@ export const frequentScorerDataDumpTaskDefinitionForScorer_6608 = pulumi /* * Dump data for the eth-model V2 */ -export const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorer = pulumi +// this for sure +const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorer = pulumi .all([exportVals]) .apply(([_exportedVals]) => { return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { @@ -1588,6 +1640,43 @@ export const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorer = pulumi }); }); +if (stack === "production") { +const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorerDigitalOcean = pulumi + .all([exportVals, apiSecrets]) + .apply(([_exportedVals, _apiSecrets]) => { + // const digitalOceanAccessKey = _apiSecrets.find(secret => secret.name === "GRANTS_DIGITAL_OCEAN_ACCESS_KEY")?.valueFrom; + // const digitalOceanSecretAccessKey = _apiSecrets.find(secret => secret.name === "GRANTS_DIGITAL_OCEAN_SECRET_ACCESS_KEY")?.valueFrom; + const digitalOceanS3Endpoint = op.read.parse( + `op://DevOps/passport-scorer-${stack}-env/api/GRANTS_DIGITAL_OCEAN_S3_ENDPOINT` + ); + return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { + createScheduledTask({ + name: "frequent-eth-model-v2-score-dump-digital-ocean", + config: { + ...baseScorerServiceConfig, + securityGroup: secgrp, + command: [ + "python", + "manage.py", + "scorer_dump_data_model_score", + `--s3-uri=s3://${digitalOceanS3Endpoint}`, + `--s3_access_key=$GRANTS_DIGITAL_OCEAN_ACCESS_KEY`, + `--s3_secret_key=$GRANTS_DIGITAL_OCEAN_SECRET_ACCESS_KEY`, + "--filename=model_scores.parquet", + "--format=parquet", + ].join(" "), + scheduleExpression: "cron(*/30 * ? * * *)", // Run the task every 30 min + alertTopic: pagerdutyTopic, + }, + environment: apiEnvironment, + secrets: _apiSecrets, + alarmPeriodSeconds: 3600, // 1h in seconds + enableInvocationAlerts: true, + scorerSecretManagerArn: scorerSecret.arn, + }); + }); + }); +} export const coinbaseRevocationCheck = createScheduledTask({ name: "coinbase-revocation-check", config: { diff --git a/infra/lib/scorer/scheduledTasks.ts b/infra/lib/scorer/scheduledTasks.ts index 573588dcc..45f4f18aa 100644 --- a/infra/lib/scorer/scheduledTasks.ts +++ b/infra/lib/scorer/scheduledTasks.ts @@ -34,7 +34,7 @@ export function createTask({ name: string; config: ScheduledTaskConfig; environment: secretsManager.EnvironmentVar[]; - secrets: pulumi.Output; + secrets: pulumi.Output | secretsManager.SecretRef[]; scorerSecretManagerArn: Input; }) { const { @@ -225,7 +225,7 @@ export function createScheduledTask({ name: string; config: ScheduledTaskConfig; environment: secretsManager.EnvironmentVar[]; - secrets: pulumi.Output; + secrets: pulumi.Output | secretsManager.SecretRef[]; alarmPeriodSeconds?: number; enableInvocationAlerts?: boolean; scorerSecretManagerArn: Input; From 83e816117c13e59611a3832a6a872ee26c99fcaa Mon Sep 17 00:00:00 2001 From: larisa17 Date: Fri, 18 Oct 2024 16:13:17 +0300 Subject: [PATCH 2/5] WIP debug message --- .../management/commands/scorer_dump_data_model_score.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/data_model/management/commands/scorer_dump_data_model_score.py b/api/data_model/management/commands/scorer_dump_data_model_score.py index 73c013ad0..5b939430a 100644 --- a/api/data_model/management/commands/scorer_dump_data_model_score.py +++ b/api/data_model/management/commands/scorer_dump_data_model_score.py @@ -153,6 +153,9 @@ def handle(self, *args, **options): self.stdout.write(f"EXPORT - batch_size : '{batch_size}'") self.stdout.write(f"EXPORT - filename : '{filename}'") + + self.stdout.write(f"EXPORT - DEBUG LARISA : acc_key : '{s3_access_key}', s3_secret_access_key: '{s3_secret_access_key}'") + parsed_uri = urlparse(s3_uri) s3_bucket_name = parsed_uri.netloc s3_folder = parsed_uri.path.strip("/") From 534ba8ecbade05c68c7b098a3b75d4b8d5711942 Mon Sep 17 00:00:00 2001 From: larisa17 Date: Mon, 21 Oct 2024 12:19:32 +0300 Subject: [PATCH 3/5] add cgrants data exports --- infra/aws/index.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/infra/aws/index.ts b/infra/aws/index.ts index b5fcff455..09d6a36ae 100644 --- a/infra/aws/index.ts +++ b/infra/aws/index.ts @@ -1474,7 +1474,6 @@ if (stack === "production") { const frequentAlloScorerDataDumpTaskDefinitionDigitalOcean = pulumi .all([exportVals, apiSecrets]) .apply(([_exportedVals, _apiSecrets]) => { - console.log("apiSecrets", _apiSecrets); const digitalOceanAccessKey = _apiSecrets.find(secret => secret.name === "GRANTS_DIGITAL_OCEAN_ACCESS_KEY")?.valueFrom; const digitalOceanSecretAccessKey = _apiSecrets.find(secret => secret.name === "GRANTS_DIGITAL_OCEAN_SECRET_ACCESS_KEY")?.valueFrom; const digitalOceanS3Endpoint = op.read.parse( @@ -1482,7 +1481,7 @@ if (stack === "production") { ); return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { createScheduledTask({ - name: "frequent-allo-scorer-data-dump-digital-cean", + name: "frequent-allo-scorer-data-dump-cgrants", config: { ...baseScorerServiceConfig, securityGroup: secgrp, @@ -1651,7 +1650,7 @@ const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorerDigitalOcean = pulum ); return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { createScheduledTask({ - name: "frequent-eth-model-v2-score-dump-digital-ocean", + name: "frequent-eth-model-v2-dump-cgrants", config: { ...baseScorerServiceConfig, securityGroup: secgrp, @@ -1660,8 +1659,8 @@ const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorerDigitalOcean = pulum "manage.py", "scorer_dump_data_model_score", `--s3-uri=s3://${digitalOceanS3Endpoint}`, - `--s3_access_key=$GRANTS_DIGITAL_OCEAN_ACCESS_KEY`, - `--s3_secret_key=$GRANTS_DIGITAL_OCEAN_SECRET_ACCESS_KEY`, + `--s3-access-key=$GRANTS_DIGITAL_OCEAN_ACCESS_KEY`, + `--s3-secret-access-key=$GRANTS_DIGITAL_OCEAN_SECRET_ACCESS_KEY`, "--filename=model_scores.parquet", "--format=parquet", ].join(" "), From 1a8851915596d47d8e374681a7416840c70213c1 Mon Sep 17 00:00:00 2001 From: larisa17 Date: Mon, 21 Oct 2024 13:35:06 +0300 Subject: [PATCH 4/5] update secrets --- infra/aws/index.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/infra/aws/index.ts b/infra/aws/index.ts index 09d6a36ae..cc060de3f 100644 --- a/infra/aws/index.ts +++ b/infra/aws/index.ts @@ -1510,13 +1510,13 @@ if (stack === "production") { secrets: _apiSecrets.map(secret => { if (secret.name === "S3_DATA_AWS_SECRET_ACCESS_KEY") { - return { ...secret, value: digitalOceanAccessKey}; // Replace for data dump with digital ocean credentials + return { ...secret, valueFrom: digitalOceanAccessKey}; // Replace for data dump with digital ocean credentials } if (secret.name === "S3_DATA_AWS_SECRET_KEY_ID") { - return { ...secret, value: digitalOceanSecretAccessKey }; // Replace for data dump with digital ocean credentials + return { ...secret, valueFrom: digitalOceanSecretAccessKey }; // Replace for data dump with digital ocean credentials } return secret; - }), + }) as secretsManager.SecretRef[], alarmPeriodSeconds: 3600, // 1h in seconds enableInvocationAlerts: true, scorerSecretManagerArn: scorerSecret.arn, From b2ef7387b820f2f6bbaaf818da2710e860e92711 Mon Sep 17 00:00:00 2001 From: larisa17 Date: Mon, 21 Oct 2024 13:44:29 +0300 Subject: [PATCH 5/5] rename data exports --- infra/aws/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/aws/index.ts b/infra/aws/index.ts index cc060de3f..2e741ea14 100644 --- a/infra/aws/index.ts +++ b/infra/aws/index.ts @@ -1481,7 +1481,7 @@ if (stack === "production") { ); return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { createScheduledTask({ - name: "frequent-allo-scorer-data-dump-cgrants", + name: "frequent-allo-scorer-data-dump-grants", config: { ...baseScorerServiceConfig, securityGroup: secgrp, @@ -1650,7 +1650,7 @@ const frequentEthModelV2ScoreDataDumpTaskDefinitionForScorerDigitalOcean = pulum ); return pulumi.all([_exportedVals.cloudFront.id]).apply(([cloudFrontId]) => { createScheduledTask({ - name: "frequent-eth-model-v2-dump-cgrants", + name: "frequent-eth-model-v2-dump-grants", config: { ...baseScorerServiceConfig, securityGroup: secgrp,