Skip to content

Commit

Permalink
CUMULUS-3756 NDCUM-1264 Added excludeFileRegex configuration to updat…
Browse files Browse the repository at this point in the history
…e-granules-cmr-metadata-… (#3791)

* Added excludeFileRegex configuration to update-granules-cmr-metadata-file-links (#3790)

Updated tests to exercise new file-exclusion feature

* linter fixes

* remove explicit null for un-found regexpattern

* switch to logging when no excludable files found

* changelog broken into multiple lines

* linter fixes in changelog

* name in changelog after lambda function name

* remove TODO. non-mocked is a truer representation of function

* small refactor

* typo in passthrough of fileregex

* nyc values with new tests

* version requirement update

* fixed merge weirdness

* fix jsonpath in the other places it's flagged

* remove unneeded explicit pin in aws-client

* check like instead of deepequal on credentials return

---------

Co-authored-by: Mike Dorfman <42116953+mikedorfman@users.noreply.github.com>
  • Loading branch information
etcart and mikedorfman authored Oct 11, 2024
1 parent 5342484 commit 818f7ff
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 63 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
timeout on `sfEventSqsToDbRecordsDeadLetterQueue` and `sfEventSqsToDbRecordsInputQueue` and may lead to system
instability.

- **CUMULUS-3756**
- Added excludeFileRegex configuration to UpdateGranulesCmrMetadataFileLinks
- This allows files whose key matches the specified regex to be excluded when updating the Related URLs list
- Defaults to the current behavior of excluding no files.

### Changed

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion packages/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
"is-valid-hostname": "1.0.2",
"js-yaml": "^3.13.1",
"json2csv": "^4.5.1",
"jsonpath-plus": "^1.1.0",
"jsonpath-plus": "^10.0.0",
"jsonwebtoken": "^9.0.0",
"knex": "2.4.1",
"lodash": "^4.17.21",
Expand Down
58 changes: 28 additions & 30 deletions packages/aws-client/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@cumulus/aws-client",
"version": "19.0.0",
"version": "18.3.5",
"description": "Utilities for working with AWS",
"keywords": [
"GIBS",
Expand Down Expand Up @@ -37,7 +37,6 @@
"coverage": "python ../../scripts/coverage_handler/coverage.py"
},
"ava": {
"timeout": "30s",
"files": [
"tests/**/*.js"
],
Expand All @@ -47,33 +46,32 @@
"author": "Cumulus Authors",
"license": "Apache-2.0",
"dependencies": {
"@aws-sdk/client-api-gateway": "^3.621.0",
"@aws-sdk/client-cloudformation": "^3.621.0",
"@aws-sdk/client-cloudwatch-events": "^3.621.0",
"@aws-sdk/client-dynamodb": "^3.621.0",
"@aws-sdk/client-dynamodb-streams": "^3.621.0",
"@aws-sdk/client-ec2": "^3.621.0",
"@aws-sdk/client-ecs": "^3.621.0",
"@aws-sdk/client-elasticsearch-service": "^3.621.0",
"@aws-sdk/client-kinesis": "^3.621.0",
"@aws-sdk/client-kms": "^3.621.0",
"@aws-sdk/client-lambda": "^3.621.0",
"@aws-sdk/client-s3": "^3.621.0",
"@aws-sdk/client-secrets-manager": "^3.621.0",
"@aws-sdk/client-sfn": "^3.621.0",
"@aws-sdk/client-sns": "^3.621.0",
"@aws-sdk/client-sqs": "^3.621.0",
"@aws-sdk/client-sts": "^3.621.0",
"@aws-sdk/lib-dynamodb": "^3.621.0",
"@aws-sdk/lib-storage": "^3.621.0",
"@aws-sdk/s3-request-presigner": "^3.621.0",
"@aws-sdk/signature-v4-crt": "^3.621.0",
"@aws-sdk/types": "^3.609.0",
"@cumulus/checksum": "19.0.0",
"@cumulus/errors": "19.0.0",
"@cumulus/logger": "19.0.0",
"@cumulus/types": "19.0.0",
"jsonpath-plus": "^1.1.0",
"@aws-sdk/client-api-gateway": "^3.499.0",
"@aws-sdk/client-cloudformation": "^3.447.0",
"@aws-sdk/client-cloudwatch-events": "^3.447.0",
"@aws-sdk/client-dynamodb": "^3.447.0",
"@aws-sdk/client-dynamodb-streams": "^3.447.0",
"@aws-sdk/client-ec2": "^3.447.0",
"@aws-sdk/client-ecs": "^3.447.0",
"@aws-sdk/client-elasticsearch-service": "^3.529.1",
"@aws-sdk/client-kinesis": "^3.447.0",
"@aws-sdk/client-kms": "^3.447.0",
"@aws-sdk/client-lambda": "^3.529.1",
"@aws-sdk/client-s3": "^3.447.0 <3.614.2",
"@aws-sdk/client-secrets-manager": "^3.447.0",
"@aws-sdk/client-sfn": "^3.447.0",
"@aws-sdk/client-sns": "^3.447.0",
"@aws-sdk/client-sqs": "^3.447.0",
"@aws-sdk/client-sts": "^3.447.0",
"@aws-sdk/lib-dynamodb": "^3.447.0",
"@aws-sdk/lib-storage": "^3.447.0 <3.614.2",
"@aws-sdk/s3-request-presigner": "^3.447.0",
"@aws-sdk/signature-v4-crt": "^3.447.0",
"@aws-sdk/types": "^3.447.0",
"@cumulus/checksum": "18.3.5",
"@cumulus/errors": "18.3.5",
"@cumulus/logger": "18.3.5",
"@cumulus/types": "18.3.5",
"lodash": "~4.17.21",
"mem": "^8.0.2",
"p-map": "^1.2.0",
Expand All @@ -84,7 +82,7 @@
"uuid": "^8.2.0"
},
"devDependencies": {
"@cumulus/test-data": "18.0.0",
"@cumulus/test-data": "18.3.5",
"@types/uuid": "^8.0.0"
}
}
36 changes: 18 additions & 18 deletions packages/aws-client/tests/test-services.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ test('apigateway() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(APIGatewayClient);
t.deepEqual(
t.like(
await apigateway.config.credentials(),
credentials
);
Expand All @@ -44,7 +44,7 @@ test('cf() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(CloudFormation);
t.deepEqual(
t.like(
await cf.config.credentials(),
credentials
);
Expand All @@ -67,7 +67,7 @@ test('cloudwatchevents() service defaults to localstack in test mode', async (t)
endpoint,
} = localStackAwsClientOptions(CloudWatchEvents);

t.deepEqual(
t.like(
await cloudwatchevents.config.credentials(),
credentials
);
Expand All @@ -89,7 +89,7 @@ test('dynamoDb() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(DynamoDB);
t.deepEqual(
t.like(
await dynamodb.config.credentials(),
credentials
);
Expand All @@ -110,7 +110,7 @@ test('dynamodbDocClient() service defaults to localstack in test mode', async (t
credentials,
endpoint,
} = localStackAwsClientOptions(DynamoDB);
t.deepEqual(
t.like(
await dynamodbDocClient.config.credentials(),
credentials
);
Expand All @@ -131,7 +131,7 @@ test('dynamodbstreams() service defaults to localstack in test mode', async (t)
credentials,
endpoint,
} = localStackAwsClientOptions(DynamoDB);
t.deepEqual(
t.like(
await dynamodbstreams.config.credentials(),
credentials
);
Expand All @@ -152,7 +152,7 @@ test('ecs() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(ECS);
t.deepEqual(
t.like(
await ecs.config.credentials(),
credentials
);
Expand All @@ -173,7 +173,7 @@ test('ec2() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(EC2);
t.deepEqual(
t.like(
await ec2.config.credentials(),
credentials
);
Expand All @@ -194,7 +194,7 @@ test('es() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(ElasticsearchService);
t.deepEqual(
t.like(
await es.config.credentials(),
credentials
);
Expand All @@ -215,7 +215,7 @@ test('kinesis() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(Kinesis);
t.deepEqual(
t.like(
await kinesis.config.credentials(),
credentials
);
Expand All @@ -236,7 +236,7 @@ test('kms() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(KMS);
t.deepEqual(
t.like(
await kms.config.credentials(),
credentials
);
Expand All @@ -258,7 +258,7 @@ test('lambda() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(Lambda);
t.deepEqual(
t.like(
await lambda.config.credentials(),
credentials
);
Expand All @@ -279,7 +279,7 @@ test('s3() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(S3);
t.deepEqual(
t.like(
await s3.config.credentials(),
credentials
);
Expand All @@ -300,7 +300,7 @@ test('secretsManager() service defaults to localstack in test mode', async (t) =
credentials,
endpoint,
} = localStackAwsClientOptions(SecretsManager);
t.deepEqual(
t.like(
await secretsManager.config.credentials(),
credentials
);
Expand All @@ -321,7 +321,7 @@ test('sfn() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(SFN);
t.deepEqual(
t.like(
await sfn.config.credentials(),
credentials
);
Expand All @@ -343,7 +343,7 @@ test('sns() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(SNS);
t.deepEqual(
t.like(
await sns.config.credentials(),
credentials
);
Expand All @@ -365,7 +365,7 @@ test('sqs() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(SQS);
t.deepEqual(
t.like(
await sqs.config.credentials(),
credentials
);
Expand All @@ -386,7 +386,7 @@ test('sts() service defaults to localstack in test mode', async (t) => {
credentials,
endpoint,
} = localStackAwsClientOptions(STS);
t.deepEqual(
t.like(
await sts.config.credentials(),
credentials
);
Expand Down
2 changes: 1 addition & 1 deletion packages/common/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"fs-extra": "^5.0.0",
"got": "^14.2.1",
"is-ip": "^3.1.0",
"jsonpath-plus": "^3.0.0",
"jsonpath-plus": "^10.0.0",
"lodash": "^4.17.21",
"node-forge": "^1.3.0",
"p-limit": "^2.0.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/message/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"@cumulus/errors": "19.0.0",
"@cumulus/logger": "19.0.0",
"@cumulus/types": "19.0.0",
"jsonpath-plus": "^3.0.0",
"jsonpath-plus": "^10.0.0",
"lodash": "^4.17.21",
"uuidv4": "^6.2.13"
},
Expand Down
2 changes: 1 addition & 1 deletion tasks/update-granules-cmr-metadata-file-links/.nycrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
],
"statements": 93.0,
"functions": 80.0,
"branches": 97.0,
"branches": 89.0,
"lines": 93.0
}
37 changes: 26 additions & 11 deletions tasks/update-granules-cmr-metadata-file-links/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const cumulusMessageAdapter = require('@cumulus/cumulus-message-adapter-js');
const get = require('lodash/get');
const keyBy = require('lodash/keyBy');
const cloneDeep = require('lodash/cloneDeep');
const Logger = require('@cumulus/logger');
const { getObjectSize } = require('@cumulus/aws-client/S3');
const { s3 } = require('@cumulus/aws-client/services');

Expand All @@ -20,36 +21,49 @@ const {
updateCMRMetadata,
} = require('@cumulus/cmrjs');

const logger = new Logger({ sender: '@cumulus/update-granules-cmr-metadata-file-links' });
/**
* Update each of the CMR files' OnlineAccessURL fields to represent the new
* file locations. This function assumes that there will only ever be a single CMR file per granule.
*
* @param {Array<Object>} cmrFiles - array of objects that include CMR xmls uris and
* granuleIds
* @param {Object} granulesObject - an object of the granules where the key is the granuleId
* @param {string} cmrGranuleUrlType - type of granule CMR url
* @param {string} distEndpoint - the api distribution endpoint
* @param {Object} bucketTypes - map of bucket names to bucket types
* @param {Object} distributionBucketMap - mapping of bucket->distribution path values
* (e.g. { bucket: distribution path })
* @param {Array<Object>} cmrFiles - array of objects that include CMR xmls uris and
* granuleIds
* @param {Object} granulesObject - an object of the granules where the key is granuleId
* @param {string} cmrGranuleUrlType - type of granule CMR url
* @param {string} distEndpoint - the api distribution endpoint
* @param {Object} bucketTypes - map of bucket names to bucket types
* @param {Object} distributionBucketMap - mapping of bucket->distribution path values
* (e.g. { bucket: distribution path })
* @param {string} [excludeFileRegexPattern] - optional regex pattern; files whose key matches it are excluded from processing
* @returns {Promise<Object[]>} Array of updated CMR files with etags of newly updated files.
*
*/

async function updateEachCmrFileAccessURLs(
cmrFiles,
granulesObject,
cmrGranuleUrlType,
distEndpoint,
bucketTypes,
distributionBucketMap
distributionBucketMap,
excludeFileRegexPattern
) {
return await Promise.all(cmrFiles.map(async (cmrFile) => {
const granuleId = cmrFile.granuleId;
const granule = granulesObject[granuleId];
let files = granule.files;
if (excludeFileRegexPattern) {
const excludeFileRegex = new RegExp(excludeFileRegexPattern);
files = granule.files.filter((file) => !file.key.match(excludeFileRegex));

if (files.length === granule.files.length) {
logger.warn(`No files matched the excludeFileRegex ${excludeFileRegexPattern}. Found files: ${files.map((file) => file.key).join(', ')}`);
}
}
return await updateCMRMetadata({
granuleId,
cmrFile: granule.files.find(isCMRFile),
files: granule.files,
files: files,
distEndpoint,
published: false,
bucketTypes,
Expand Down Expand Up @@ -102,7 +116,8 @@ async function updateGranulesCmrMetadataFileLinks(event) {
cmrGranuleUrlType,
config.distribution_endpoint,
bucketTypes,
distributionBucketMap
distributionBucketMap,
config.excludeFileRegex
);

const updatedGranulesByGranuleId = await updateCmrFileInfo(cmrFiles, granulesByGranuleId);
Expand Down
1 change: 1 addition & 0 deletions tasks/update-granules-cmr-metadata-file-links/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"@cumulus/common": "19.0.0",
"@cumulus/cumulus-message-adapter-js": "2.2.0",
"@cumulus/distribution-utils": "19.0.0",
"@cumulus/logger": "19.0.0",
"lodash": "^4.17.15"
},
"devDependencies": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
"description": "The type of URL to add to the Online Access URLs in the CMR file. 'distribution' to point to the distribution API, 's3' to put in the S3 link, and 'none' to not add Online Access URLs for the granules.",
"enum": ["distribution", "s3", "both", "none"],
"default": "both"
},
"excludeFileRegex": {
"type": "string",
"description": "A regex string to match files that should be excluded from the CMR metadata file"
}
}
}
Loading

0 comments on commit 818f7ff

Please sign in to comment.