Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 41 #87

Merged
merged 13 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<img alt="NSIDC logo" src="https://nsidc.org/themes/custom/nsidc/logo.svg" width="150" />
</p>

# nsidc-metgen
# MetGenC

`nsidc-metgen` enables data producers as well as Operations staff managing the
`MetGenC` enables data producers as well as Operations staff managing the
data ingest workflow to create metadata files conforming to
NASA's Common Metadata Repository UMM-G specification.

Expand Down Expand Up @@ -159,6 +159,14 @@ for the details.
$ source scripts/env.sh default
$ metgenc process --config example/modscg.ini

* Validate JSON output

$ metgenc validate -c example/modscg.ini -t cnm

The package `check-jsonschema` is also installed by MetGenC and can be used to validate a single file:

$ check-jsonschema --schemafile <path to schema file> <path to CNM file>

* Exit the Poetry shell:

$ exit
Expand Down
837 changes: 508 additions & 329 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ rich = "^13.7.1"
boto3 = "^1.35.22"
xarray = "^2024.09.0"
pyproj = "^3.7.0"
jsonschema = "^4.23.0"
check-jsonschema = "^0.29.4"

returns = "^0.23.0"
toolz = "^1.0.0"
Expand Down
10 changes: 10 additions & 0 deletions src/nsidc/metgen/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ def info(config_filename):
metgen.init_logging(configuration)
configuration.show()

@cli.command()
@click.option(
    '-c', '--config', 'config_filename',
    help='Path to configuration file',
    required=True,
)
@click.option(
    '-t', '--type', 'content_type',
    help='JSON content type',
    default='cnm',
    show_default=True,
)
def validate(config_filename, content_type):
    """Validates the contents of local JSON files."""
    # NOTE: the docstring above is what click shows as this command's help
    # text, so it is kept verbatim.
    click.echo(metgen.banner())

    # Read the .ini file, then build the configuration object from it. The
    # empty dict is passed as the second argument (presumably the overrides
    # mapping -- confirm against config.configuration).
    parser = config.config_parser_factory(config_filename)
    configuration = config.configuration(parser, {})

    # Logging is initialised from the configuration before validation runs.
    metgen.init_logging(configuration)
    metgen.validate(configuration, content_type)
Copy link
Contributor

@eigenbeam eigenbeam Nov 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May want to insert a validation of the config file? I'm not sure it's necessary, though, because there could be lots of stuff in the config file that we don't care about when we're just validating the json? https://github.com/nsidc/granule-metgen/blob/main/src/nsidc/metgen/cli.py#L54

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My take on the story is that we're focused on validating JSON output in the issue-41 feature, not other things that do in fact need to be validated. I'm inclined to stick with the current setup. What I feel would be better, config-file-wise, is to add the equivalent of `config.validate(configuration)` to the CLI init processing so that the new .ini file is immediately checked for errors.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm good with that


@cli.command()
@click.option('-c', '--config', 'config_filename', help='Path to configuration file', required=True)
@click.option('-e', '--env', help='environment', default=constants.DEFAULT_CUMULUS_ENVIRONMENT, show_default=True)
Expand Down
8 changes: 8 additions & 0 deletions src/nsidc/metgen/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from datetime import datetime, timezone
import logging
import os.path
from pathlib import Path
import uuid

from nsidc.metgen import aws
Expand Down Expand Up @@ -39,6 +40,13 @@ def show(self):
for k,v in self.__dict__.items():
LOGGER.info(f' + {k}: {v}')

def ummg_path(self):
    """Return ``local_output_dir`` joined with ``ummg_dir`` as a ``Path``."""
    output_root = Path(self.local_output_dir)
    return output_root / self.ummg_dir

def cnm_path(self):
    """Return the ``cnm`` subdirectory of ``local_output_dir`` as a ``Path``."""
    output_root = Path(self.local_output_dir)
    return output_root / 'cnm'


def config_parser_factory(configuration_file):
"""
Returns a ConfigParser by reading the specified file.
Expand Down
4 changes: 4 additions & 0 deletions src/nsidc/metgen/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
DEFAULT_CHECKSUM_TYPE = 'SHA256'
DEFAULT_NUMBER = 1000000

# JSON schema locations and versions
# NOTE(review): the schema location is a relative path starting at 'src/', so
# it presumably resolves against the current working directory -- confirm the
# file is still found when the tool is run from outside the repository root.
CNM_JSON_SCHEMA = 'src/nsidc/metgen/json-schema/cumulus_sns_schema.json'
CNM_JSON_SCHEMA_VERSION = '1.6.1'

# Configuration sections
SOURCE_SECTION_NAME = 'Source'
COLLECTION_SECTION_NAME = 'Collection'
Expand Down
276 changes: 276 additions & 0 deletions src/nsidc/metgen/json-schema/cumulus_sns_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"file": {
"type": "object",
"properties": {
"type": {
"description": "The type of file. science files (netcdf, HDF, binary) should use the 'data' type. More can be added if need and consensus demand.",
"type": "string",
"enum": [
"data",
"browse",
"metadata",
"ancillary",
"linkage"
]
},
"subtype": {
"description": "An optional, specific implementation of the file::type. e.g. NetCDF for a file of type 'data'",
"type": "string"
},
"uri": {
"description": "the URI of the file (s3://...)",
"type": "string"
},
"name": {
"description": "The human readable filename that this file represents. ",
"type": "string"
},
"checksumType": {
"description": "Type of the checksum (e.g. md5). Optional. If no checksumType is defined for a file, it is assumed to be md5",
"type": "string",
"enum": [
"SHA512",
"SHA256",
"SHA2",
"SHA1",
"md5"
]
},
"checksum": {
"description": "Checksum of the file.",
"type": "string"
},
"size": {
"description": "Size, in bytes, of the file.",
"type": "number"
}
},
"required": [
"type",
"uri",
"size",
"name"
]
},
"filegroup": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "string id of the filegroup by which all files are associated."
},
"files": {
"description": "array of files that make up this product",
"type": "array",
"items": {
"$ref": "#/definitions/file"
}
}
},
"required": [
"id",
"files"
]
},
"collection": {
"description": "The collection short name and version.",
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "collection short name."
},
"version": {
"type": "string",
"description": "collection version."
}
},
"required": [
"name",
"version"
]
}
},
"title": "Cloud Notification Message (cnm) 1.2 ",
"description": "A message format to trigger or respond to processing. Version 1.2",
"type": "object",
"properties": {
"version": {
"description": "The CNM Version used. e.g. '1.3'",
"type": "string",
"enum": [
"1.0",
"1.1",
"1.2",
"1.3",
"1.4",
"1.4.1",
"1.5",
"1.5.1",
"1.6.0",
"1.6.1-alpha.0",
"1.6.1"
]
},
"receivedTime": {
"description": "Time message was received by the ingest system",
"type": "string",
"format": "date-time"
},
"processCompleteTime": {
"description": "The time processing completed by the receiving entity.",
"type": "string",
"format": "date-time"
},
"submissionTime": {
"description": "The time the message was created (and presumably sent) to the publication mechanism.",
"type": "string",
"format": "date-time"
},
"identifier": {
"description": "Unique identifier for the message as a whole. It is the sender's responsibility to ensure uniqueness. This identifier can be used in response messages to provide traceability.",
"type": "string"
},
"collection": {
"description": "The collection to which these granules will belong.",
"anyOf": [
{
"type": "string"
},
{
"$ref": "#/definitions/collection"
}
]
},
"provider": {
"description": "the name of the provider (e.g. SIP, SDS, etc. ) producing these files.",
"type": "string"
},
"trace": {
"description": "Information on the message or who is sending it.",
"type": "string"
},
"response": {
"description": "The response message type sent. Can be a success message or error message. Akin to both the PAN and PDRD",
"type": "object",
"properties": {
"status": {
"description": "Successful or error.",
"type": "string",
"enum": [
"SUCCESS",
"FAILURE"
]
},
"ingestionMetadata": {
"description": "Object defining ingestion metadata, like CMR Concept IDs, URLS, etc",
"type": "object",
"properties": {
"catalogId": {
"description": "Identifier for catalog",
"type": "string"
},
"catalogUrl": {
"description": "URL of catalog entry",
"type": "string"
}
}
},
"errorCode": {
"description": "Error message. Success messages can be ignored.",
"type": "string",
"enum": [
"VALIDATION_ERROR",
"PROCESSING_ERROR",
"TRANSFER_ERROR"
]
},
"errorMessage": {
"description": "The message error for the failure that occurred.",
"type": "string"
}
},
"required": [
"status"
]
},
"product": {
"type": "object",
"properties": {
"name": {
"description": "Identifier/name of the product",
"type": "string"
},
"dataVersion": {
"description": "Version of this product",
"type": "string"
},
"dataProcessingType": {
"description": "The type of data processing stream that generated the product",
"type": "string",
"enum": [
"forward",
"reprocessing"
]
},
"files": {
"description": "array of files that make up this product",
"type": "array",
"items": {
"$ref": "#/definitions/file"
}
},
"filegroups": {
"description": "array of filegroups that make up this product",
"type": "array",
"items": {
"$ref": "#/definitions/filegroup"
}
}
},
"oneOf": [
{
"required": [
"name",
"files"
]
},
{
"required": [
"name",
"filegroups"
]
}
]
}
},
"oneOf": [
{
"required": [
"version",
"submissionTime",
"collection",
"identifier",
"product"
],
"not": {
"required": [
"response"
]
}
},
{
"required": [
"version",
"receivedTime",
"processCompleteTime",
"submissionTime",
"identifier",
"collection",
"response"
]
}
]
}
Loading