From cb62a17e82167de546d58404ba8b5f99747bc7b6 Mon Sep 17 00:00:00 2001
From: Sai Ma
Date: Wed, 26 Apr 2023 05:32:01 +0000
Subject: [PATCH 1/9] Add a test Burn Cube product cfg

---
 .../ga_ls8c_bc_4cyear_2020.odc-product.yaml | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 configs/bc_products/ga_ls8c_bc_4cyear_2020.odc-product.yaml

diff --git a/configs/bc_products/ga_ls8c_bc_4cyear_2020.odc-product.yaml b/configs/bc_products/ga_ls8c_bc_4cyear_2020.odc-product.yaml
new file mode 100644
index 0000000..2075139
--- /dev/null
+++ b/configs/bc_products/ga_ls8c_bc_4cyear_2020.odc-product.yaml
@@ -0,0 +1,50 @@
+name: ga_ls8c_bc_4cyear_2020
+description: Geoscience Australia Landsat Nadir BRDF Adjusted Reflectance Terrain, Landsat 8 Burn Cube 4 Calendar Years Collection 3
+license: CC-BY-4.0
+metadata_type: eo3
+
+metadata:
+  properties:
+    odc:file_format: GeoTIFF
+    odc:product_family: burncube
+  product:
+    name: ga_ls8c_bc_4cyear_2020
+
+measurements:
+  - name: wofssevere
+    dtype: float64
+    nodata: -999
+    units: '1'
+  - name: wofsseverity
+    dtype: float64
+    nodata: -999
+    units: '1'
+  - name: wofsmoderate
+    dtype: float64
+    nodata: -999
+    units: '1'
+  - name: severe
+    dtype: int16
+    nodata: -999
+    units: '1'
+  - name: severity
+    dtype: float64
+    nodata: -999
+    units: '1'
+  - name: moderate
+    dtype: int16
+    nodata: -999
+    units: '1'
+  - name: count
+    dtype: int16
+    nodata: -999
+    units: '1'
+
+load:
+  crs: 'epsg:3577'
+  resolution:
+    y: -30
+    x: 30
+  align:
+    y: 0
+    x: 0

From a981c399b90886634f6ba8d26f4447ae1cdd0a28 Mon Sep 17 00:00:00 2001
From: Sai Ma
Date: Thu, 27 Apr 2023 03:40:36 +0000
Subject: [PATCH 2/9] Add pystac lib to burn cube

---
 constraints.txt  | 1 +
 requirements.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/constraints.txt b/constraints.txt
index 062c7c6..238813f 100644
--- a/constraints.txt
+++ b/constraints.txt
@@ -95,6 +95,7 @@ Pygments==2.14.0
 pyparsing==3.0.9
 pyproj==3.2.1
 pyrsistent==0.19.3
+pystac==1.7.3
 python-dateutil==2.8.2
 pyTMD==1.1.3
 pytz==2023.2
diff --git a/requirements.txt b/requirements.txt
index 392768c..0384acf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@ geopandas
 numpy==1.23.5
 pyarrow
 pyproj==3.2.1
+pystac==1.7.3
 s3fs==0.4.2
 scipy==1.9.1
 Shapely==1.8.5.post1

From 6b92afdb42081808ba7068bd99a514b510dca2e4 Mon Sep 17 00:00:00 2001
From: Sai Ma
Date: Mon, 1 May 2023 05:21:56 +0000
Subject: [PATCH 3/9] Add add_metadata command to the CLI

---
 dea_burn_cube/__main__.py | 169 ++++++++++++++++++++++++++++++++++++++
 dea_burn_cube/task.py     |  37 ++++++++-
 requirements.txt          |   1 +
 3 files changed, 206 insertions(+), 1 deletion(-)

diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py
index 0091d3c..6f93031 100644
--- a/dea_burn_cube/__main__.py
+++ b/dea_burn_cube/__main__.py
@@ -4,11 +4,15 @@
 """
 
 import logging
+import math
 import os
+import re
 import shutil
 import sys
 import zipfile
+from datetime import datetime, timezone
 from multiprocessing import cpu_count
+from typing import Any, Dict
 from urllib.parse import urlparse
 
 import boto3
@@ -18,11 +22,15 @@
 import numpy as np
 import pandas as pd
 import pyproj
+import pystac
 import requests
 import s3fs
 import xarray as xr
 from datacube.utils import geometry
 from datacube.utils.cog import write_cog
+from datacube.utils.dates import normalise_dt
+from odc.dscache.tools.tiling import parse_gridspec_with_name
+from pystac.extensions.projection import ProjectionExtension
 from shapely.geometry import Point
 from shapely.ops import unary_union
 
 import dea_burn_cube.__version__
 from dea_burn_cube import bc_data_loading, bc_data_processing, io, task
 
@@ -36,6 +44,14 @@
os.environ["SQLALCHEMY_SILENCE_UBER_WARNING"] = "1" +def format_datetime(dt: datetime, with_tz=True, timespec="microseconds") -> str: + dt = normalise_dt(dt) + dt = dt.isoformat(timespec=timespec) + if with_tz: + dt = dt + "Z" + return dt + + @task.log_execution_time def result_file_saving_and_uploading( burn_cube_result_apply_wofs: xr.Dataset, @@ -450,6 +466,159 @@ def update_hotspot_data( ) +@main.command(no_args_is_help=True) +@click.option( + "--task-id", + "-t", + type=str, + default=None, + help="REQUIRED. Burn Cube task id, e.g. Dec-21.", +) +@click.option( + "--region-id", + "-r", + type=str, + default=None, + help="REQUIRED. Region id AU-30 Grid.", +) +@click.option( + "--process-cfg-url", + "-p", + type=str, + default=None, + help="REQUIRED. The Path URL to Burn Cube process cfg file as YAML format.", +) +@click.option( + "--overwrite/--no-overwrite", + default=False, + help="Rerun scenes that have already been processed.", +) +def burn_cube_add_metadata( + task_id, + region_id, + process_cfg_url, + overwrite, +): + logging_setup() + + process_cfg = task.load_yaml_remote(process_cfg_url) + + task_table = process_cfg["task_table"] + + # output = process_cfg["output_folder"] + + bc_running_task = task.generate_task(task_id, task_table) + + mappingperiod = ( + bc_running_task["Mapping Period Start"], + bc_running_task["Mapping Period End"], + ) + + # TODO: only check the NetCDF is not enough + + # local_file_path, target_file_path = task.generate_output_filenames( + # output, task_id, region_id, platform + # ) + + # o = urlparse(output) + + # bucket_name = o.netloc + # object_key = target_file_path[1:] + + processing_dt = datetime.utcnow() + + product_name = "ga_ls8c_bc_4cyear_2020" + product_version = "3.0.0" + + properties: Dict[str, Any] = {} + + data_source = process_cfg["input_products"]["platform"] + + properties["title"] = f"BurnMapping-{data_source}-{task_id}-{region_id}" + properties["dtr:start_datetime"] = format_datetime(mappingperiod[0]) + properties["dtr:end_datetime"] = format_datetime(mappingperiod[1]) + properties["odc:processing_datetime"] = format_datetime( + processing_dt, timespec="seconds" + ) + properties["odc:region_code"] = region_id + properties["odc:product"] = "ga_ls8c_bc_4cyear_2020" + properties["instruments"] = ["oli", "tirs"] # get it from ARD datasets + properties["gsd"] = 15 # get it from ARD datasets + properties["platform"] = "landsat-8" # get it from ARD datasets + properties["odc:file_format"] = "GeoTIFF" # get it from ARD datasets + properties["odc:product_family"] = "burncube" # get it from ARD datasets + properties["odc:producer"] = "ga.gov.au" # get it from ARD datasets + properties["odc:dataset_version"] = product_version # get it from ARD datasets + properties["dea:dataset_maturity"] = "final" + properties["odc:collection_number"] = 3 + + _, gridspec = parse_gridspec_with_name("au-30") + + # gridspec : au-30 + pattern = r"x(\d+)y(\d+)" + + match = re.match(pattern, region_id) + + if match: + x = int(match.group(1)) + y = int(match.group(2)) + print("x value:", x) + print("y value:", y) + else: + print("No match found.") + sys.exit(0) # cannot extract geobox, stop here + + geobox = gridspec.tile_geobox((x, y)) + + geobox_wgs84 = geobox.extent.to_crs( + "epsg:4326", resolution=math.inf, wrapdateline=True + ) + + bbox = geobox_wgs84.boundingbox + + uuid = task.odc_uuid( + product_name, + product_version, + sources=[], + ) + + item = pystac.Item( + id=str(uuid), + geometry=geobox_wgs84.json, + bbox=[bbox.left, bbox.bottom, bbox.right, 
bbox.top], + datetime=pd.Timestamp(mappingperiod[0]).replace(tzinfo=timezone.utc), + properties=properties, + ) + + ProjectionExtension.add_to(item) + proj_ext = ProjectionExtension.ext(item) + proj_ext.apply( + geobox.crs.epsg, + transform=geobox.transform, + shape=geobox.shape, + ) + + # Add all the assets + # for band, path in self.paths(ext=ext).items(): + # asset = pystac.Asset( + # href=path, + # media_type="image/tiff; application=geotiff", + # roles=["data"], + # title=band, + # ) + # item.add_asset(band, asset) + + stac_metadata = item.to_dict() + + import json + + # Serializing json + with open("demo_stac_metadata.json", "w") as outfile: + json.dump(stac_metadata, outfile, indent=4) + + return item.to_dict() + + @main.command(no_args_is_help=True) @click.option( "--task-id", diff --git a/dea_burn_cube/task.py b/dea_burn_cube/task.py index e2bb977..2c560a4 100644 --- a/dea_burn_cube/task.py +++ b/dea_burn_cube/task.py @@ -8,8 +8,9 @@ import datetime import logging import time -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Sequence, Tuple from urllib.parse import urlparse +from uuid import UUID, uuid5 import boto3 import botocore @@ -23,6 +24,10 @@ logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") +# Some random UUID to be ODC namespace +# copy paste from: odc-stats Model.py design +ODC_NS = UUID("6f34c6f4-13d6-43c0-8e4e-42b6c13203af") + def load_yaml_remote(yaml_url: str) -> Dict[str, Any]: """ @@ -291,3 +296,33 @@ def generate_processing_log( "DEA Burn Cube": version, "input_dataset_list": input_dataset_list, } + + +def odc_uuid( + algorithm: str, + algorithm_version: str, + sources: Sequence[UUID], + deployment_id: str = "", + **other_tags, +) -> UUID: + """ + Generate deterministic UUID for a derived Dataset. + + :param algorithm: Name of the algorithm + :param algorithm_version: Version string of the algorithm + :param sources: Sequence of input Dataset UUIDs + :param deployment_id: Some sort of identifier for installation that performs + the run, for example Docker image hash, or dea module version on NCI. 
+    :param **other_tags: Any other identifiers necessary to uniquely identify the dataset
+    """
+
+    tags = [f"{k}={str(v)}" for k, v in other_tags.items()]
+
+    stringified_sources = (
+        [str(algorithm), str(algorithm_version), str(deployment_id)]
+        + sorted(tags)
+        + [str(u) for u in sorted(sources)]
+    )
+
+    srcs_hashes = "\n".join(s.lower() for s in stringified_sources)
+    return uuid5(ODC_NS, srcs_hashes)
diff --git a/requirements.txt b/requirements.txt
index 0384acf..dd017c9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ fiona==1.8.22
 fsspec
 geopandas
 numpy==1.23.5
+odc_dscache>=0.2.2
 pyarrow
 pyproj==3.2.1
 pystac==1.7.3

From 0cd21db72f83d1691894ec4abf0959166cd38e06 Mon Sep 17 00:00:00 2001
From: Sai Ma
Date: Mon, 1 May 2023 05:37:52 +0000
Subject: [PATCH 4/9] Add collection to STAC dataset metadata

---
 dea_burn_cube/__main__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py
index 6f93031..a676760 100644
--- a/dea_burn_cube/__main__.py
+++ b/dea_burn_cube/__main__.py
@@ -541,7 +541,7 @@ def burn_cube_add_metadata(
         processing_dt, timespec="seconds"
     )
     properties["odc:region_code"] = region_id
-    properties["odc:product"] = "ga_ls8c_bc_4cyear_2020"
+    properties["odc:product"] = product_name
     properties["instruments"] = ["oli", "tirs"]  # get it from ARD datasets
     properties["gsd"] = 15  # get it from ARD datasets
     properties["platform"] = "landsat-8"  # get it from ARD datasets
@@ -588,6 +588,7 @@ def burn_cube_add_metadata(
         bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
         datetime=pd.Timestamp(mappingperiod[0]).replace(tzinfo=timezone.utc),
         properties=properties,
+        collection=product_name,
     )
 
     ProjectionExtension.add_to(item)

From 735b8bd31e021cd6ad6f437c36f214274b53a375 Mon Sep 17 00:00:00 2001
From: Sai Ma
Date: Mon, 1 May 2023 23:38:57 +0000
Subject: [PATCH 5/9] Save ARD datasets as Burn Cube lineage

---
 dea_burn_cube/__main__.py        | 80 +++++++++++++++++++++-----------
 dea_burn_cube/bc_data_loading.py | 21 ++++-----
 dea_burn_cube/task.py            | 12 +++--
 3 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py
index a676760..8e56813 100644
--- a/dea_burn_cube/__main__.py
+++ b/dea_burn_cube/__main__.py
@@ -514,7 +514,49 @@ def burn_cube_add_metadata(
         bc_running_task["Mapping Period End"],
     )
 
-    # TODO: only check the NetCDF is not enough
+    odc_config = {
+        "db_hostname": os.getenv("ODC_DB_HOSTNAME"),
+        "db_password": os.getenv("ODC_DB_PASSWORD"),
+        "db_username": os.getenv("ODC_DB_USERNAME"),
+        "db_port": 5432,
+        "db_database": os.getenv("ODC_DB_DATABASE"),
+    }
+
+    odc_dc = datacube.Datacube(
+        app=f"Burn Cube K8s processing - {region_id}", config=odc_config
+    )
+
+    ard_product_names = process_cfg["input_products"]["ard_product_names"]
+
+    _, gridspec = parse_gridspec_with_name("au-30")
+
+    # gridspec : au-30
+    pattern = r"x(\d+)y(\d+)"
+
+    match = re.match(pattern, region_id)
+
+    if match:
+        x = int(match.group(1))
+        y = int(match.group(2))
+        print("x value:", x)
+        print("y value:", y)
+    else:
+        print("No match found.")
+        # cannot extract geobox, so we stop here.
+        # if we throw an exception, it will trigger the Airflow/Argo retry.
+ sys.exit(0) + + geobox = gridspec.tile_geobox((x, y)) + + geobox_wgs84 = geobox.extent.to_crs( + "epsg:4326", resolution=math.inf, wrapdateline=True + ) + + bbox = geobox_wgs84.boundingbox + + input_datasets = odc_dc.find_datasets( + product=ard_product_names, geopolygon=geobox_wgs84, time=mappingperiod + ) # local_file_path, target_file_path = task.generate_output_filenames( # output, task_id, region_id, platform @@ -552,30 +594,6 @@ def burn_cube_add_metadata( properties["dea:dataset_maturity"] = "final" properties["odc:collection_number"] = 3 - _, gridspec = parse_gridspec_with_name("au-30") - - # gridspec : au-30 - pattern = r"x(\d+)y(\d+)" - - match = re.match(pattern, region_id) - - if match: - x = int(match.group(1)) - y = int(match.group(2)) - print("x value:", x) - print("y value:", y) - else: - print("No match found.") - sys.exit(0) # cannot extract geobox, stop here - - geobox = gridspec.tile_geobox((x, y)) - - geobox_wgs84 = geobox.extent.to_crs( - "epsg:4326", resolution=math.inf, wrapdateline=True - ) - - bbox = geobox_wgs84.boundingbox - uuid = task.odc_uuid( product_name, product_version, @@ -599,6 +617,9 @@ def burn_cube_add_metadata( shape=geobox.shape, ) + # Lineage last + item.properties["odc:lineage"] = dict(inputs=[str(e.id) for e in input_datasets]) + # Add all the assets # for band, path in self.paths(ext=ext).items(): # asset = pystac.Asset( @@ -737,7 +758,11 @@ def burn_cube_run( # check the input product detail # TODO: can add dry-run, and it will stop after input dataset list check try: - gpgon, input_dataset_list = bc_data_loading.check_input_datasets( + ( + gpgon, + summary_datasets, + ard_datasets, + ) = bc_data_loading.check_input_datasets( hnrs_dc, odc_dc, period, @@ -769,7 +794,8 @@ def burn_cube_run( region_id, output, task_table, - input_dataset_list, + summary_datasets, + ard_datasets, ) # No matter upload successful or not, should not block the main processing diff --git a/dea_burn_cube/bc_data_loading.py b/dea_burn_cube/bc_data_loading.py index fc7f420..c0bc347 100644 --- a/dea_burn_cube/bc_data_loading.py +++ b/dea_burn_cube/bc_data_loading.py @@ -126,7 +126,8 @@ def check_input_datasets( dataset is found for any input product. 
""" - overall_input_datasets = [] + summary_datasets = [] + ard_datasets = [] gpgon = _get_gpgon(region_id) @@ -155,7 +156,7 @@ def check_input_datasets( elif len(datasets) > 1: raise IncorrectInputDataError("Find one more than GeoMAD dataset") else: - overall_input_datasets.extend( + summary_datasets.extend( [{str(e.id): e.metadata_doc["label"]} for e in datasets] ) # Load the geometry from OpenDataCube again, avoid the pixel mismatch issue @@ -186,7 +187,7 @@ def check_input_datasets( elif len(datasets) > 1: raise IncorrectInputDataError("Find one more than WOfS summary dataset") else: - overall_input_datasets.extend( + summary_datasets.extend( [{str(e.id): e.metadata_doc["label"]} for e in datasets] ) @@ -197,10 +198,8 @@ def check_input_datasets( if len(datasets) == 0: raise IncorrectInputDataError("Cannot find any mapping ARD dataset") - # else: - # overall_input_datasets.extend( - # [{str(e.id): e.metadata_doc["properties"]["title"]} for e in datasets] - # ) + else: # we only keep UUID for ARD datasets to meet metadata request + ard_datasets.extend([str(e.id) for e in datasets]) logger.info("Load referance ARD from %s", "-".join(ard_product_names)) logger.info("Find %s referance ARD datasets", str(len(datasets))) @@ -212,10 +211,8 @@ def check_input_datasets( if len(datasets) == 0: raise IncorrectInputDataError("Cannot find any mapping ARD dataset") - # else: - # overall_input_datasets.extend( - # [{str(e.id): e.metadata_doc["properties"]["title"]} for e in datasets] - # ) + else: + ard_datasets.extend([str(e.id) for e in datasets]) logger.info("Load referance ARD from %s", "-".join(ard_product_names)) logger.info("Find %s mapping ARD datasets", str(len(datasets))) @@ -227,7 +224,7 @@ def check_input_datasets( region_polygon.geometry[0], crs="epsg:3577" ) - return gpgon, overall_input_datasets + return gpgon, summary_datasets, ard_datasets @task.log_execution_time diff --git a/dea_burn_cube/task.py b/dea_burn_cube/task.py index 2c560a4..513eeda 100644 --- a/dea_burn_cube/task.py +++ b/dea_burn_cube/task.py @@ -250,7 +250,8 @@ def generate_processing_log( region_id: str, output: str, task_table: str, - input_dataset_list: List[Dict[str, Any]], + summary_datasets: List[Dict[str, Any]], + ard_datasets: List[str], ) -> Dict[str, Any]: """ Generates a processing log dictionary for the task. 
@@ -274,8 +275,10 @@ def generate_processing_log( Output folder path task_table : str Name of the table to store the task - input_dataset_list : List[Dict[str, Any]] - List of input datasets with metadata + summary_datasets : List[Dict[str, Any]] + List of summary datasets with UUID and label + ard_datasets: List[str] + List of input ARD datasets with UUID only Returns: processing_log : Dict[str, Any]): A dictionary containing processing log information @@ -294,7 +297,8 @@ def generate_processing_log( "output": output, "task_table": task_table, "DEA Burn Cube": version, - "input_dataset_list": input_dataset_list, + "summary_datasets": summary_datasets, + "ard_datasets": ard_datasets, } From 6dd3f0eaa552f06be6b29160d12a19a9370e375f Mon Sep 17 00:00:00 2001 From: Sai Ma Date: Tue, 2 May 2023 01:02:07 +0000 Subject: [PATCH 6/9] Add bands information to metadata --- dea_burn_cube/__main__.py | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py index 8e56813..6988f0e 100644 --- a/dea_burn_cube/__main__.py +++ b/dea_burn_cube/__main__.py @@ -37,6 +37,9 @@ import dea_burn_cube.__version__ from dea_burn_cube import bc_data_loading, bc_data_processing, io, task +# from pystac.extensions.eo import Band, EOExtension + + logging.getLogger("botocore.credentials").setLevel(logging.WARNING) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") logger = logging.getLogger(__name__) @@ -597,7 +600,7 @@ def burn_cube_add_metadata( uuid = task.odc_uuid( product_name, product_version, - sources=[], + sources=[str(e.id) for e in input_datasets], ) item = pystac.Item( @@ -620,15 +623,42 @@ def burn_cube_add_metadata( # Lineage last item.properties["odc:lineage"] = dict(inputs=[str(e.id) for e in input_datasets]) + bands = [ + "wofssevere", + "wofsseverity", + "wofsmoderate", + "severe", + "severity", + "moderate", + "count", + ] + # Add all the assets - # for band, path in self.paths(ext=ext).items(): - # asset = pystac.Asset( - # href=path, - # media_type="image/tiff; application=geotiff", - # roles=["data"], - # title=band, - # ) - # item.add_asset(band, asset) + for band in bands: + asset = pystac.Asset( + href=f"BurnMapping-{data_source}-{task_id}-{region_id}-{band}.tif", + media_type="image/tiff; application=geotiff", + roles=["data"], + title=band, + ) + item.add_asset(band, asset) + + # eo = EOExtension.ext(asset) + # band = Band.create(name) + # eo.apply(bands=[band]) + + # if dataset.grids: + # proj_fields = _proj_fields(dataset.grids, measurement.grid) + # if proj_fields is not None: + # proj = ProjectionExtension.ext(asset) + # # Not sure how this handles None for an EPSG code + # proj.apply( + # shape=proj_fields["shape"], + # transform=proj_fields["transform"], + # epsg=epsg, + # ) + + # item.add_asset(name, asset=asset) stac_metadata = item.to_dict() From d64b44c836a193e712bc55e1bcbb80a3c0746c3b Mon Sep 17 00:00:00 2001 From: Sai Ma Date: Tue, 2 May 2023 01:54:57 +0000 Subject: [PATCH 7/9] Add geometry info to bands in metadata --- dea_burn_cube/__main__.py | 67 ++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py index 6988f0e..09bf3bd 100644 --- a/dea_burn_cube/__main__.py +++ b/dea_burn_cube/__main__.py @@ -30,6 +30,7 @@ from datacube.utils.cog import write_cog from datacube.utils.dates import normalise_dt from odc.dscache.tools.tiling import parse_gridspec_with_name 
+from pystac.extensions.eo import Band, EOExtension from pystac.extensions.projection import ProjectionExtension from shapely.geometry import Point from shapely.ops import unary_union @@ -37,9 +38,6 @@ import dea_burn_cube.__version__ from dea_burn_cube import bc_data_loading, bc_data_processing, io, task -# from pystac.extensions.eo import Band, EOExtension - - logging.getLogger("botocore.credentials").setLevel(logging.WARNING) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") logger = logging.getLogger(__name__) @@ -508,7 +506,7 @@ def burn_cube_add_metadata( task_table = process_cfg["task_table"] - # output = process_cfg["output_folder"] + output = process_cfg["output_folder"] bc_running_task = task.generate_task(task_id, task_table) @@ -561,14 +559,16 @@ def burn_cube_add_metadata( product=ard_product_names, geopolygon=geobox_wgs84, time=mappingperiod ) - # local_file_path, target_file_path = task.generate_output_filenames( - # output, task_id, region_id, platform - # ) + data_source = process_cfg["input_products"]["platform"] - # o = urlparse(output) + local_file_path, target_file_path = task.generate_output_filenames( + output, task_id, region_id, data_source + ) + + o = urlparse(output) - # bucket_name = o.netloc - # object_key = target_file_path[1:] + bucket_name = o.netloc + object_key = target_file_path[1:] processing_dt = datetime.utcnow() @@ -577,8 +577,6 @@ def burn_cube_add_metadata( properties: Dict[str, Any] = {} - data_source = process_cfg["input_products"]["platform"] - properties["title"] = f"BurnMapping-{data_source}-{task_id}-{region_id}" properties["dtr:start_datetime"] = format_datetime(mappingperiod[0]) properties["dtr:end_datetime"] = format_datetime(mappingperiod[1]) @@ -634,41 +632,38 @@ def burn_cube_add_metadata( ] # Add all the assets - for band in bands: + for band_name in bands: asset = pystac.Asset( - href=f"BurnMapping-{data_source}-{task_id}-{region_id}-{band}.tif", + href=f"BurnMapping-{data_source}-{task_id}-{region_id}-{band_name}.tif", media_type="image/tiff; application=geotiff", roles=["data"], - title=band, + title=band_name, ) - item.add_asset(band, asset) - # eo = EOExtension.ext(asset) - # band = Band.create(name) - # eo.apply(bands=[band]) + eo = EOExtension.ext(asset) + band = Band.create(band_name) + eo.apply(bands=[band]) - # if dataset.grids: - # proj_fields = _proj_fields(dataset.grids, measurement.grid) - # if proj_fields is not None: - # proj = ProjectionExtension.ext(asset) - # # Not sure how this handles None for an EPSG code - # proj.apply( - # shape=proj_fields["shape"], - # transform=proj_fields["transform"], - # epsg=epsg, - # ) + proj = ProjectionExtension.ext(asset) - # item.add_asset(name, asset=asset) + proj.apply( + geobox.crs.epsg, + transform=geobox.transform, + shape=geobox.shape, + ) - stac_metadata = item.to_dict() + item.add_asset(band_name, asset=asset) - import json + stac_metadata = item.to_dict() - # Serializing json - with open("demo_stac_metadata.json", "w") as outfile: - json.dump(stac_metadata, outfile, indent=4) + logger.info( + "Upload STAC metadata file %s in s3.", + bucket_name + "/" + object_key.replace(".nc", ".stac-item.json"), + ) - return item.to_dict() + io.upload_dict_to_s3( + stac_metadata, bucket_name, object_key.replace(".nc", ".stac-item.json") + ) @main.command(no_args_is_help=True) From b984e68cc33ccb68ca8bf0dcd1d8cf033da87b19 Mon Sep 17 00:00:00 2001 From: Sai Ma Date: Tue, 2 May 2023 02:06:21 +0000 Subject: [PATCH 8/9] Add links info to STAC metadata --- 
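A minimal standalone sketch of the link structure this patch attaches to each
STAC item, assuming pystac==1.7.3 as pinned in PATCH 2/9. Every concrete value
below (dataset id, geometry, bbox, S3 key) is an illustrative placeholder;
only the four rel types and the DEA Explorer URL patterns mirror the diff:

    from datetime import datetime, timezone

    import pystac

    product_name = "ga_ls8c_bc_4cyear_2020"
    dataset_id = "11111111-2222-3333-4444-555555555555"  # placeholder dataset UUID
    stac_metadata_path = "example-bucket/example-key.stac-item.json"  # placeholder

    item = pystac.Item(
        id=dataset_id,
        geometry={
            "type": "Polygon",
            "coordinates": [
                [
                    [146.0, -34.0],
                    [147.0, -34.0],
                    [147.0, -33.0],
                    [146.0, -33.0],
                    [146.0, -34.0],
                ]
            ],
        },
        bbox=[146.0, -34.0, 147.0, -33.0],
        datetime=datetime(2020, 1, 1, tzinfo=timezone.utc),
        properties={},
    )

    for rel, media_type, target in [
        ("product_overview", "application/json",
         f"https://explorer.dea.ga.gov.au/product/{product_name}"),
        ("collection", "application/json",
         f"https://explorer.dea.ga.gov.au/stac/collections/{product_name}"),
        ("alternative", "text/html",
         f"https://explorer.dea.ga.gov.au/dataset/{dataset_id}"),
        ("self", "application/json", stac_metadata_path),
    ]:
        item.links.append(pystac.Link(rel=rel, media_type=media_type, target=target))

    # Serialise without href rewriting so the placeholder self link is kept as-is
    print([link["rel"] for link in item.to_dict(transform_hrefs=False)["links"]])
    # ['product_overview', 'collection', 'alternative', 'self']
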
dea_burn_cube/__main__.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py index 09bf3bd..744d4a3 100644 --- a/dea_burn_cube/__main__.py +++ b/dea_burn_cube/__main__.py @@ -654,11 +654,48 @@ def burn_cube_add_metadata( item.add_asset(band_name, asset=asset) + stac_metadata_path = ( + bucket_name + "/" + object_key.replace(".nc", ".stac-item.json") + ) + + # Add links + item.links.append( + pystac.Link( + rel="product_overview", + media_type="application/json", + target=f"https://explorer.dea.ga.gov.au/product/{product_name}", + ) + ) + + item.links.append( + pystac.Link( + rel="collection", + media_type="application/json", + target=f"https://explorer.dea.ga.gov.au/stac/collections/{product_name}", + ) + ) + + item.links.append( + pystac.Link( + rel="alternative", + media_type="text/html", + target=f"https://explorer.dea.ga.gov.au/dataset/{str(uuid)}", + ) + ) + + item.links.append( + pystac.Link( + rel="self", + media_type="application/json", + target=stac_metadata_path, + ) + ) + stac_metadata = item.to_dict() logger.info( "Upload STAC metadata file %s in s3.", - bucket_name + "/" + object_key.replace(".nc", ".stac-item.json"), + stac_metadata_path, ) io.upload_dict_to_s3( From d972c3743aa67f2d245fce2bfab7f6a3d1ec057e Mon Sep 17 00:00:00 2001 From: Sai Ma Date: Thu, 4 May 2023 00:50:02 +0000 Subject: [PATCH 9/9] Move Burn Cube product setting to cfg file --- .../bc_ls_cy_debug_4gm_7bands_cfg.yaml | 14 +++++++++++ .../bc_ls_cy_pre_release_4gm_7bands_cfg.yaml | 14 +++++++++++ .../bc_ls_fy_pre_release_4gm_7bands_cfg.yaml | 6 +++++ dea_burn_cube/__main__.py | 25 ++++++++----------- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/configs/bc_processings/bc_ls_cy_debug_4gm_7bands_cfg.yaml b/configs/bc_processings/bc_ls_cy_debug_4gm_7bands_cfg.yaml index b14d006..937acee 100644 --- a/configs/bc_processings/bc_ls_cy_debug_4gm_7bands_cfg.yaml +++ b/configs/bc_processings/bc_ls_cy_debug_4gm_7bands_cfg.yaml @@ -21,4 +21,18 @@ input_products: - swir1 - swir2 +product: + name: bc_ls_cy_debug + short_name: bc_ls_cy_debug + version: 3.0.0 + product_family: burncube + bands: + - wofssevere + - wofsseverity + - wofsmoderate + - severe + - severity + - moderate + - count + task_table: 10-year-historical-processing-4year-geomad.csv diff --git a/configs/bc_processings/bc_ls_cy_pre_release_4gm_7bands_cfg.yaml b/configs/bc_processings/bc_ls_cy_pre_release_4gm_7bands_cfg.yaml index 6213a3b..e8db24c 100644 --- a/configs/bc_processings/bc_ls_cy_pre_release_4gm_7bands_cfg.yaml +++ b/configs/bc_processings/bc_ls_cy_pre_release_4gm_7bands_cfg.yaml @@ -21,4 +21,18 @@ input_products: - swir1 - swir2 +product: + name: ga_ls8c_bc_4cyear_2020 + short_name: ga_ls8c_bc_4cyear_2020 + version: 3.0.0 + product_family: burncube + bands: + - wofssevere + - wofsseverity + - wofsmoderate + - severe + - severity + - moderate + - count + task_table: 10-year-historical-processing-4year-geomad.csv diff --git a/configs/bc_processings/bc_ls_fy_pre_release_4gm_7bands_cfg.yaml b/configs/bc_processings/bc_ls_fy_pre_release_4gm_7bands_cfg.yaml index 30e3ef3..c212add 100644 --- a/configs/bc_processings/bc_ls_fy_pre_release_4gm_7bands_cfg.yaml +++ b/configs/bc_processings/bc_ls_fy_pre_release_4gm_7bands_cfg.yaml @@ -21,4 +21,10 @@ input_products: - swir1 - swir2 +product: + name: ga_ls8c_bc_4fyear_2020 + short_name: ga_ls8c_bc_4fyear_2020 + version: 3.0.0 + product_family: burncube + task_table: 
10-year-historical-processing-4year-geomad.csv
diff --git a/dea_burn_cube/__main__.py b/dea_burn_cube/__main__.py
index 744d4a3..fc76c7d 100644
--- a/dea_burn_cube/__main__.py
+++ b/dea_burn_cube/__main__.py
@@ -539,10 +539,11 @@ def burn_cube_add_metadata(
     if match:
         x = int(match.group(1))
         y = int(match.group(2))
-        print("x value:", x)
-        print("y value:", y)
     else:
-        print("No match found.")
+        logger.error(
+            "No match found in region id %s.",
+            region_id,
+        )
         # cannot extract geobox, so we stop here.
         # if we throw an exception, it will trigger the Airflow/Argo retry.
         sys.exit(0)
@@ -561,7 +562,7 @@ def burn_cube_add_metadata(
 
     data_source = process_cfg["input_products"]["platform"]
 
-    local_file_path, target_file_path = task.generate_output_filenames(
+    _, target_file_path = task.generate_output_filenames(
         output, task_id, region_id, data_source
     )
@@ -572,8 +573,8 @@ def burn_cube_add_metadata(
 
     processing_dt = datetime.utcnow()
 
-    product_name = "ga_ls8c_bc_4cyear_2020"
-    product_version = "3.0.0"
+    product_name = process_cfg["product"]["name"]
+    product_version = process_cfg["product"]["version"]
@@ -599,6 +600,8 @@ def burn_cube_add_metadata(
     uuid = task.odc_uuid(
         product_name,
         product_version,
         sources=[str(e.id) for e in input_datasets],
+        tile=region_id,
+        time=str(mappingperiod),
     )
@@ -621,15 +624,7 @@ def burn_cube_add_metadata(
     # Lineage last
     item.properties["odc:lineage"] = dict(inputs=[str(e.id) for e in input_datasets])
 
-    bands = [
-        "wofssevere",
-        "wofsseverity",
-        "wofsmoderate",
-        "severe",
-        "severity",
-        "moderate",
-        "count",
-    ]
+    bands = process_cfg["product"]["bands"]
 
     # Add all the assets
     for band_name in bands:
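
A closing sketch of why this last patch also passes tile= and time= tags to
task.odc_uuid(): the helper (condensed below from PATCH 3/9) hashes the
algorithm name, version, sorted tags and sorted source UUIDs into a
deterministic uuid5, so without the extra tags two tile/period runs that
happened to share identical source datasets would collide on the same dataset
id. The source UUID below is a placeholder:

    from uuid import UUID, uuid5

    # Same namespace UUID as dea_burn_cube/task.py (borrowed from odc-stats)
    ODC_NS = UUID("6f34c6f4-13d6-43c0-8e4e-42b6c13203af")

    def odc_uuid(algorithm, algorithm_version, sources, deployment_id="", **other_tags):
        tags = [f"{k}={str(v)}" for k, v in other_tags.items()]
        stringified_sources = (
            [str(algorithm), str(algorithm_version), str(deployment_id)]
            + sorted(tags)
            + [str(u) for u in sorted(sources)]
        )
        return uuid5(ODC_NS, "\n".join(s.lower() for s in stringified_sources))

    sources = ["c0043510-7b21-4cd7-9a27-0c4c16e02444"]  # placeholder source UUID

    a = odc_uuid("ga_ls8c_bc_4cyear_2020", "3.0.0", sources, tile="x10y20", time="2020")
    b = odc_uuid("ga_ls8c_bc_4cyear_2020", "3.0.0", sources, tile="x10y20", time="2020")
    c = odc_uuid("ga_ls8c_bc_4cyear_2020", "3.0.0", sources, tile="x11y20", time="2020")

    assert a == b  # deterministic: identical inputs reproduce the same id
    assert a != c  # a different tile tag yields a different id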