From 29705e91b4c8952674c68f0f6ba83a2ca63047b6 Mon Sep 17 00:00:00 2001 From: asteiker Date: Thu, 30 Mar 2023 17:24:32 +0000 Subject: [PATCH 01/10] Start Harmony-py subset example with PO.DAAC data --- how-tos/subset.qmd | 59 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index 29cf9171..66627e94 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -1,10 +1,63 @@ --- title: How do I subset data granules? +execute: + eval: false --- -How do I subset a data granule using Harmony? -How do I subset an OPeNDAP granule in the cloud? -How do I subset a data granule using xarray? +## How do I subset a data granule using Harmony? + +::: {.panel-tabset group="language"} + +### Python + +Install the [harmony-py]("https://github.com/nasa/harmony-py") package: + +```{python} +# Install harmony-py +pip install -U harmony-py +``` + +Import packages: + +```{python} +import datetime as dt + +from harmony import BBox, Client, Collection, Request +from harmony.config import Environment + +#collection example: #MUR-JPL-L4-GLOB-v4.1 +#2021-03-10T00:46:02Z +#2022-03-10T00:46:02Z +#"-125.469,15.820,-99.453,35.859" +``` + +### Set up Harmony client and authentication + +We will authenticate the following Harmony request using a netrc file. See the [appendix]("https://nasa-openscapes.github.io/earthdata-cloud-cookbook/appendix/authentication.html") for more information on [Earthdata Login]("https://urs.earthdata.nasa.gov/") and netrc setup. + +```{python} +harmony_client = Client() + +request = Request( + collection=Collection(id=dataset_short_name), + spatial=BBox(w, s, e, n), + temporal={ + 'start': dt.datetime(yyyy, mm, dd), + 'stop': dt.datetime(yyyy, mm, dd) + } +) + +job_id = harmony_client.submit(request) + +results = harmony_client.download_all(job_id, directory='/tmp', overwrite=True) + +``` + +## How do I subset an OPeNDAP granule in the cloud? 
+ + +## How do I subset a data granule using xarray? + ## How do I download a subset of NetCDF-4? *this might be a deprecated idea* \ No newline at end of file From b2193e823fb69b04ec01e8ff949dcac50a92e97d Mon Sep 17 00:00:00 2001 From: asteiker Date: Wed, 10 May 2023 20:59:24 +0000 Subject: [PATCH 02/10] populate with MUR example --- how-tos/subset.qmd | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index 66627e94..faf6e2d3 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -24,11 +24,6 @@ import datetime as dt from harmony import BBox, Client, Collection, Request from harmony.config import Environment - -#collection example: #MUR-JPL-L4-GLOB-v4.1 -#2021-03-10T00:46:02Z -#2022-03-10T00:46:02Z -#"-125.469,15.820,-99.453,35.859" ``` ### Set up Harmony client and authentication @@ -36,21 +31,23 @@ from harmony.config import Environment We will authenticate the following Harmony request using a netrc file. See the [appendix]("https://nasa-openscapes.github.io/earthdata-cloud-cookbook/appendix/authentication.html") for more information on [Earthdata Login]("https://urs.earthdata.nasa.gov/") and netrc setup. ```{python} +dataset_short_name = 'MUR-JPL-L4-GLOB-v4.1' + harmony_client = Client() request = Request( collection=Collection(id=dataset_short_name), - spatial=BBox(w, s, e, n), + spatial=BBox(-125.469,15.820,-99.453,35.859), temporal={ - 'start': dt.datetime(yyyy, mm, dd), - 'stop': dt.datetime(yyyy, mm, dd) + 'start': dt.datetime(2021, 3, 10), + 'stop': dt.datetime(2022, 3, 10) } ) job_id = harmony_client.submit(request) -results = harmony_client.download_all(job_id, directory='/tmp', overwrite=True) - +harmony_client.wait_for_processing(job_id, show_progress=True) +harmony_client.resume(job_id) ``` ## How do I subset an OPeNDAP granule in the cloud? 
From 0448d954d61f2e3f787e7ef20f7553c01a23f779 Mon Sep 17 00:00:00 2001 From: jules32 Date: Wed, 10 May 2023 21:53:34 +0000 Subject: [PATCH 03/10] trigger PAT --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index e56f725f..70f2130d 100644 --- a/README.md +++ b/README.md @@ -37,4 +37,3 @@ At our 2nd hackday we had a brief overview of cookbook progress since we\'d last - [Cookbook GitHub Issues](https://github.com/NASA-Openscapes/earthdata-cloud-cookbook/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc) - began organizing ideas in Issues -## From a7484af0ba3c0d8f241513fda89e5555a958299a Mon Sep 17 00:00:00 2001 From: Owen Littlejohns Date: Wed, 10 May 2023 22:06:18 +0000 Subject: [PATCH 04/10] Update MUR request to fewer results. --- how-tos/subset.qmd | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index faf6e2d3..58a31bfe 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -12,7 +12,7 @@ execute: Install the [harmony-py]("https://github.com/nasa/harmony-py") package: -```{python} +```{bash} # Install harmony-py pip install -U harmony-py ``` @@ -26,7 +26,7 @@ from harmony import BBox, Client, Collection, Request from harmony.config import Environment ``` -### Set up Harmony client and authentication +**Set up Harmony client and authentication** We will authenticate the following Harmony request using a netrc file. See the [appendix]("https://nasa-openscapes.github.io/earthdata-cloud-cookbook/appendix/authentication.html") for more information on [Earthdata Login]("https://urs.earthdata.nasa.gov/") and netrc setup. 
@@ -39,17 +39,18 @@ request = Request( collection=Collection(id=dataset_short_name), spatial=BBox(-125.469,15.820,-99.453,35.859), temporal={ - 'start': dt.datetime(2021, 3, 10), - 'stop': dt.datetime(2022, 3, 10) - } + 'start': dt.datetime(2021, 3, 10, 1), + 'stop': dt.datetime(2021, 3, 10, 2) + } ) job_id = harmony_client.submit(request) -harmony_client.wait_for_processing(job_id, show_progress=True) -harmony_client.resume(job_id) +harmony_client.wait_for_processing(job_id) ``` +::: + ## How do I subset an OPeNDAP granule in the cloud? From dc2a5c44663fdc0d9b8fa38e8551d693b2c213c4 Mon Sep 17 00:00:00 2001 From: jules32 Date: Wed, 10 May 2023 22:21:39 +0000 Subject: [PATCH 05/10] added tabset groups with Owen and Amy --- how-tos/subset.qmd | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index 58a31bfe..438f92b4 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -8,7 +8,7 @@ execute: ::: {.panel-tabset group="language"} -### Python +## Python Install the [harmony-py]("https://github.com/nasa/harmony-py") package: @@ -26,7 +26,7 @@ from harmony import BBox, Client, Collection, Request from harmony.config import Environment ``` -**Set up Harmony client and authentication** +### Set up Harmony client and authentication We will authenticate the following Harmony request using a netrc file. See the [appendix]("https://nasa-openscapes.github.io/earthdata-cloud-cookbook/appendix/authentication.html") for more information on [Earthdata Login]("https://urs.earthdata.nasa.gov/") and netrc setup. @@ -49,6 +49,33 @@ job_id = harmony_client.submit(request) harmony_client.wait_for_processing(job_id) ``` +## R + +R code coming soon! + +```{r} +# Coming soon! +``` + +## Matlab + +Matlab code coming soon! + +```{bash} +#| echo: true +# Coming soon! +``` + +## Command Line + +With `wget` and `curl`: + +```{bash} +# Coming soon! 
+``` + + + ::: ## How do I subset an OPeNDAP granule in the cloud? From 0d67b50465c5e549a7fa062295de8694169f9cb5 Mon Sep 17 00:00:00 2001 From: asteiker Date: Wed, 10 May 2023 22:23:42 +0000 Subject: [PATCH 06/10] adding explanatory text --- how-tos/subset.qmd | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index 58a31bfe..1871a0a5 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -28,12 +28,19 @@ from harmony.config import Environment **Set up Harmony client and authentication** -We will authenticate the following Harmony request using a netrc file. See the [appendix]("https://nasa-openscapes.github.io/earthdata-cloud-cookbook/appendix/authentication.html") for more information on [Earthdata Login]("https://urs.earthdata.nasa.gov/") and netrc setup. +We will authenticate the following Harmony request using a netrc file. See the [appendix]("https://nasa-openscapes.github.io/earthdata-cloud-cookbook/appendix/authentication.html") for more information on [Earthdata Login]("https://urs.earthdata.nasa.gov/") and netrc setup. This basic line below to create a Harmony Client assumes that we have a .netrc available. 
+ +```{python} +harmony_client = Client() +``` +**Create Harmony request** + +We are interested in the GHRSST Level 4 MUR Global Foundation Sea Surface Temperature Analysis dataset https://doi.org/10.5067/GHGMR-4FJ04 ```{python} dataset_short_name = 'MUR-JPL-L4-GLOB-v4.1' -harmony_client = Client() + request = Request( collection=Collection(id=dataset_short_name), From 12209f6d41a5eab266c4354e7ff31180b87c42f7 Mon Sep 17 00:00:00 2001 From: asteiker Date: Wed, 10 May 2023 22:52:04 +0000 Subject: [PATCH 07/10] added explanatory text and open file with s3 error --- how-tos/subset.qmd | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index ca3772d9..a80b73b7 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -22,8 +22,10 @@ Import packages: ```{python} import datetime as dt -from harmony import BBox, Client, Collection, Request +from harmony import BBox, Client, Collection, Request, LinkType from harmony.config import Environment + +import s3fs ``` ### Set up Harmony client and authentication @@ -33,15 +35,13 @@ We will authenticate the following Harmony request using a netrc file. See the [ ```{python} harmony_client = Client() ``` -**Create Harmony request** +### Create and submit Harmony request -We are interested in the GHRSST Level 4 MUR Global Foundation Sea Surface Temperature Analysis dataset https://doi.org/10.5067/GHGMR-4FJ04 +We are interested in the [GHRSST Level 4 MUR Global Foundation Sea Surface Temperature Analysis dataset](https://doi.org/10.5067/GHGMR-4FJ04). We are subsetting over the Pacific Ocean to the west of Mexico during 1:00 - 2:00 on 10 March 2021. The dataset is organized into daily files, so while we are specifying a single hour in our request, this will return that full day's worth of data. 
```{python} dataset_short_name = 'MUR-JPL-L4-GLOB-v4.1' - - request = Request( collection=Collection(id=dataset_short_name), spatial=BBox(-125.469,15.820,-99.453,35.859), @@ -53,7 +53,29 @@ request = Request( job_id = harmony_client.submit(request) -harmony_client.wait_for_processing(job_id) +harmony_client.wait_for_processing(job_id, link_type=LinkType.s3) +``` +### Open and read the subsetted file + +```{python} +results = harmony_client.result_urls(job_id) +urls = list(results) + +creds = harmony_client.aws_credentials() + +s3_fs = s3fs.S3FileSystem( + key=creds['aws_access_key_id'], + secret=creds['aws_secret_access_key'], + token=creds['aws_session_token'], + client_kwargs={'region_name':'us-west-2'}, +) +``` +### Access error here: + +```{python} +f = s3_fs.open(url, mode='rb') +ds = xr.open_dataset(f) +ds ``` ## R From 3b225f4e4e1d97be3a78d607ac3715c080ade79b Mon Sep 17 00:00:00 2001 From: asteiker Date: Wed, 10 May 2023 23:13:40 +0000 Subject: [PATCH 08/10] fixed link type copy paste error --- how-tos/subset.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index a80b73b7..39efe019 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -53,12 +53,12 @@ request = Request( job_id = harmony_client.submit(request) -harmony_client.wait_for_processing(job_id, link_type=LinkType.s3) +harmony_client.wait_for_processing(job_id) ``` ### Open and read the subsetted file ```{python} -results = harmony_client.result_urls(job_id) +results = harmony_client.result_urls(job_id, link_type=LinkType.s3) urls = list(results) creds = harmony_client.aws_credentials() From efe6f1980ce953dc6017105e656f92eb2315cae4 Mon Sep 17 00:00:00 2001 From: asteiker Date: Thu, 11 May 2023 14:41:34 +0000 Subject: [PATCH 09/10] add url from url list --- how-tos/subset.qmd | 1 + 1 file changed, 1 insertion(+) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index 39efe019..2e226431 100644 --- a/how-tos/subset.qmd +++ 
b/how-tos/subset.qmd @@ -60,6 +60,7 @@ harmony_client.wait_for_processing(job_id) ```{python} results = harmony_client.result_urls(job_id, link_type=LinkType.s3) urls = list(results) +url = urls[0] creds = harmony_client.aws_credentials() From 4a144b865906c6b266b05ac9d5efb4cc48a503cd Mon Sep 17 00:00:00 2001 From: asteiker Date: Thu, 11 May 2023 20:04:32 +0000 Subject: [PATCH 10/10] end-to-end working harmony-py example --- how-tos/subset.qmd | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/how-tos/subset.qmd b/how-tos/subset.qmd index 2e226431..36e0858d 100644 --- a/how-tos/subset.qmd +++ b/how-tos/subset.qmd @@ -23,9 +23,9 @@ Import packages: import datetime as dt from harmony import BBox, Client, Collection, Request, LinkType -from harmony.config import Environment import s3fs +import xarray as xr ``` ### Set up Harmony client and authentication @@ -55,7 +55,9 @@ job_id = harmony_client.submit(request) harmony_client.wait_for_processing(job_id) ``` -### Open and read the subsetted file +### Open and read the subsetted file in `xarray` + +Harmony data outputs can be accessed within the cloud using the s3 URLs and AWS credentials provided in the Harmony job response. Using `aws_credentials` we can retrieve the credentials needed to access the Harmony s3 staging bucket and its contents. We then use the `s3fs` package to create an S3 file system object from which xarray can read the file. ```{python} results = harmony_client.result_urls(job_id, link_type=LinkType.s3) @@ -70,14 +72,18 @@ s3_fs = s3fs.S3FileSystem( token=creds['aws_session_token'], client_kwargs={'region_name':'us-west-2'}, ) -``` -### Access error here: -```{python} f = s3_fs.open(url, mode='rb') ds = xr.open_dataset(f) ds ``` +### Plot data + +Use the xarray built-in plotting function to create a simple plot along the x and y dimensions of the dataset: + +```{python} +ds.analysed_sst.plot() ; +``` ## R