-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ PySTACItemReaderIterDataPipe for reading STAC Items (#46)
An iterable-style DataPipe for STAC items! Uses pystac for reading the files or URLs into pystac.Item objects.
* ✨ PySTACItemReaderIterDataPipe for reading STAC Items. An iterable-style DataPipe for STAC items! Uses pystac for reading the files or URLs. Included a doctest that checks the metadata within the pystac.item.Item object. Added a new section in the API docs and an intersphinx mapping.
* 💚 Make pystac an extras dependency in the spatial category. Ensure that zen3geo works even when `pystac` is not installed and add `pystac` to the spatial section of the extras dependencies in pyproject.toml.
* 🚚 Rename functional form as read_to_pystac_item. Decided that since the returned object is a `pystac.Item`, it should probably be `read_to_pystac_item`.
* ✅ Unit test to read JSON and return a pystac.Item. Ensure that a JSON STAC item can be read into a pystac.Item object that contains various spatiotemporal metadata.
* 💚 Skip PySTACItemReader doctests if pystac not installed. Use pytest.importorskip to skip running the doctest when pystac cannot be imported.
- Loading branch information
Showing
7 changed files
with
158 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
""" | ||
DataPipes for :doc:`pystac <pystac:index>`. | ||
""" | ||
from typing import Any, Dict, Iterator, Optional | ||
|
||
try: | ||
import pystac | ||
except ImportError: | ||
pystac = None | ||
from torchdata.datapipes import functional_datapipe | ||
from torchdata.datapipes.iter import IterDataPipe | ||
|
||
|
||
@functional_datapipe("read_to_pystac_item")
class PySTACItemReaderIterDataPipe(IterDataPipe):
    """
    Takes files from local disk or URLs (as long as they can be read by pystac)
    and yields :py:class:`pystac.Item` objects (functional name:
    ``read_to_pystac_item``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to STAC items.

    kwargs : Optional
        Extra keyword arguments to pass to :py:meth:`pystac.Item.from_file`.

    Yields
    ------
    stac_item : pystac.Item
        An :py:class:`pystac.Item` object containing the specific STACObject
        implementation class represented in a JSON format.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac`` is not installed. See
        :doc:`install instructions for pystac <pystac:installation>`, (e.g. via
        ``pip install pystac``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pystac = pytest.importorskip("pystac")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PySTACItemReader
    ...
    >>> # Read in STAC Item using DataPipe
    >>> item_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435"
    >>> dp = IterableWrapper(iterable=[item_url])
    >>> dp_pystac = dp.read_to_pystac_item()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pystac)
    >>> stac_item = next(it)
    >>> stac_item.bbox
    [103.20205689, 0.81602476, 104.18934086, 1.8096362]
    >>> stac_item.properties  # doctest: +NORMALIZE_WHITESPACE
    {'datetime': '2022-01-15T03:21:01.024000Z',
     'platform': 'Sentinel-2A',
     'proj:epsg': 32648,
     'instruments': ['msi'],
     's2:mgrs_tile': '48NUG',
     'constellation': 'Sentinel 2',
     's2:granule_id': 'S2A_OPER_MSI_L2A_TL_ESRI_20220115T170436_A034292_T48NUG_N03.00',
     'eo:cloud_cover': 17.352597,
     's2:datatake_id': 'GS2A_20220115T032101_034292_N03.00',
     's2:product_uri': 'S2A_MSIL2A_20220115T032101_N0300_R118_T48NUG_20220115T170435.SAFE',
     's2:datastrip_id': 'S2A_OPER_MSI_L2A_DS_ESRI_20220115T170436_S20220115T033502_N03.00',
     's2:product_type': 'S2MSI2A',
     'sat:orbit_state': 'descending',
    ...
    """

    # NOTE: a ``**kwargs`` annotation describes the type of each individual
    # keyword value, not of the collected dict, hence ``Any`` rather than
    # ``Optional[Dict[str, Any]]``.
    def __init__(self, source_datapipe: IterDataPipe[str], **kwargs: Any) -> None:
        # ``pystac`` is an optional dependency; the module-level import guard
        # leaves it as None when missing, so fail early with install hints.
        if pystac is None:
            raise ModuleNotFoundError(
                "Package `pystac` is required to be installed to use this datapipe. "
                "Please use `pip install pystac` or "
                "`conda install -c conda-forge pystac` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        # Collected keyword arguments, forwarded as-is on every read.
        self.kwargs: Dict[str, Any] = kwargs

    def __iter__(self) -> Iterator:
        """
        Read each href from the source DataPipe with
        ``pystac.Item.from_file`` and yield the result one at a time.
        """
        for href in self.source_datapipe:
            yield pystac.Item.from_file(href=href, **self.kwargs)

    def __len__(self) -> int:
        """
        Return the length of the source DataPipe (one item is yielded per
        href).
        """
        return len(self.source_datapipe)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
""" | ||
Tests for pystac datapipes. | ||
""" | ||
import pytest | ||
from torchdata.datapipes.iter import IterableWrapper | ||
|
||
from zen3geo.datapipes import PySTACItemReader | ||
|
||
pystac = pytest.importorskip("pystac") | ||
|
||
# %% | ||
def test_pystac_item_reader():
    """
    Check that PySTACItemReader reads a JSON STAC item from a URL and yields
    a pystac.Item object carrying the expected spatiotemporal metadata.
    """
    sample_href: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/item/sample-item.json"
    source_dp = IterableWrapper(iterable=[sample_href])

    # Class constructor form; only checked for constructing without error,
    # since the variable is rebound right below
    item_dp = PySTACItemReader(source_datapipe=source_dp)
    # Functional form (recommended); this is the pipe the assertions use
    item_dp = source_dp.read_to_pystac_item()

    # Expected metadata of the sample item
    expected_bbox = [-122.59750209, 37.48803556, -122.2880486, 37.613537207]
    expected_properties = {
        "datetime": "2016-05-03T13:22:30.040000Z",
        "title": "A CS3 item",
        "license": "PDDL-1.0",
        "providers": [
            {
                "name": "CoolSat",
                "roles": ["producer", "licensor"],
                "url": "https://cool-sat.com/",
            }
        ],
    }
    expected_product = "http://cool-sat.com/catalog/products/analytic.json"

    assert len(item_dp) == 1
    item = next(iter(item_dp))

    assert item.bbox == expected_bbox
    assert item.datetime.isoformat() == "2016-05-03T13:22:30.040000+00:00"
    assert item.geometry["type"] == "Polygon"
    assert item.properties == expected_properties
    assert item.assets["analytic"].extra_fields["product"] == expected_product