Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add load_earth_mask function for GSHHG Global Earth Mask dataset #2310

Merged
merged 17 commits into from
Jan 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ and store them in GMT's user data directory.
datasets.load_earth_free_air_anomaly
datasets.load_earth_geoid
datasets.load_earth_magnetic_anomaly
datasets.load_earth_mask
datasets.load_earth_relief
datasets.load_earth_vertical_gravity_gradient
datasets.load_sample_data
Expand Down
1 change: 1 addition & 0 deletions pygmt/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pygmt.datasets.earth_free_air_anomaly import load_earth_free_air_anomaly
from pygmt.datasets.earth_geoid import load_earth_geoid
from pygmt.datasets.earth_magnetic_anomaly import load_earth_magnetic_anomaly
from pygmt.datasets.earth_mask import load_earth_mask
from pygmt.datasets.earth_relief import load_earth_relief
from pygmt.datasets.earth_vertical_gravity_gradient import (
load_earth_vertical_gravity_gradient,
Expand Down
68 changes: 68 additions & 0 deletions pygmt/datasets/earth_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Function to download the GSHHG Global Earth Mask from the GMT data server, and
load as :class:`xarray.DataArray`.

The grids are available in various resolutions.
"""
from pygmt.datasets.load_remote_dataset import _load_remote_dataset
from pygmt.helpers import kwargs_to_strings

__doctest_skip__ = ["load_earth_mask"]


@kwargs_to_strings(region="sequence")
def load_earth_mask(resolution="01d", region=None, registration=None):
r"""
Load the GSHHG Global Earth Mask in various resolutions.

The grids are downloaded to a user data directory
(usually ``~/.gmt/server/earth/earth_mask/``) the first time you invoke
this function. Afterwards, it will load the grid from the data directory.
So you'll need an internet connection the first time around.

These grids can also be accessed by passing in the file name
**@earth_mask**\_\ *res*\[_\ *reg*] to any grid plotting/processing
function. *res* is the grid resolution (see below), and *reg* is grid
registration type (**p** for pixel registration or **g** for gridline
registration).

Refer to :gmt-datasets:`earth-mask.html` for more details.

Parameters
----------
resolution : str
The grid resolution. The suffix ``d``, ``m``, and ``s`` stand for
arc-degrees, arc-minutes, and arc-seconds. It can be ``"01d"``,
``"30m"``, ``"20m"``, ``"15m"``, ``"10m"``, ``"06m"``, ``"05m"``,
``"04m"``, ``"03m"``, ``"02m"``, ``"01m"``, ``"30s"``, or ``"15s"``.

region : str or list
The subregion of the grid to load, in the form of a list
[*xmin*, *xmax*, *ymin*, *ymax*] or a string *xmin/xmax/ymin/ymax*.

registration : str
Grid registration type. Either ``"pixel"`` for pixel registration or
``"gridline"`` for gridline registration. Default is ``"gridline"``.

Returns
-------
grid : :class:`xarray.DataArray`
The Earth mask grid. Coordinates are latitude and
longitude in degrees. The node values in the mask grids are all in
the 0-4 range and reflect different surface types:

- 0: Oceanic areas beyond the shoreline
- 1: Land areas inside the shoreline
- 2: Lakes inside the land areas
- 3: Islands in lakes in the land areas
- 4: Smaller lakes in islands that are found within lakes
inside the land area
"""
Copy link
Member

@seisman seisman Jan 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function now returns a dataarray like below:

<xarray.DataArray 'z' (lat: 181, lon: 361)>
array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)
Coordinates:
  * lon      (lon) float64 -180.0 -179.0 -178.0 -177.0 ... 178.0 179.0 180.0
  * lat      (lat) float64 -90.0 -89.0 -88.0 -87.0 -86.0 ... 87.0 88.0 89.0 90.0
Attributes:
    long_name:     z
    actual_range:  [0. 2.]

The data dtype is float32, but the original grid @earth_mask_01d_g contains int8 values. Ideally, the function should return a grid with integer values.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would I change the value types across the entire array?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would I change the value types across the entire array?

I don't know. Need to read the xarray docs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likely we need to use xarray.DataArray.astype to convert the data from float32 to int8, i.e., return grid.astype("int8").

grid = _load_remote_dataset(
dataset_name="earth_mask",
dataset_prefix="earth_mask_",
resolution=resolution,
region=region,
registration=registration,
)
return grid.astype("int8")
26 changes: 25 additions & 1 deletion pygmt/datasets/load_remote_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,28 @@ class GMTRemoteDataset(NamedTuple):
"02m": Resolution(["pixel"], True),
},
),
"earth_mask": GMTRemoteDataset(
title="Earth mask",
name="earth_mask",
long_name="Mask of land and water features",
units=None,
extra_attributes={"horizontal_datum": "WGS84"},
resolutions={
"01d": Resolution(["gridline", "pixel"], False),
"30m": Resolution(["gridline", "pixel"], False),
"20m": Resolution(["gridline", "pixel"], False),
"15m": Resolution(["gridline", "pixel"], False),
"10m": Resolution(["gridline", "pixel"], False),
"06m": Resolution(["gridline", "pixel"], False),
"05m": Resolution(["gridline", "pixel"], False),
"04m": Resolution(["gridline", "pixel"], False),
"03m": Resolution(["gridline", "pixel"], False),
"02m": Resolution(["gridline", "pixel"], False),
"01m": Resolution(["gridline", "pixel"], False),
"30s": Resolution(["gridline", "pixel"], False),
"15s": Resolution(["gridline", "pixel"], False),
},
),
"earth_relief": GMTRemoteDataset(
title="Earth relief",
name="elevation",
Expand Down Expand Up @@ -248,6 +270,7 @@ def _load_remote_dataset(
The returned :class:`xarray.DataArray` doesn't support slice operation for
tiled grids.
"""
# pylint: disable=too-many-branches
dataset = datasets[dataset_name]
if resolution not in dataset.resolutions.keys():
raise GMTInvalidInput(f"Invalid resolution '{resolution}'.")
Expand Down Expand Up @@ -295,7 +318,8 @@ def _load_remote_dataset(
# Add some metadata to the grid
grid.name = dataset.name
grid.attrs["long_name"] = dataset.long_name
grid.attrs["units"] = dataset.units
if dataset.units:
grid.attrs["units"] = dataset.units
for key, value in dataset.extra_attributes.items():
grid.attrs[key] = value
# Remove the actual range because it gets outdated when indexing the grid,
Expand Down
2 changes: 2 additions & 0 deletions pygmt/helpers/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def download_test_data():
"@earth_mag_01d_g",
"@S30W060.earth_mag_02m_p.nc", # Specific grid for 02m test
"@earth_mag4km_01d_g",
# Earth mask grid
"@earth_mask_01d_g",
# Earth free-air anomaly grids
"@earth_faa_01d_g",
"@N00W030.earth_faa_01m_p.nc", # Specific grid for 01m test
Expand Down
57 changes: 57 additions & 0 deletions pygmt/tests/test_datasets_earth_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Test basic functionality for loading Earth mask datasets.
"""
import numpy as np
import numpy.testing as npt
import pytest
from pygmt.datasets import load_earth_mask
from pygmt.exceptions import GMTInvalidInput


def test_earth_mask_fails():
"""
Make sure load_earth_mask fails for invalid resolutions.
"""
resolutions = "1m 1d bla 60d 001m 03".split()
resolutions.append(60)
for resolution in resolutions:
with pytest.raises(GMTInvalidInput):
load_earth_mask(resolution=resolution)


def test_earth_mask_incorrect_registration():
"""
Test loading load_earth_mask with incorrect registration type.
"""
with pytest.raises(GMTInvalidInput):
load_earth_mask(registration="improper_type")


def test_earth_mask_01d():
"""
Test some properties of the Earth mask 01d data.
"""
data = load_earth_mask(resolution="01d")
assert data.name == "earth_mask"
assert data.attrs["long_name"] == "Mask of land and water features"
assert data.attrs["horizontal_datum"] == "WGS84"
assert data.shape == (181, 361)
assert data.gmt.registration == 0
assert data.dtype == "int8"
npt.assert_allclose(data.lat, np.arange(-90, 91, 1))
npt.assert_allclose(data.lon, np.arange(-180, 181, 1))
npt.assert_allclose(data.min(), 0)
npt.assert_allclose(data.max(), 2)
npt.assert_allclose(data[36, 45], 0)


def test_earth_mask_01d_with_region():
"""
Test loading low-resolution Earth mask with 'region'.
"""
data = load_earth_mask(resolution="01d", region=[-7, 4, 13, 19])
assert data.shape == (7, 12)
assert data.gmt.registration == 0
npt.assert_allclose(data.lat, np.arange(13, 20, 1))
npt.assert_allclose(data.lon, np.arange(-7, 5, 1))
npt.assert_allclose(data[1, 5], 1)