Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a single function for loading any sample dataset #1685

Merged
merged 15 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,10 @@ and store them in the GMT cache folder.
.. autosummary::
:toctree: generated

datasets.list_sample_data
datasets.load_earth_age
datasets.load_earth_relief
datasets.load_sample_data
datasets.load_fractures_compilation
datasets.load_hotspots
datasets.load_japan_quakes
Expand Down
2 changes: 2 additions & 0 deletions pygmt/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
from pygmt.datasets.earth_age import load_earth_age
from pygmt.datasets.earth_relief import load_earth_relief
from pygmt.datasets.samples import (
list_sample_data,
load_fractures_compilation,
load_hotspots,
load_japan_quakes,
load_mars_shape,
load_ocean_ridge_points,
load_sample_bathymetry,
load_sample_data,
load_usgs_quakes,
)
203 changes: 186 additions & 17 deletions pygmt/datasets/samples.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,91 @@
"""
Functions to load sample data.
"""
import warnings

import pandas as pd
from pygmt.exceptions import GMTInvalidInput
from pygmt.src import which


def load_japan_quakes():
def list_sample_data():
    """
    Report datasets available for tests and documentation examples.

    A new dictionary is built on every call, so callers may modify the
    returned mapping without affecting later calls.

    Returns
    -------
    dict
        Names and short descriptions of available sample datasets. The keys
        are the dataset names; the values are one-line, human-readable
        descriptions.

    See Also
    --------
    load_sample_data : Load an example dataset from the GMT server.
    """
    # NOTE: adjacent string literals are concatenated as-is, so only the first
    # fragment of a wrapped description carries the separating space.
    names = {
        "japan_quakes": "Table of earthquakes around Japan from NOAA NGDC database",
        "ocean_ridge_points": "Table of ocean ridge points for the entire world",
        "bathymetry": "Table of ship bathymetric observations off Baja California",
        "usgs_quakes": "Table of global earthquakes from the USGS",
        "fractures": "Table of hypothetical fracture lengths and azimuths",
        "hotspots": "Table of locations, names, and symbol sizes of hotspots from "
        "Mueller et al., 1993",
        "mars_shape": "Table of topographic signature of the hemispheric dichotomy of "
        "Mars from Smith and Zuber (1996)",
    }
    return names


def load_sample_data(name):
Copy link
Member

@seisman seisman Jan 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering whether we can merge the list_sample_data() function into load_sample_data(), so that we don't have to maintain two dictionaries.

For example, calling load_sample_data() without giving a name can return the name-description dict.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to keep them separate even though it requires two dictionaries because I think overall it's simpler to have each function have one purpose.

"""
Load an example dataset from the GMT server.

The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the
first time you invoke this function. Afterwards, it will load the data from
the cache. So you'll need an internet connection the first time around.

Parameters
----------
name : str
Name of the dataset to load.

Returns
-------
:class:`pandas.DataFrame` or :class:`xarray.DataArray`
Sample dataset loaded as a pandas.DataFrame for tabular data or
xarray.DataArray for raster data
maxrjones marked this conversation as resolved.
Show resolved Hide resolved

See Also
--------
list_sample_data : Report datasets available for tests and documentation
examples.
"""
names = list_sample_data()
if name not in names:
raise GMTInvalidInput(f"Invalid dataset name '{name}'.")

load_func = {
"japan_quakes": load_japan_quakes,
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
"ocean_ridge_points": load_ocean_ridge_points,
"bathymetry": load_sample_bathymetry,
"usgs_quakes": load_usgs_quakes,
"fractures": load_fractures_compilation,
"hotspots": load_hotspots,
"mars_shape": load_mars_shape,
}

data = load_func[name](suppress_warning=True)

return data


def load_japan_quakes(**kwargs):
"""
(Deprecated) Load a table of earthquakes around Japan as a
pandas.DataFrame.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="japan_quakes")`` and will be removed in
v0.9.0.
weiji14 marked this conversation as resolved.
Show resolved Hide resolved

Data is from the NOAA NGDC database. This is the ``@tut_quakes.ngdc``
dataset used in the GMT tutorials.
Expand All @@ -22,6 +100,16 @@ def load_japan_quakes():
The data table. Columns are year, month, day, latitude, longitude,
depth (in km), and magnitude of the earthquakes.
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be "
"removed in v0.9.0. Please use "
"load_sample_data(name='japan_quakes') instead.",
category=FutureWarning,
stacklevel=2,
)

fname = which("@tut_quakes.ngdc", download="c")
data = pd.read_csv(fname, header=1, sep=r"\s+")
data.columns = [
Expand All @@ -33,14 +121,19 @@ def load_japan_quakes():
"depth_km",
"magnitude",
]

return data


def load_ocean_ridge_points():
def load_ocean_ridge_points(**kwargs):
"""
Load a table of ocean ridge points for the entire world as a
(Deprecated) Load a table of ocean ridge points for the entire world as a
pandas.DataFrame.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="ocean_ridge_points")`` and will be removed in
v0.9.0.

This is the ``@ridge.txt`` dataset used in the GMT tutorials.

The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the
Expand All @@ -52,17 +145,31 @@ def load_ocean_ridge_points():
data : pandas.DataFrame
The data table. Columns are longitude and latitude.
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be removed "
"in v0.9.0. Please use load_sample_data(name='ocean_ridge_points') "
"instead.",
category=FutureWarning,
stacklevel=2,
)

fname = which("@ridge.txt", download="c")
data = pd.read_csv(
fname, sep=r"\s+", names=["longitude", "latitude"], skiprows=1, comment=">"
)
return data


def load_sample_bathymetry():
def load_sample_bathymetry(**kwargs):
"""
Load a table of ship observations of bathymetry off Baja California as a
pandas.DataFrame.
(Deprecated) Load a table of ship observations of bathymetry off Baja
California as a pandas.DataFrame.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="bathymetry")`` and will be removed in
v0.9.0.

This is the ``@tut_ship.xyz`` dataset used in the GMT tutorials.

Expand All @@ -75,16 +182,30 @@ def load_sample_bathymetry():
data : pandas.DataFrame
The data table. Columns are longitude, latitude, and bathymetry.
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be "
"removed in v0.9.0. Please use "
"load_sample_data(name='bathymetry') instead.",
category=FutureWarning,
stacklevel=2,
)
fname = which("@tut_ship.xyz", download="c")
data = pd.read_csv(
fname, sep="\t", header=None, names=["longitude", "latitude", "bathymetry"]
)
return data


def load_usgs_quakes():
def load_usgs_quakes(**kwargs):
"""
Load a table of global earthquakes form the USGS as a pandas.DataFrame.
(Deprecated) Load a table of global earthquakes from the USGS as a
pandas.DataFrame.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="usgs_quakes")`` and will be removed in
v0.9.0.

This is the ``@usgs_quakes_22.txt`` dataset used in the GMT tutorials.

Expand All @@ -98,15 +219,28 @@ def load_usgs_quakes():
The data table. Use ``print(data.describe())`` to see the available
columns.
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be "
"removed in v0.9.0. Please use "
"load_sample_data(name='usgs_quakes') instead.",
category=FutureWarning,
stacklevel=2,
)
fname = which("@usgs_quakes_22.txt", download="c")
data = pd.read_csv(fname)
return data


def load_fractures_compilation():
def load_fractures_compilation(**kwargs):
"""
Load a table of fracture lengths and azimuths as hypothetically digitized
from geological maps as a pandas.DataFrame.
(Deprecated) Load a table of fracture lengths and azimuths as
hypothetically digitized from geological maps as a pandas.DataFrame.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="fractures")`` and will be removed in
v0.9.0.

This is the ``@fractures_06.txt`` dataset used in the GMT tutorials.

Expand All @@ -120,15 +254,28 @@ def load_fractures_compilation():
The data table. Use ``print(data.describe())`` to see the available
columns.
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be "
"removed in v0.9.0. Please use "
"load_sample_data(name='fractures') instead.",
category=FutureWarning,
stacklevel=2,
)
fname = which("@fractures_06.txt", download="c")
data = pd.read_csv(fname, header=None, sep=r"\s+", names=["azimuth", "length"])
return data[["length", "azimuth"]]


def load_hotspots():
def load_hotspots(**kwargs):
"""
Load a table with the locations, names, and suggested symbol sizes of
hotspots.
(Deprecated) Load a table with the locations, names, and suggested symbol
sizes of hotspots.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="hotspots")`` and will be removed in
v0.9.0.

This is the ``@hotspots.txt`` dataset used in the GMT tutorials, with data
from Mueller, Royer, and Lawver, 1993, Geology, vol. 21, pp. 275-278. The
Expand All @@ -145,15 +292,28 @@ def load_hotspots():
The data table with columns "longitude", "latitude", "symbol_size", and
"placename".
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be "
"removed in v0.9.0. Please use "
"load_sample_data(name='hotspots') instead.",
category=FutureWarning,
stacklevel=2,
)
fname = which("@hotspots.txt", download="c")
columns = ["longitude", "latitude", "symbol_size", "place_name"]
data = pd.read_table(filepath_or_buffer=fname, sep="\t", skiprows=3, names=columns)
return data


def load_mars_shape():
def load_mars_shape(**kwargs):
"""
Load a table of data for the shape of Mars.
(Deprecated) Load a table of data for the shape of Mars.

.. warning:: Deprecated since v0.6.0. This function has been replaced with
``load_sample_data(name="mars_shape")`` and will be removed in
v0.9.0.

This is the ``@mars370d.txt`` dataset used in GMT examples, with data and
information from Smith, D. E., and M. T. Zuber (1996), The shape of Mars
Expand All @@ -169,6 +329,15 @@ def load_mars_shape():
data : pandas.DataFrame
The data table with columns "longitude", "latitude", and "radius(m)".
"""

if "suppress_warning" not in kwargs:
warnings.warn(
"This function has been deprecated since v0.6.0 and will be "
"removed in v0.9.0. Please use "
"load_sample_data(name='mars_shape') instead.",
category=FutureWarning,
stacklevel=2,
)
fname = which("@mars370d.txt", download="c")
data = pd.read_csv(
fname, sep="\t", header=None, names=["longitude", "latitude", "radius(m)"]
Expand Down
Loading