From 90e8a1d3a8e4115194f4b4d2c70def32b70f19bf Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 23 Apr 2021 19:03:37 -0400 Subject: [PATCH] Support passing string type numbers, geographic coordinates and datetimes (#975) Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- doc/api/index.rst | 1 + pygmt/clib/session.py | 26 +++++------ pygmt/tests/test_clib_put_vector.py | 70 +++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 13 deletions(-) diff --git a/doc/api/index.rst b/doc/api/index.rst index 6460eb53e28..0c07c18a364 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -224,6 +224,7 @@ Low level access (these are mostly used by the :mod:`pygmt.clib` package): clib.Session.get_default clib.Session.create_data clib.Session.put_matrix + clib.Session.put_strings clib.Session.put_vector clib.Session.write_data clib.Session.open_virtual_file diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 3e79f558a8a..d13cb911091 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -60,6 +60,7 @@ np.uint64: "GMT_ULONG", np.uint32: "GMT_UINT", np.datetime64: "GMT_DATETIME", + np.str_: "GMT_TEXT", } @@ -719,9 +720,7 @@ def _check_dtype_and_dim(self, array, ndim): """ # check the array has the given dimension if array.ndim != ndim: - raise GMTInvalidInput( - "Expected a numpy 1d array, got {}d.".format(array.ndim) - ) + raise GMTInvalidInput(f"Expected a numpy 1d array, got {array.ndim}d.") # check the array has a valid/known data type if array.dtype.type not in DTYPES: @@ -745,7 +744,7 @@ def put_vector(self, dataset, column, vector): first. Use ``family='GMT_IS_DATASET|GMT_VIA_VECTOR'``. Not at all numpy dtypes are supported, only: float64, float32, int64, - int32, uint64, and uint32. + int32, uint64, uint32, datetime64 and str_. .. warning:: The numpy array must be C contiguous in memory. If it comes from a @@ -777,11 +776,14 @@ def put_vector(self, dataset, column, vector): ) gmt_type = self._check_dtype_and_dim(vector, ndim=1) - if gmt_type == self["GMT_DATETIME"]: + if gmt_type in (self["GMT_TEXT"], self["GMT_DATETIME"]): vector_pointer = (ctp.c_char_p * len(vector))() - vector_pointer[:] = np.char.encode( - np.datetime_as_string(array_to_datetime(vector)) - ) + if gmt_type == self["GMT_DATETIME"]: + vector_pointer[:] = np.char.encode( + np.datetime_as_string(array_to_datetime(vector)) + ) + else: + vector_pointer[:] = np.char.encode(vector) else: vector_pointer = vector.ctypes.data_as(ctp.c_void_p) status = c_put_vector( @@ -789,11 +791,9 @@ def put_vector(self, dataset, column, vector): ) if status != 0: raise GMTCLibError( - " ".join( - [ - "Failed to put vector of type {}".format(vector.dtype), - "in column {} of dataset.".format(column), - ] + ( + f"Failed to put vector of type {vector.dtype} " + f"in column {column} of dataset." ) ) diff --git a/pygmt/tests/test_clib_put_vector.py b/pygmt/tests/test_clib_put_vector.py index 72a4c858898..1d085eb92ca 100644 --- a/pygmt/tests/test_clib_put_vector.py +++ b/pygmt/tests/test_clib_put_vector.py @@ -2,6 +2,7 @@ Test the functions that put vector data into GMT. """ import itertools +from datetime import datetime import numpy as np import numpy.testing as npt @@ -90,6 +91,75 @@ def test_put_vector_mixed_dtypes(): npt.assert_allclose(newy, y) +def test_put_vector_string_dtype(): + """ + Passing string type vectors to a dataset. + """ + # input string vectors: numbers, longitudes, latitudes, and datetimes + vectors = np.array( + [ + ["10", "20.0", "-30.0", "3.5e1"], + ["10W", "30.50E", "30:30W", "40:30:30.500E"], + ["10N", "30.50S", "30:30N", "40:30:30.500S"], + ["2021-02-03", "2021-02-03T04", "2021-02-03T04:05:06.700", "T04:50:06.700"], + ] + ) + # output vectors in double or string type + # Notes: + # 1. longitudes and latitudes are stored in double in GMT + # 2. The default output format for datetime is YYYY-mm-ddTHH:MM:SS + expected_vectors = [ + [10.0, 20.0, -30.0, 35], + [-10, 30.5, -30.5, 40.508472], + [10, -30.50, 30.5, -40.508472], + [ + "2021-02-03T00:00:00", + "2021-02-03T04:00:00", + "2021-02-03T04:05:06", + f"{datetime.utcnow().strftime('%Y-%m-%d')}T04:50:06", + ], + ] + + # loop over all possible combinations of input types + for i, j in itertools.combinations_with_replacement(range(4), r=2): + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[2, 4, 1, 0], # columns, rows, layers, dtype + ) + lib.put_vector(dataset, column=lib["GMT_X"], vector=vectors[i]) + lib.put_vector(dataset, column=lib["GMT_Y"], vector=vectors[j]) + # Turns out wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_VECTOR", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data + output = np.genfromtxt( + tmp_file.name, dtype=None, names=("x", "y"), encoding=None + ) + # check that the output is correct + # Use npt.assert_allclose for numeric arrays + # and npt.assert_array_equal for string arrays + if i != 3: + npt.assert_allclose(output["x"], expected_vectors[i]) + else: + npt.assert_array_equal(output["x"], expected_vectors[i]) + if j != 3: + npt.assert_allclose(output["y"], expected_vectors[j]) + else: + npt.assert_array_equal(output["y"], expected_vectors[j]) + + def test_put_vector_invalid_dtype(): """ Check that it fails with an exception for invalid data types.