GenericMappingTools · seisman · Dec 12, 2024 · Nov 5, 2024 · Nov 5, 2024 · Nov 5, 2024
diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py
@@ -168,24 +168,35 @@
         "date64[ms][pyarrow]": "datetime64[ms]",
     }
 
+    # The expected NumPy dtype of the result array, or None if no mapping is defined.
+    dtype = dtypes.get(str(getattr(data, "dtype", getattr(data, "type", ""))))
+
+    # pandas numeric dtypes were converted to np.object_ dtype prior to pandas 2.2, and
+    # are converted to suitable NumPy dtypes since pandas 2.2. Refer to the following
+    # link for details: https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#to-numpy-for-numpy-nullable-and-arrow-types-converts-to-suitable-numpy-dtype
+    #
+    # Workarounds for pandas < 2.2. Following SPEC 0, pandas 2.1 should be dropped in
+    # 2025 Q3, so it's likely we can remove the workaround in PyGMT v0.17.0.
     if (
-        hasattr(data, "isna")
-        and data.isna().any()
-        and Version(pd.__version__) < Version("2.2")
-    ):
-        # Workaround for dealing with pd.NA with pandas < 2.2.
-        # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844
-        # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely
-        # we can remove the workaround in PyGMT v0.17.0.
-        array = np.ascontiguousarray(data.astype(float))
-    else:
-        vec_dtype = str(getattr(data, "dtype", getattr(data, "type", "")))
-        array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype))
+        Version(pd.__version__) < Version("2.2")  # pandas < 2.2 only.
+        and hasattr(data, "dtype")  # NumPy array or pandas objects only.
+        and hasattr(data.dtype, "numpy_dtype")  # pandas dtypes only.
+        and data.dtype.kind in "iuf"  # Numeric dtypes only.
+    ):  # pandas Series/Index with pandas nullable numeric dtypes.
+        dtype = data.dtype.numpy_dtype  # The expected numpy dtype.
+        if getattr(data, "hasnans", False):
+            if data.dtype.kind in "iu":
+                # Integers with missing values are converted to float64.
+                dtype = np.float64
+            data = data.to_numpy(na_value=np.nan)
+
+    array = np.ascontiguousarray(data, dtype=dtype)
 
     # Check if a np.object_ array can be converted to np.str_.
     if array.dtype == np.object_:
         with contextlib.suppress(TypeError, ValueError):
             return np.ascontiguousarray(array, dtype=np.str_)
+
     return array
 
 

diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py
@@ -32,6 +32,9 @@ def timestamp(unit: str, tz: str | None = None):
 
     _HAS_PYARROW = False
 
+# Mark tests that require pyarrow
+pa_marks = {"marks": skip_if_no(package="pyarrow")}
+
 
 def _check_result(result, expected_dtype):
     """
@@ -171,22 +174,105 @@ def test_to_numpy_numpy_string(dtype):
 # - BooleanDtype
 # - ArrowDtype: a special dtype used to store data in the PyArrow format.
 #
+# In pandas, PyArrow types can be specified using the following formats:
+#
+# - Prefixed with the name of the dtype and "[pyarrow]" (e.g., "int8[pyarrow]")
+# - Specified using ``ArrowDtype`` (e.g., "pd.ArrowDtype(pa.int8())")
+#
 # References:
 # 1. https://pandas.pydata.org/docs/reference/arrays.html
 # 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes
 # 3. https://pandas.pydata.org/docs/user_guide/pyarrow.html
 ########################################################################################
-@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params)
+@pytest.mark.parametrize(
+    ("dtype", "expected_dtype"),
+    [
+        *np_dtype_params,
+        pytest.param(pd.Int8Dtype(), np.int8, id="Int8"),
+        pytest.param(pd.Int16Dtype(), np.int16, id="Int16"),
+        pytest.param(pd.Int32Dtype(), np.int32, id="Int32"),
+        pytest.param(pd.Int64Dtype(), np.int64, id="Int64"),
+        pytest.param(pd.UInt8Dtype(), np.uint8, id="UInt8"),
+        pytest.param(pd.UInt16Dtype(), np.uint16, id="UInt16"),
+        pytest.param(pd.UInt32Dtype(), np.uint32, id="UInt32"),
+        pytest.param(pd.UInt64Dtype(), np.uint64, id="UInt64"),
+        pytest.param(pd.Float32Dtype(), np.float32, id="Float32"),
+        pytest.param(pd.Float64Dtype(), np.float64, id="Float64"),
+        pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]", **pa_marks),
+        pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]", **pa_marks),
+        pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]", **pa_marks),
+        pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]", **pa_marks),
+        pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]", **pa_marks),
+        pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]", **pa_marks),
+        pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]", **pa_marks),
+        pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]", **pa_marks),
+        pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks),
+        pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks),
+        pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks),
+    ],
+)
 def test_to_numpy_pandas_numeric(dtype, expected_dtype):
     """
     Test the _to_numpy function with pandas.Series of numeric dtypes.
     """
-    series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2]  # Not C-contiguous
+    data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+    if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"):
+        # float16 needs special handling for pandas < 2.2.
+        # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
+        data = np.array(data, dtype=np.float16)
+    series = pd.Series(data, dtype=dtype)[::2]  # Not C-contiguous
     result = _to_numpy(series)
     _check_result(result, expected_dtype)
     npt.assert_array_equal(result, series)
 
 
+@pytest.mark.parametrize(
+    ("dtype", "expected_dtype"),
+    [
+        pytest.param(np.float16, np.float16, id="float16"),
+        pytest.param(np.float32, np.float32, id="float32"),
+        pytest.param(np.float64, np.float64, id="float64"),
+        pytest.param(np.longdouble, np.longdouble, id="longdouble"),
+        pytest.param(pd.Int8Dtype(), np.float64, id="Int8"),
+        pytest.param(pd.Int16Dtype(), np.float64, id="Int16"),
+        pytest.param(pd.Int32Dtype(), np.float64, id="Int32"),
+        pytest.param(pd.Int64Dtype(), np.float64, id="Int64"),
+        pytest.param(pd.UInt8Dtype(), np.float64, id="UInt8"),
+        pytest.param(pd.UInt16Dtype(), np.float64, id="UInt16"),
+        pytest.param(pd.UInt32Dtype(), np.float64, id="UInt32"),
+        pytest.param(pd.UInt64Dtype(), np.float64, id="UInt64"),
+        pytest.param(pd.Float32Dtype(), np.float32, id="Float32"),
+        pytest.param(pd.Float64Dtype(), np.float64, id="Float64"),
+        pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]", **pa_marks),
+        pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]", **pa_marks),
+        pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]", **pa_marks),
+        pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]", **pa_marks),
+        pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]", **pa_marks),
+        pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]", **pa_marks),
+        pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]", **pa_marks),
+        pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]", **pa_marks),
+        pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks),
+        pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks),
+        pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks),
+    ],
+)
+def test_to_numpy_pandas_series_pandas_dtypes_numeric_with_na(dtype, expected_dtype):
+    """
+    Test the _to_numpy function with pandas.Series of NumPy/pandas/PyArrow numeric
+    dtypes and missing values (NA).
+    """
+    data = [1.0, 2.0, None, 4.0, 5.0, 6.0]
+    if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"):
+        # float16 needs special handling for pandas < 2.2.
+        # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
+        data = np.array(data, dtype=np.float16)
+    series = pd.Series(data, dtype=dtype)[::2]  # Not C-contiguous
+    assert series.isna().any()
+    result = _to_numpy(series)
+    _check_result(result, expected_dtype)
+    npt.assert_array_equal(result, np.array([1.0, np.nan, 5.0], dtype=expected_dtype))
+
+
 @pytest.mark.parametrize(
     "dtype",
     [

diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py
@@ -69,17 +69,6 @@ def test_vectors_to_arrays_not_c_contiguous():
     _check_arrays(arrays)
 
 
-def test_vectors_to_arrays_pandas_nan():
-    """
-    Test the vectors_to_arrays function with pandas Series containing NaNs.
-    """
-    vectors = [pd.Series(data=[0, 4, pd.NA, 8, 6], dtype=pd.Int32Dtype())]
-    arrays = vectors_to_arrays(vectors)
-    npt.assert_equal(arrays[0], np.array([0, 4, np.nan, 8, 6], dtype=np.float64))
-    assert arrays[0].dtype == np.float64
-    _check_arrays(arrays)
-
-
 @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed.")
 def test_vectors_to_arrays_pyarrow_datetime():
     """