From 948392f909ac81e290f20885de15afc102877adb Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Mon, 30 Dec 2024 01:00:10 +0000 Subject: [PATCH] docs (#265) --- docs/getting/faq.rst | 42 ---------------------------- docs/getting/index.rst | 2 +- docs/getting/projects.rst | 57 ++++++++++++++++++++++++++++++++++++++ docs/getting/tutorial.rst | 16 ++++++++++- docs/user/common.rst | 56 ++----------------------------------- docs/user/index.rst | 1 + docs/user/initializing.rst | 50 +++++++++++++++++++++++---------- docs/user/numpy.rst | 40 ++++++++++++++++++++++++++ requirements_docs.txt | 1 + 9 files changed, 152 insertions(+), 113 deletions(-) delete mode 100644 docs/getting/faq.rst create mode 100644 docs/getting/projects.rst create mode 100644 docs/user/numpy.rst diff --git a/docs/getting/faq.rst b/docs/getting/faq.rst deleted file mode 100644 index 75d7db7..0000000 --- a/docs/getting/faq.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _faq: - -Frequently asked questions -========================== - - -Why the name *Pint*? --------------------- - -Pint is a unit and sounds like Python in the first syllable. Most important, it is a good unit for beer. - - -You mention other similar Python libraries. Can you point me to those? ----------------------------------------------------------------------- - -`natu `_ - -`Buckingham `_ - -`Magnitude `_ - -`SciMath `_ - -`Python-quantities `_ - -`Unum `_ - -`Units `_ - -`udunitspy `_ - -`SymPy `_ - -`cf units `_ - -`astropy units `_ - -`yt `_ - -`measurement `_ - -If you're aware of another one, please contribute a patch to the docs. diff --git a/docs/getting/index.rst b/docs/getting/index.rst index 6e000fc..5f80c85 100644 --- a/docs/getting/index.rst +++ b/docs/getting/index.rst @@ -53,4 +53,4 @@ That's all! 
You can check that Pint is correctly installed by starting up python :hidden: tutorial - faq + projects diff --git a/docs/getting/projects.rst b/docs/getting/projects.rst new file mode 100644 index 0000000..39ef70d --- /dev/null +++ b/docs/getting/projects.rst @@ -0,0 +1,57 @@ +***************************** +Pint-Pandas in your projects +***************************** + +Using a Shared Unit Registry +---------------------------- + +As described `in the documentation of the main pint package: `_: + + If you use Pint in multiple modules within your Python package, you normally want to avoid creating multiple instances of the unit registry. The best way to do this is by instantiating the registry in a single place. For example, you can add the following code to your package ``__init__.py`` + +When using `pint_pandas`, this extends to using the same unit registry that was created by the main `pint` package. This is done by using the :func:`pint.get_application_registry() ` function. + +In a sample project structure of this kind: + +.. code-block:: text + + . + └── mypackage/ + ├── __init__.py + ├── main.py + └── mysubmodule/ + ├── __init__.py + └── calculations.py + +After defining the registry in the ``mypackage.__init__`` module: + +.. code-block:: python + + from pint import UnitRegistry, set_application_registry + ureg = UnitRegistry() + ureg.formatter.default_format = "P" + + set_application_registry(ureg) + +In the ``mypackage.mysubmodule.calculations`` module, you should *get* the shared registry like so: + +.. code-block:: python + + import pint + ureg = pint.get_application_registry() + + @ureg.check( + '[length]', + ) + def multiply_value(distance): + return distance * 2 + +Failure to use the application registry will result in a ``DimensionalityError`` of the kind: + + Cannot convert from ' ' ([]) to 'a quantity of' ([])". + +For example: + +.. 
code-block:: text + + DimensionalityError: Cannot convert from '200 metric_ton' ([mass]) to 'a quantity of' ([mass])" diff --git a/docs/getting/tutorial.rst b/docs/getting/tutorial.rst index f8f1b48..ae7cf6c 100644 --- a/docs/getting/tutorial.rst +++ b/docs/getting/tutorial.rst @@ -49,7 +49,7 @@ Operations with columns are units aware so behave as we would intuitively expect Notice that the units are not displayed in the cells of the DataFrame. If you ever see units in the cells of the DataFrame, something isn't right. - See :ref:`units_in_cells` for more information. + See :doc:`Units in Cells <../user/common>` for more information. We can see the columns' units in the dtypes attribute @@ -75,6 +75,17 @@ The PintArray contains a Quantity df.power.values.quantity +DataFrame Index +----------------------- + +PintArrays can be used as the DataFrame's index. + +.. ipython:: python + + time = pd.Series([1, 2, 2, 3], dtype="pint[second]") + df.index = time + df.index + Pandas Series Accessors ----------------------- Pandas Series accessors are provided for most Quantity properties and methods. @@ -84,3 +95,6 @@ Methods that return arrays will be converted to Series. df.power.pint.units df.power.pint.to("kW") + + +That's the basics! More examples are given at :doc:`Reading from csv <../user/reading>`. diff --git a/docs/user/common.rst b/docs/user/common.rst index 6bbd74f..96b94aa 100644 --- a/docs/user/common.rst +++ b/docs/user/common.rst @@ -7,7 +7,7 @@ Common Issues Pandas support for ``ExtensionArray`` is still in development. As a result, there are some common issues that pint-pandas users may encounter. This page provides some guidance on how to resolve these issues. -Units in Cells (Object dtype columns) +Units in Cells ------------------------------------- The most common issue pint-pandas users encouter is that they have a DataFrame with column that aren't PintArrays. 
@@ -58,63 +58,11 @@ Creating DataFrames from Series The default operation of Pandas `pd.concat` function is to perform row-wise concatenation. When given a list of Series, each of which is backed by a PintArray, this will inefficiently convert all the PintArrays to arrays of `object` type, concatenate the several series into a DataFrame with that many rows, and then leave it up to you to convert that DataFrame back into column-wise PintArrays. A much more efficient approach is to concatenate Series in a column-wise fashion: .. ipython:: python - :suppress: :okexcept: list_of_series = [pd.Series([1.0, 2.0], dtype="pint[m]") for i in range(0, 10)] df = pd.concat(list_of_series, axis=1) + df This will preserve all the PintArrays in each of the Series. - - -Using a Shared Unit Registry ----------------------------- - -As described `in the documentation of the main pint package: `_: - - If you use Pint in multiple modules within your Python package, you normally want to avoid creating multiple instances of the unit registry. The best way to do this is by instantiating the registry in a single place. For example, you can add the following code to your package ``__init__.py`` - -When using `pint_pandas`, this extends to using the same unit registry that was created by the main `pint` package. This is done by using the :func:`pint.get_application_registry() ` function. - -In a sample project structure of this kind: - -.. code-block:: text - - . - └── mypackage/ - ├── __init__.py - ├── main.py - └── mysubmodule/ - ├── __init__.py - └── calculations.py - -After defining the registry in the ``mypackage.__init__`` module: - -.. code-block:: python - - import pint - ureg = pint.get_application_registry() - -In the ``mypackage.mysubmodule.calculations`` module, you should *get* the shared registry like so: - -.. 
code-block:: python - - import pint - ureg = pint.get_application_registry() - - @ureg.check( - '[length]', - ) - def multiply_value(distance): - return distance * 2 - -Failure to do this will result in a ``DimensionalityError`` of the kind: - - Cannot convert from ' ' ([]) to 'a quantity of' ([])". - -For example: - -.. code-block:: text - - DimensionalityError: Cannot convert from '200 metric_ton' ([mass]) to 'a quantity of' ([mass])" diff --git a/docs/user/index.rst b/docs/user/index.rst index ccdd4b0..c36172f 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -11,4 +11,5 @@ examples that describe many common tasks that you can accomplish with pint. reading initializing + numpy common diff --git a/docs/user/initializing.rst b/docs/user/initializing.rst index e843300..291b115 100644 --- a/docs/user/initializing.rst +++ b/docs/user/initializing.rst @@ -4,20 +4,23 @@ Initializing data ************************** -There are several ways to initialize a `PintArray`s` in a `DataFrame`. Here's the most common methods. We'll use `PA_` and `Q_` as shorthand for `PintArray` and `Quantity`. - - +There are several ways to initialize a ``PintArray`` in a ``DataFrame``. Here are the most common methods. .. ipython:: python - :okwarning: + :suppress: import pandas as pd import pint import pint_pandas + import numpy as np - PA_ = pint_pandas.PintArray + PintArray = pint_pandas.PintArray ureg = pint_pandas.PintType.ureg - Q_ = ureg.Quantity + Quantity = ureg.Quantity + + +.. ipython:: python + :okwarning: df = pd.DataFrame( { @@ -25,12 +28,12 @@ There are several ways to initialize a `PintArray`s` in a `DataFrame`. 
Here's th "Ser2": pd.Series([1, 2]).astype("pint[m]"), "Ser3": pd.Series([1, 2], dtype="pint[m][Int64]"), "Ser4": pd.Series([1, 2]).astype("pint[m][Int64]"), - "PArr1": PA_([1, 2], dtype="pint[m]"), - "PArr2": PA_([1, 2], dtype="pint[m][Int64]"), - "PArr3": PA_([1, 2], dtype="m"), - "PArr4": PA_([1, 2], dtype=ureg.m), - "PArr5": PA_(Q_([1, 2], ureg.m)), - "PArr6": PA_([1, 2],"m"), + "PArr1": PintArray([1, 2], dtype="pint[m]"), + "PArr2": PintArray([1, 2], dtype="pint[m][Int64]"), + "PArr3": PintArray([1, 2], dtype="m"), + "PArr4": PintArray([1, 2], dtype=ureg.m), + "PArr5": PintArray(Quantity([1, 2], ureg.m)), + "PArr6": PintArray([1, 2],"m"), } ) df @@ -43,11 +46,28 @@ In the first two Series examples above, the data was converted to Float64. df.dtypes -To avoid this conversion, specify the subdtype (dtype of the magnitudes) in the dtype `"pint[m][Int64]"` when constructing using a `Series`. The default data dtype that pint-pandas converts to can be changed by modifying `pint_pandas.DEFAULT_SUBDTYPE`. +To avoid this conversion, specify the subdtype (dtype of the magnitudes) in the dtype ``"pint[m][Int64]"`` when constructing using a ``Series``. The default data dtype that pint-pandas converts to can be changed by modifying ``pint_pandas.DEFAULT_SUBDTYPE``. -`PintArray` infers the subdtype from the data passed into it when there is no subdtype specified in the dtype. It also accepts a pint `Unit`` or unit string as the dtype. +``PintArray`` infers the subdtype from the data passed into it when there is no subdtype specified in the dtype. It also accepts a pint ``Unit`` or unit string as the dtype. .. note:: - `"pint[unit]"` or `"pint[unit][subdtype]"` must be used for the Series or DataFrame constuctor. + ``"pint[unit]"`` or ``"pint[unit][subdtype]"`` must be used for the Series or DataFrame constructor. 
+ +Non-native pandas dtypes +------------------------- + +``PintArray`` uses an ``ExtensionArray`` to hold its data including those from other libraries that extend pandas. +For example, an ``UncertaintyArray`` can be used. + +.. ipython:: python + + from uncertainties_pandas import UncertaintyArray, UncertaintyDtype + from uncertainties import ufloat, umath, unumpy + + ufloats = [ufloat(i, abs(i) / 100) for i in [4.0, np.nan, -5.0]] + uarr = UncertaintyArray(ufloats) + uarr + PintArray(uarr,"m") + pd.Series(PintArray(uarr,"m")*2) diff --git a/docs/user/numpy.rst b/docs/user/numpy.rst new file mode 100644 index 0000000..91a90f7 --- /dev/null +++ b/docs/user/numpy.rst @@ -0,0 +1,40 @@ +.. _numpy: + +************************** +Numpy support +************************** + +Numpy functions that work on pint ``Quantity`` ``ndarray`` objects also work on ``PintArray``. + + +.. ipython:: python + :suppress: + + import pandas as pd + import pint + import pint_pandas + import numpy as np + + PintArray = pint_pandas.PintArray + ureg = pint_pandas.PintType.ureg + Quantity = ureg.Quantity + +.. ipython:: python + + pa = PintArray([1, 2, np.nan, 4, 10], dtype="pint[m]") + np.clip(pa, 3 * ureg.m, 5 * ureg.m) + +Note that this function errors when applied to a ``Series``. + +.. ipython:: python + :okexcept: + + df = pd.DataFrame({"A": pa}) + np.clip(df['A'], 3 * ureg.m, 5 * ureg.m) + +Apply the function to the ``PintArray`` instead of the ``Series`` using ``Series.values``. + +.. ipython:: python + :okexcept: + + np.clip(df['A'].values, 3 * ureg.m, 5 * ureg.m) diff --git a/requirements_docs.txt b/requirements_docs.txt index c8ae06e..93be184 100644 --- a/requirements_docs.txt +++ b/requirements_docs.txt @@ -20,3 +20,4 @@ sphinx-book-theme>=1.1.0 sphinx_copybutton sphinx_design typing_extensions +uncertainties-pandas