From 7e00006b878fba7f44e8569beea42bfb6d5fc035 Mon Sep 17 00:00:00 2001 From: fjr Date: Sun, 22 Sep 2024 18:06:01 +0500 Subject: [PATCH 1/5] Fix docstring issues for pandas.Series.str.match and pandas.Series.sparse.sp_values --- .circleci/config.yml | 14 +++++- pandas/core/arrays/sparse/array.py | 32 +++++++++--- pandas/core/strings/accessor.py | 79 ++++++++++++++++-------------- 3 files changed, 78 insertions(+), 47 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 27b6829dcda70..9c986e5b1b054 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -92,7 +92,13 @@ jobs: no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that command: | pip3 install cibuildwheel==2.20.0 - cibuildwheel --output-dir wheelhouse + if [[ $CIBW_BUILD == cp313t* ]]; then + # TODO: temporarily run 3.13 free threaded builds without build isolation + # since we need pre-release cython + CIBW_BUILD_FRONTEND="pip; args: --no-build-isolation" cibuildwheel --output-dir wheelhouse + else + cibuildwheel --output-dir wheelhouse + fi environment: CIBW_BUILD: << parameters.cibw-build >> @@ -141,6 +147,10 @@ workflows: cibw-build: ["cp310-manylinux_aarch64", "cp311-manylinux_aarch64", "cp312-manylinux_aarch64", + "cp313-manylinux_aarch64", + "cp313t-manylinux_aarch64", "cp310-musllinux_aarch64", "cp311-musllinux_aarch64", - "cp312-musllinux_aarch64",] + "cp312-musllinux_aarch64", + "cp313-musllinux_aarch64", + "cp313t-musllinux_aarch64"] diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index a09dc20af3b36..e9edd570ac5b2 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -601,15 +601,31 @@ def sp_index(self) -> SparseIndex: @property def sp_values(self) -> np.ndarray: """ - An ndarray containing the non- ``fill_value`` values. +An ndarray containing the non-``fill_value`` values. + +This method retrieves the non-fill values from a SparseArray. SparseArrays +are designed to efficiently store large arrays of data where most of the +elements are the same (the fill value). This method allows you to access +the actual data points that differ from the fill value. + +Returns +------- +ndarray + An array containing the non-fill values. + +See Also +-------- +Series.sparse.fill_value : The fill value for the SparseArray. +arrays.SparseArray : Represents an array with sparse data. + +Examples +-------- +>>> from pandas.arrays import SparseArray +>>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) +>>> s.sp_values +array([1, 2]) +""" - Examples - -------- - >>> from pandas.arrays import SparseArray - >>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) - >>> s.sp_values - array([1, 2]) - """ return self._sparse_values @property diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 6d10365a1b968..1392595e262c1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1364,44 +1364,49 @@ def contains( @forbid_nonstring_types(["bytes"]) def match(self, pat: str, case: bool = True, flags: int = 0, na=None): """ - Determine if each string starts with a match of a regular expression. +Determine if each string starts with a match of a regular expression. + +This method checks if each string in the Series starts with a substring +that matches the given regular expression pattern. It returns a boolean +Series indicating whether each string meets the condition. + +Parameters +---------- +pat : str + Character sequence. +case : bool, default True + If True, case sensitive. +flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. +na : scalar, optional + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. + +Returns +------- +Series/Index/array of boolean values + A Series, Index, or array of boolean values indicating whether the start + of each string matches the pattern. The result will be of the same type + as the input. + +See Also +-------- +fullmatch : Stricter matching that requires the entire string to match. +contains : Analogous, but less strict, relying on re.search instead of + re.match. +extract : Extract matched groups. + +Examples +-------- +>>> ser = pd.Series(["horse", "eagle", "donkey"]) +>>> ser.str.match("e") +0 False +1 True +2 False +dtype: bool +""" - Parameters - ---------- - pat : str - Character sequence. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. - na : scalar, optional - Fill value for missing values. The default depends on dtype of the - array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, - ``pandas.NA`` is used. - - Returns - ------- - Series/Index/array of boolean values - A Series, Index, or array of boolean values indicating whether the start - of each string matches the pattern. The result will be of the same type - as the input. - - See Also - -------- - fullmatch : Stricter matching that requires the entire string to match. - contains : Analogous, but less strict, relying on re.search instead of - re.match. - extract : Extract matched groups. - - Examples - -------- - >>> ser = pd.Series(["horse", "eagle", "donkey"]) - >>> ser.str.match("e") - 0 False - 1 True - 2 False - dtype: bool - """ result = self._data.array._str_match(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) From 472489b0b7a3ca7cd68dbace24fc3f7fe0ff7b8a Mon Sep 17 00:00:00 2001 From: fjr Date: Sun, 22 Sep 2024 18:19:25 +0500 Subject: [PATCH 2/5] Remove methods from code_checks.sh after fixing docstrings --- ci/code_checks.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f2d9f582d8932..f6527ca18b41c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -100,7 +100,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.sparse.fill_value SA01" \ -i "pandas.Series.sparse.from_coo PR07,SA01" \ -i "pandas.Series.sparse.npoints SA01" \ - -i "pandas.Series.sparse.sp_values SA01" \ -i "pandas.Timedelta.max PR02" \ -i "pandas.Timedelta.min PR02" \ -i "pandas.Timedelta.resolution PR02" \ From 8ef300eb459609fd5fd64a4051937414ed4629a1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 22 Sep 2024 15:42:34 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/core/arrays/sparse/array.py | 48 ++++++++--------- pandas/core/strings/accessor.py | 84 +++++++++++++++--------------- 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index e9edd570ac5b2..f3eaca364a240 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -601,30 +601,30 @@ def sp_index(self) -> SparseIndex: @property def sp_values(self) -> np.ndarray: """ -An ndarray containing the non-``fill_value`` values. - -This method retrieves the non-fill values from a SparseArray. SparseArrays -are designed to efficiently store large arrays of data where most of the -elements are the same (the fill value). This method allows you to access -the actual data points that differ from the fill value. - -Returns -------- -ndarray - An array containing the non-fill values. - -See Also --------- -Series.sparse.fill_value : The fill value for the SparseArray. -arrays.SparseArray : Represents an array with sparse data. - -Examples --------- ->>> from pandas.arrays import SparseArray ->>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) ->>> s.sp_values -array([1, 2]) -""" + An ndarray containing the non-``fill_value`` values. + + This method retrieves the non-fill values from a SparseArray. SparseArrays + are designed to efficiently store large arrays of data where most of the + elements are the same (the fill value). This method allows you to access + the actual data points that differ from the fill value. + + Returns + ------- + ndarray + An array containing the non-fill values. + + See Also + -------- + Series.sparse.fill_value : The fill value for the SparseArray. + arrays.SparseArray : Represents an array with sparse data. + + Examples + -------- + >>> from pandas.arrays import SparseArray + >>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) + >>> s.sp_values + array([1, 2]) + """ return self._sparse_values diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 1392595e262c1..6a23b4e2d7a4f 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1364,48 +1364,48 @@ def contains( @forbid_nonstring_types(["bytes"]) def match(self, pat: str, case: bool = True, flags: int = 0, na=None): """ -Determine if each string starts with a match of a regular expression. - -This method checks if each string in the Series starts with a substring -that matches the given regular expression pattern. It returns a boolean -Series indicating whether each string meets the condition. - -Parameters ----------- -pat : str - Character sequence. -case : bool, default True - If True, case sensitive. -flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. -na : scalar, optional - Fill value for missing values. The default depends on dtype of the - array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, - ``pandas.NA`` is used. - -Returns -------- -Series/Index/array of boolean values - A Series, Index, or array of boolean values indicating whether the start - of each string matches the pattern. The result will be of the same type - as the input. - -See Also --------- -fullmatch : Stricter matching that requires the entire string to match. -contains : Analogous, but less strict, relying on re.search instead of - re.match. -extract : Extract matched groups. - -Examples --------- ->>> ser = pd.Series(["horse", "eagle", "donkey"]) ->>> ser.str.match("e") -0 False -1 True -2 False -dtype: bool -""" + Determine if each string starts with a match of a regular expression. + + This method checks if each string in the Series starts with a substring + that matches the given regular expression pattern. It returns a boolean + Series indicating whether each string meets the condition. + + Parameters + ---------- + pat : str + Character sequence. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. + na : scalar, optional + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. + + Returns + ------- + Series/Index/array of boolean values + A Series, Index, or array of boolean values indicating whether the start + of each string matches the pattern. The result will be of the same type + as the input. + + See Also + -------- + fullmatch : Stricter matching that requires the entire string to match. + contains : Analogous, but less strict, relying on re.search instead of + re.match. + extract : Extract matched groups. + + Examples + -------- + >>> ser = pd.Series(["horse", "eagle", "donkey"]) + >>> ser.str.match("e") + 0 False + 1 True + 2 False + dtype: bool + """ result = self._data.array._str_match(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) From 9ff14bc27cfd63d7f06fcb387d985a6309621f82 Mon Sep 17 00:00:00 2001 From: fjr Date: Mon, 23 Sep 2024 05:46:01 +0500 Subject: [PATCH 4/5] Trigger CI re-run --- doc/source/user_guide/style.ipynb | 390 ++++++++++++------------ pandas/core/_numba/kernels/min_max_.py | 6 +- pandas/core/dtypes/astype.py | 2 +- pandas/core/dtypes/cast.py | 8 +- pandas/core/nanops.py | 9 +- pandas/io/excel/_odfreader.py | 3 +- pandas/tests/arithmetic/test_numeric.py | 4 +- pandas/tests/frame/test_query_eval.py | 2 +- 8 files changed, 203 insertions(+), 221 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index daecfce6ecebc..1afc5d0cf3b3c 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -46,7 +46,6 @@ }, "outputs": [], "source": [ - "import matplotlib.pyplot\n", "# We have this here to trigger matplotlib's font cache stuff.\n", "# This cell is hidden from the output" ] @@ -78,7 +77,6 @@ "source": [ "import pandas as pd\n", "import numpy as np\n", - "import matplotlib as mpl\n", "\n", "df = pd.DataFrame({\n", " \"strings\": [\"Adam\", \"Mike\"],\n", @@ -104,11 +102,11 @@ "metadata": {}, "outputs": [], "source": [ - "weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n", + "weather_df = pd.DataFrame(np.random.rand(10,2)*5,\n", " index=pd.date_range(start=\"2021-01-01\", periods=10),\n", " columns=[\"Tokyo\", \"Beijing\"])\n", "\n", - "def rain_condition(v): \n", + "def rain_condition(v):\n", " if v < 1.75:\n", " return \"Dry\"\n", " elif v < 2.75:\n", @@ -227,9 +225,9 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.DataFrame([[38.0, 2.0, 18.0, 22.0, 21, np.nan],[19, 439, 6, 452, 226,232]], \n", - " index=pd.Index(['Tumour (Positive)', 'Non-Tumour (Negative)'], name='Actual Label:'), \n", - " columns=pd.MultiIndex.from_product([['Decision Tree', 'Regression', 'Random'],['Tumour', 'Non-Tumour']], names=['Model:', 'Predicted:']))\n", + "df = pd.DataFrame([[38.0, 2.0, 18.0, 22.0, 21, np.nan],[19, 439, 6, 452, 226,232]],\n", + " index=pd.Index([\"Tumour (Positive)\", \"Non-Tumour (Negative)\"], name=\"Actual Label:\"),\n", + " columns=pd.MultiIndex.from_product([[\"Decision Tree\", \"Regression\", \"Random\"],[\"Tumour\", \"Non-Tumour\"]], names=[\"Model:\", \"Predicted:\"]))\n", "df.style" ] }, @@ -243,62 +241,62 @@ "source": [ "# Hidden cell to just create the below example: code is covered throughout the guide.\n", "s = df.style\\\n", - " .hide([('Random', 'Tumour'), ('Random', 'Non-Tumour')], axis='columns')\\\n", - " .format('{:.0f}')\\\n", + " .hide([(\"Random\", \"Tumour\"), (\"Random\", \"Non-Tumour\")], axis=\"columns\")\\\n", + " .format(\"{:.0f}\")\\\n", " .set_table_styles([{\n", - " 'selector': '',\n", - " 'props': 'border-collapse: separate;'\n", + " \"selector\": \"\",\n", + " \"props\": \"border-collapse: separate;\"\n", " },{\n", - " 'selector': 'caption',\n", - " 'props': 'caption-side: bottom; font-size:1.3em;'\n", + " \"selector\": \"caption\",\n", + " \"props\": \"caption-side: bottom; font-size:1.3em;\"\n", " },{\n", - " 'selector': '.index_name',\n", - " 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'\n", + " \"selector\": \".index_name\",\n", + " \"props\": \"font-style: italic; color: darkgrey; font-weight:normal;\"\n", " },{\n", - " 'selector': 'th:not(.index_name)',\n", - " 'props': 'background-color: #000066; color: white;'\n", + " \"selector\": \"th:not(.index_name)\",\n", + " \"props\": \"background-color: #000066; color: white;\"\n", " },{\n", - " 'selector': 'th.col_heading',\n", - " 'props': 'text-align: center;'\n", + " \"selector\": \"th.col_heading\",\n", + " \"props\": \"text-align: center;\"\n", " },{\n", - " 'selector': 'th.col_heading.level0',\n", - " 'props': 'font-size: 1.5em;'\n", + " \"selector\": \"th.col_heading.level0\",\n", + " \"props\": \"font-size: 1.5em;\"\n", " },{\n", - " 'selector': 'th.col2',\n", - " 'props': 'border-left: 1px solid white;'\n", + " \"selector\": \"th.col2\",\n", + " \"props\": \"border-left: 1px solid white;\"\n", " },{\n", - " 'selector': '.col2',\n", - " 'props': 'border-left: 1px solid #000066;'\n", + " \"selector\": \".col2\",\n", + " \"props\": \"border-left: 1px solid #000066;\"\n", " },{\n", - " 'selector': 'td',\n", - " 'props': 'text-align: center; font-weight:bold;'\n", + " \"selector\": \"td\",\n", + " \"props\": \"text-align: center; font-weight:bold;\"\n", " },{\n", - " 'selector': '.true',\n", - " 'props': 'background-color: #e6ffe6;'\n", + " \"selector\": \".true\",\n", + " \"props\": \"background-color: #e6ffe6;\"\n", " },{\n", - " 'selector': '.false',\n", - " 'props': 'background-color: #ffe6e6;'\n", + " \"selector\": \".false\",\n", + " \"props\": \"background-color: #ffe6e6;\"\n", " },{\n", - " 'selector': '.border-red',\n", - " 'props': 'border: 2px dashed red;'\n", + " \"selector\": \".border-red\",\n", + " \"props\": \"border: 2px dashed red;\"\n", " },{\n", - " 'selector': '.border-green',\n", - " 'props': 'border: 2px dashed green;'\n", + " \"selector\": \".border-green\",\n", + " \"props\": \"border: 2px dashed green;\"\n", " },{\n", - " 'selector': 'td:hover',\n", - " 'props': 'background-color: #ffffb3;'\n", + " \"selector\": \"td:hover\",\n", + " \"props\": \"background-color: #ffffb3;\"\n", " }])\\\n", - " .set_td_classes(pd.DataFrame([['true border-green', 'false', 'true', 'false border-red', '', ''],\n", - " ['false', 'true', 'false', 'true', '', '']], \n", + " .set_td_classes(pd.DataFrame([[\"true border-green\", \"false\", \"true\", \"false border-red\", \"\", \"\"],\n", + " [\"false\", \"true\", \"false\", \"true\", \"\", \"\"]],\n", " index=df.index, columns=df.columns))\\\n", " .set_caption(\"Confusion matrix for multiple cancer prediction models.\")\\\n", - " .set_tooltips(pd.DataFrame([['This model has a very strong true positive rate', '', '', \"This model's total number of false negatives is too high\", '', ''],\n", - " ['', '', '', '', '', '']], \n", + " .set_tooltips(pd.DataFrame([[\"This model has a very strong true positive rate\", \"\", \"\", \"This model's total number of false negatives is too high\", \"\", \"\"],\n", + " [\"\", \"\", \"\", \"\", \"\", \"\"]],\n", " index=df.index, columns=df.columns),\n", - " css_class='pd-tt', props=\n", - " 'visibility: hidden; position: absolute; z-index: 1; border: 1px solid #000066;'\n", - " 'background-color: white; color: #000066; font-size: 0.8em;' \n", - " 'transform: translate(0px, -24px); padding: 0.6em; border-radius: 0.5em;')\n" + " css_class=\"pd-tt\", props=\n", + " \"visibility: hidden; position: absolute; z-index: 1; border: 1px solid #000066;\"\n", + " \"background-color: white; color: #000066; font-size: 0.8em;\"\n", + " \"transform: translate(0px, -24px); padding: 0.6em; border-radius: 0.5em;\")\n" ] }, { @@ -325,7 +323,7 @@ "metadata": {}, "outputs": [], "source": [ - "s = df.style.format('{:.0f}').hide([('Random', 'Tumour'), ('Random', 'Non-Tumour')], axis=\"columns\")\n", + "s = df.style.format(\"{:.0f}\").hide([(\"Random\", \"Tumour\"), (\"Random\", \"Non-Tumour\")], axis=\"columns\")\n", "s" ] }, @@ -337,8 +335,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('after_hide')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"after_hide\")" ] }, { @@ -395,16 +393,16 @@ "outputs": [], "source": [ "cell_hover = { # for row hover use instead of \n", - " 'selector': 'td:hover',\n", - " 'props': [('background-color', '#ffffb3')]\n", + " \"selector\": \"td:hover\",\n", + " \"props\": [(\"background-color\", \"#ffffb3\")]\n", "}\n", "index_names = {\n", - " 'selector': '.index_name',\n", - " 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'\n", + " \"selector\": \".index_name\",\n", + " \"props\": \"font-style: italic; color: darkgrey; font-weight:normal;\"\n", "}\n", "headers = {\n", - " 'selector': 'th:not(.index_name)',\n", - " 'props': 'background-color: #000066; color: white;'\n", + " \"selector\": \"th:not(.index_name)\",\n", + " \"props\": \"background-color: #000066; color: white;\"\n", "}\n", "s.set_table_styles([cell_hover, index_names, headers])" ] @@ -417,8 +415,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('after_tab_styles1')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"after_tab_styles1\")" ] }, { @@ -435,9 +433,9 @@ "outputs": [], "source": [ "s.set_table_styles([\n", - " {'selector': 'th.col_heading', 'props': 'text-align: center;'},\n", - " {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em;'},\n", - " {'selector': 'td', 'props': 'text-align: center; font-weight: bold;'},\n", + " {\"selector\": \"th.col_heading\", \"props\": \"text-align: center;\"},\n", + " {\"selector\": \"th.col_heading.level0\", \"props\": \"font-size: 1.5em;\"},\n", + " {\"selector\": \"td\", \"props\": \"text-align: center; font-weight: bold;\"},\n", "], overwrite=False)" ] }, @@ -449,8 +447,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('after_tab_styles2')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"after_tab_styles2\")" ] }, { @@ -469,8 +467,8 @@ "outputs": [], "source": [ "s.set_table_styles({\n", - " ('Regression', 'Tumour'): [{'selector': 'th', 'props': 'border-left: 1px solid white'},\n", - " {'selector': 'td', 'props': 'border-left: 1px solid #000066'}]\n", + " (\"Regression\", \"Tumour\"): [{\"selector\": \"th\", \"props\": \"border-left: 1px solid white\"},\n", + " {\"selector\": \"td\", \"props\": \"border-left: 1px solid #000066\"}]\n", "}, overwrite=False, axis=0)" ] }, @@ -482,8 +480,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('xyz01')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"xyz01\")" ] }, { @@ -508,7 +506,7 @@ "outputs": [], "source": [ "out = s.set_table_attributes('class=\"my-table-cls\"').to_html()\n", - "print(out[out.find('