Skip to content

Commit

Permalink
ARROW-15321: [Dev][Python] Also numpydoc-validate Cython-generated me…
Browse files Browse the repository at this point in the history
…thods

The numpydoc-validation routine in Archery would skip over many Cython-generated methods and properties.

This PR also fixes and enhances the docstrings that would newly raise validation errors.

Closes #12698 from pitrou/ARROW-15321-numpydoc-cython

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou committed Mar 28, 2022
1 parent 3eb5673 commit 6ab947b
Show file tree
Hide file tree
Showing 18 changed files with 851 additions and 238 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/docs_light.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ on:
pull_request:
paths:
- 'docs/**'
- '.github/workflows/docs.yml'
- '.github/workflows/docs_light.yml'
- 'ci/docker/conda.dockerfile'
- 'ci/docker/conda-cpp.dockerfile'
- 'ci/docker/conda-python.dockerfile'
Expand All @@ -37,12 +37,12 @@ env:
jobs:

light:
name: AMD64 Ubuntu 20.04 Sphinx Documentation
name: AMD64 Conda Python 3.9 Sphinx Documentation
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 30
env:
UBUNTU: "20.04"
PYTHON: "3.9"
steps:
- name: Checkout Arrow
uses: actions/checkout@v2
Expand All @@ -52,8 +52,8 @@ jobs:
uses: actions/cache@v2
with:
path: .docker
key: ubuntu-docs-${{ hashFiles('cpp/**') }}
restore-keys: ubuntu-docs-
key: conda-docs-${{ hashFiles('cpp/**') }}
restore-keys: conda-docs-
- name: Setup Python
uses: actions/setup-python@v2
with:
Expand Down
16 changes: 12 additions & 4 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,15 +303,22 @@ def lint(ctx, src, fix, iwyu_all, **checks):
sys.exit(1)


def _flatten_numpydoc_rules(rules):
flattened = []
for rule in rules:
flattened.extend(filter(None, rule.split(',')))
return flattened


@archery.command(short_help="Lint python docstring with NumpyDoc")
@click.argument('symbols', nargs=-1)
@click.option("--src", metavar="<arrow_src>", default=None,
callback=validate_arrow_sources,
help="Specify Arrow source directory")
@click.option("--allow-rule", "-a", multiple=True,
help="Allow only these rules")
help="Allow only these rules (can be comma-separated)")
@click.option("--disallow-rule", "-d", multiple=True,
help="Disallow these rules")
help="Disallow these rules (can be comma-separated)")
def numpydoc(src, symbols, allow_rule, disallow_rule):
"""
Pass list of modules or symbols as arguments to restrict the validation.
Expand All @@ -326,8 +333,9 @@ def numpydoc(src, symbols, allow_rule, disallow_rule):
"""
disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
try:
results = python_numpydoc(symbols, allow_rules=allow_rule,
disallow_rules=disallow_rule)
results = python_numpydoc(
symbols, allow_rules=_flatten_numpydoc_rules(allow_rule),
disallow_rules=_flatten_numpydoc_rules(disallow_rule))
for result in results:
result.ok()
except LintValidationException:
Expand Down
15 changes: 15 additions & 0 deletions dev/archery/archery/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,18 @@ def _import_pandas():
sys.modules['pyarrow'] = None
import pandas as pd
return pd


def _get_module(obj, *, default=None):
"""
Try to find the name of the module *obj* is defined on.
"""
try:
return obj.__module__
except AttributeError:
# Might be a method/property descriptor as generated by Cython,
# look up the enclosing class.
try:
return obj.__objclass__.__module__
except AttributeError:
return default
26 changes: 20 additions & 6 deletions dev/archery/archery/lang/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
# specific language governing permissions and limitations
# under the License.

from contextlib import contextmanager
import inspect
import tokenize
from contextlib import contextmanager

try:
from numpydoc.validate import Docstring, validate
Expand All @@ -26,6 +26,7 @@
else:
have_numpydoc = True

from ..compat import _get_module
from ..utils.logger import logger
from ..utils.command import Command, capture_stdout, default_bin

Expand Down Expand Up @@ -118,8 +119,11 @@ def traverse(self, fn, obj, from_package):
Parameters
----------
fn : callable
A function to apply on all traversed objects.
obj : Any
from_package : string, default 'pyarrow'
The object to start from.
from_package : string
Predicate to only consider objects from this package.
"""
todo = [obj]
Expand All @@ -139,10 +143,20 @@ def traverse(self, fn, obj, from_package):
continue

member = getattr(obj, name)
module = getattr(member, '__module__', None)
if not (module and module.startswith(from_package)):
module = _get_module(member)
if module is None or not module.startswith(from_package):
continue

# Is it a Cython-generated method? If so, try to detect
# whether it only has a implicitly-generated docstring,
# and no user-defined docstring following it.
# The generated docstring would lack description of method
# parameters and therefore fail Numpydoc validation.
if hasattr(member, '__objclass__'):
doc = getattr(member, '__doc__', None)
# The Cython-generated docstring would be a one-liner,
# such as "ReadOptions.equals(self, ReadOptions other)".
if (doc and '\n' not in doc and f'.{name}(' in doc):
continue
todo.append(member)

@contextmanager
Expand Down Expand Up @@ -195,7 +209,7 @@ def callback(obj):
try:
result = validate(obj)
except OSError as e:
symbol = f"{obj.__module__}.{obj.__name__}"
symbol = f"{_get_module(obj, default='')}.{obj.__name__}"
logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
return

Expand Down
3 changes: 2 additions & 1 deletion dev/archery/archery/utils/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import click

from .command import Bash, Command, default_bin
from ..compat import _get_module
from .cmake import CMake
from .git import git
from .logger import logger
Expand Down Expand Up @@ -284,7 +285,7 @@ def python_numpydoc(symbols=None, allow_rules=None, disallow_rules=None):
doc = getattr(obj, '__doc__', '')
name = getattr(obj, '__name__', '')
qualname = getattr(obj, '__qualname__', '')
module = getattr(obj, '__module__', '')
module = _get_module(obj, default='')
instance = getattr(obj, '__self__', '')
if instance:
klass = instance.__class__.__name__
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ services:
["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule PR01"]
archery numpydoc --allow-rule PR01,PR10"]

conda-python-dask:
# Possible $DASK parameters:
Expand Down
78 changes: 72 additions & 6 deletions python/pyarrow/_compute.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,17 @@ cdef class Function(_Weakrefable):
MemoryPool memory_pool=None):
"""
Call the function on the given arguments.
Parameters
----------
args : iterable
The arguments to pass to the function. Accepted types depend
on the specific function.
options : FunctionOptions, optional
Options instance for executing this function. This should have
the right concrete options type.
memory_pool : pyarrow.MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
"""
cdef:
const CFunctionOptions* c_options = NULL
Expand Down Expand Up @@ -2005,8 +2016,8 @@ cdef class Expression(_Weakrefable):
``|`` (logical or) and ``~`` (logical not).
Note: python keywords ``and``, ``or`` and ``not`` cannot be used
to combine expressions.
- Check whether the expression is contained in a list of values with
the ``pyarrow.compute.Expression.isin()`` member function.
- Create expression predicates using Expression methods such as
``pyarrow.compute.Expression.isin()``.
Examples
--------
Expand Down Expand Up @@ -2130,21 +2141,76 @@ cdef class Expression(_Weakrefable):
return Expression._call("divide_checked", [self, other])

def is_valid(self):
"""Checks whether the expression is not-null (valid)"""
"""
Check whether the expression is not-null (valid).
This creates a new expression equivalent to calling the
`is_valid` compute function on this expression.
Returns
-------
is_valid : Expression
"""
return Expression._call("is_valid", [self])

def is_null(self, bint nan_is_null=False):
"""Checks whether the expression is null"""
"""
Check whether the expression is null.
This creates a new expression equivalent to calling the
`is_null` compute function on this expression.
Parameters
----------
nan_is_null : boolean, default False
Whether floating-point NaNs are considered null.
Returns
-------
is_null : Expression
"""
options = NullOptions(nan_is_null=nan_is_null)
return Expression._call("is_null", [self], options)

def cast(self, type, bint safe=True):
"""Explicitly change the expression's data type"""
"""
Explicitly set or change the expression's data type.
This creates a new expression equivalent to calling the
`cast` compute function on this expression.
Parameters
----------
type : DataType
Type to cast array to.
safe : boolean, default True
Whether to check for conversion errors such as overflow.
Returns
-------
cast : Expression
"""
options = CastOptions.safe(ensure_type(type))
return Expression._call("cast", [self], options)

def isin(self, values):
"""Checks whether the expression is contained in values"""
"""
Check whether the expression is contained in values.
This creates a new expression equivalent to calling the
`is_in` compute function on this expression.
Parameters
----------
values : Array or iterable
The values to check for.
Returns
-------
isin : Expression
A new expression that, when evaluated, checks whether
this expression's value is contained in `values`.
"""
if not isinstance(values, Array):
values = lib.array(values)

Expand Down
Loading

0 comments on commit 6ab947b

Please sign in to comment.