diff --git a/py-polars/.flake8 b/py-polars/.flake8 index 5ba5c442ac51..7b45c039ddd0 100644 --- a/py-polars/.flake8 +++ b/py-polars/.flake8 @@ -5,8 +5,10 @@ extend-ignore = # Satisfy black: https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8 E203, # pydocstyle: http://www.pydocstyle.org/en/stable/error_codes.html - # numpy convention with D413 (Missing blank line after last section) - D107, D203, D212, D402, D415, D416 + # numpy convention with a few additional lints + D107, D203, D212, D402, D415, D416, + # TODO: Remove errors below to further improve docstring linting + D1, D400, D205, per-file-ignores = __init__.py:F401 diff --git a/py-polars/build.requirements.txt b/py-polars/build.requirements.txt index a5fdc5c5736d..46ddb98193fb 100644 --- a/py-polars/build.requirements.txt +++ b/py-polars/build.requirements.txt @@ -21,6 +21,7 @@ mypy==0.961 ghp-import==2.1.0 flake8==4.0.1 flake8-bugbear==22.7.1 +flake8-docstrings==1.6.0 sphinx==4.2.0 pydata-sphinx-theme==0.6.3 sphinx-panels==0.6.0 diff --git a/py-polars/polars/_html.py b/py-polars/polars/_html.py index 4e9966e39057..f7ed32ffe84a 100644 --- a/py-polars/polars/_html.py +++ b/py-polars/polars/_html.py @@ -1,6 +1,4 @@ -""" -Module for formatting output data in HTML. -""" +"""Module for formatting output data in HTML.""" from __future__ import annotations import os @@ -12,6 +10,8 @@ class Tag: + """Class for representing an HTML tag.""" + def __init__( self, elements: list[str], @@ -72,9 +72,7 @@ def __init__( self.col_idx = range(0, df.width) def write_header(self) -> None: - """ - Writes the header of an HTML table. - """ + """Write the header of an HTML table.""" self.elements.append(f"shape: {self.df.shape}") with Tag(self.elements, "thead"): with Tag(self.elements, "tr"): @@ -95,9 +93,7 @@ def write_header(self) -> None: self.elements.append(dtypes[c]) def write_body(self) -> None: - """ - Writes the body of an HTML table. 
- """ + """Write the body of an HTML table.""" str_lengths = int(os.environ.get("POLARS_FMT_STR_LEN", "15")) with Tag(self.elements, "tbody"): for r in self.row_idx: @@ -129,9 +125,11 @@ def render(self) -> list[str]: class NotebookFormatter(HTMLFormatter): """ - Internal class for formatting output data in html for display in Jupyter - Notebooks. This class is intended for functionality specific to - DataFrame._repr_html_() and DataFrame.to_html(notebook=True) + Class for formatting output data in HTML for display in Jupyter Notebooks. + + This class is intended for functionality specific to DataFrame._repr_html_() + and DataFrame.to_html(notebook=True). + """ def write_style(self) -> None: @@ -166,9 +164,7 @@ def write_style(self) -> None: self.write(template) def render(self) -> list[str]: - """ - Return the lines needed to render a HTML table. - """ + """Return the lines needed to render a HTML table.""" with Tag(self.elements, "div"): self.write_style() super().render() diff --git a/py-polars/polars/cfg.py b/py-polars/polars/cfg.py index ffda37ae5557..94bbaea13158 100644 --- a/py-polars/polars/cfg.py +++ b/py-polars/polars/cfg.py @@ -6,9 +6,7 @@ class Config: - """ - Configure polars - """ + """Configure polars.""" # class-local boolean flags can be used for options that don't have # a Rust component (so no need to register environment variables). @@ -16,9 +14,7 @@ class Config: @classmethod def set_utf8_tables(cls) -> type[Config]: - """ - Use utf8 characters to print tables - """ + """Use utf8 characters to print tables.""" # os.unsetenv is automatically called if we remove a key from os.environ, # see https://docs.python.org/3/library/os.html#os.environ. 
However, we cannot # call os.unsetenv directly, as that fails on Windows @@ -27,21 +23,20 @@ def set_utf8_tables(cls) -> type[Config]: @classmethod def set_ascii_tables(cls) -> type[Config]: - """ - Use ascii characters to print tables - """ + """Use ascii characters to print tables.""" os.environ["POLARS_FMT_NO_UTF8"] = "1" return cls @classmethod def set_tbl_width_chars(cls, width: int) -> type[Config]: """ - Set the number of character used to draw the table + Set the number of characters used to draw the table. Parameters ---------- width number of chars + """ os.environ["POLARS_TABLE_WIDTH"] = str(width) return cls @@ -49,12 +44,13 @@ def set_tbl_width_chars(cls, width: int) -> type[Config]: @classmethod def set_tbl_rows(cls, n: int) -> type[Config]: """ - Set the number of rows used to print tables + Set the number of rows used to print tables. Parameters ---------- n number of rows to print + """ os.environ["POLARS_FMT_MAX_ROWS"] = str(n) return cls @@ -62,7 +58,7 @@ def set_tbl_rows(cls, n: int) -> type[Config]: @classmethod def set_tbl_cols(cls, n: int) -> type[Config]: """ - Set the number of columns used to print tables + Set the number of columns used to print tables. Parameters ---------- @@ -100,30 +96,26 @@ def set_tbl_cols(cls, n: int) -> type[Config]: @classmethod def set_global_string_cache(cls) -> type[Config]: - """ - Turn on the global string cache - """ + """Turn on the global string cache.""" toggle_string_cache(True) return cls @classmethod def unset_global_string_cache(cls) -> type[Config]: - """ - Turn off the global string cache - """ + """Turn off the global string cache.""" toggle_string_cache(False) return cls @classmethod def set_fmt_str_lengths(cls, n: int) -> type[Config]: """ - Set the number of characters used to print string values + Set the number of characters used to print string values. 
Parameters ---------- n number of characters to print - """ + """ os.environ["POLARS_FMT_STR_LEN"] = str(n) return cls diff --git a/py-polars/polars/datatypes.py b/py-polars/polars/datatypes.py index f927d3708942..13a8fda5e6dc 100644 --- a/py-polars/polars/datatypes.py +++ b/py-polars/polars/datatypes.py @@ -34,9 +34,7 @@ def get_idx_type() -> Type[DataType]: class DataType: - """ - Base class for all Polars data types. - """ + """Base class for all Polars data types.""" def __new__(cls, *args: Any, **kwargs: Any) -> PolarsDataType: # type: ignore[misc] # this formulation allows for equivalent use of "pl.Type" and "pl.Type()", while @@ -63,66 +61,67 @@ def __repr__(self) -> str: class Int8(DataType): - """8-bit signed integer type""" + """8-bit signed integer type.""" class Int16(DataType): - """16-bit signed integer type""" + """16-bit signed integer type.""" class Int32(DataType): - """32-bit signed integer type""" + """32-bit signed integer type.""" class Int64(DataType): - """64-bit signed integer type""" + """64-bit signed integer type.""" class UInt8(DataType): - """8-bit unsigned integer type""" + """8-bit unsigned integer type.""" class UInt16(DataType): - """16-bit unsigned integer type""" + """16-bit unsigned integer type.""" class UInt32(DataType): - """32-bit unsigned integer type""" + """32-bit unsigned integer type.""" class UInt64(DataType): - """64-bit unsigned integer type""" + """64-bit unsigned integer type.""" class Float32(DataType): - """32-bit floating point type""" + """32-bit floating point type.""" class Float64(DataType): - """64-bit floating point type""" + """64-bit floating point type.""" class Boolean(DataType): - """Boolean type""" + """Boolean type.""" class Utf8(DataType): - """UTF-8 encoded string type""" + """UTF-8 encoded string type.""" class Null(DataType): - """Type representing Null / None values""" + """Type representing Null / None values.""" class List(DataType): def __init__(self, inner: type[DataType]): """ - Nested 
list/array type + Nested list/array type. Parameters ---------- inner The `DataType` of values within the list + """ self.inner = py_type_to_dtype(inner) @@ -152,15 +151,15 @@ def __hash__(self) -> int: class Date(DataType): - """Calendar date type""" + """Calendar date type.""" class Datetime(DataType): - """Calendar date and time type""" + """Calendar date and time type.""" def __init__(self, time_unit: str = "us", time_zone: str | None = None): """ - Calendar date and time type + Calendar date and time type. Parameters ---------- @@ -168,6 +167,7 @@ def __init__(self, time_unit: str = "us", time_zone: str | None = None): Any of {'ns', 'us', 'ms'} time_zone Timezone string as defined in pytz + """ self.tu = time_unit self.tz = time_zone @@ -186,16 +186,17 @@ def __hash__(self) -> int: class Duration(DataType): - """Time duration/delta type""" + """Time duration/delta type.""" def __init__(self, time_unit: str = "us"): """ - Time duration/delta type + Time duration/delta type. Parameters ---------- time_unit Any of {'ns', 'us', 'ms'} + """ self.tu = time_unit @@ -213,21 +214,21 @@ def __hash__(self) -> int: class Time(DataType): - """Time of day type""" + """Time of day type.""" class Object(DataType): - """Type for wrapping arbitrary Python objects""" + """Type for wrapping arbitrary Python objects.""" class Categorical(DataType): - """A categorical encoding of a set of strings""" + """A categorical encoding of a set of strings.""" class Field: def __init__(self, name: str, dtype: type[DataType]): """ - Definition of a single field within a `Struct` DataType + Definition of a single field within a `Struct` DataType. 
Parameters ---------- @@ -235,6 +236,7 @@ def __init__(self, name: str, dtype: type[DataType]): The name of the field within its parent `Struct` dtype The `DataType` of the field's values + """ self.name = name self.dtype = py_type_to_dtype(dtype) @@ -253,12 +255,13 @@ def __repr__(self) -> str: class Struct(DataType): def __init__(self, fields: Sequence[Field]): """ - Struct composite type + Struct composite type. Parameters ---------- fields The sequence of fields that make up the struct + """ self.fields = fields @@ -477,9 +480,7 @@ def py_type_to_dtype(data_type: Any) -> type[DataType]: def py_type_to_arrow_type(dtype: type[Any]) -> pa.lib.DataType: - """ - Convert a Python dtype to an Arrow dtype. - """ + """Convert a Python dtype to an Arrow dtype.""" try: return _PY_TYPE_TO_ARROW_TYPE[dtype] except KeyError: # pragma: no cover @@ -489,9 +490,7 @@ def py_type_to_arrow_type(dtype: type[Any]) -> pa.lib.DataType: def dtype_to_arrow_type(dtype: PolarsDataType) -> pa.lib.DataType: - """ - Convert a Polars dtype to an Arrow dtype. - """ + """Convert a Polars dtype to an Arrow dtype.""" try: return _DTYPE_TO_ARROW_TYPE[dtype] except KeyError: # pragma: no cover diff --git a/py-polars/polars/datatypes_constructor.py b/py-polars/polars/datatypes_constructor.py index 8a1f81bab6fe..9628dc1b760c 100644 --- a/py-polars/polars/datatypes_constructor.py +++ b/py-polars/polars/datatypes_constructor.py @@ -67,9 +67,7 @@ def polars_type_to_constructor( dtype: PolarsDataType, ) -> Callable[[str, Sequence[Any], bool], PySeries]: - """ - Get the right PySeries constructor for the given Polars dtype. - """ + """Get the right PySeries constructor for the given Polars dtype.""" try: return _POLARS_TYPE_TO_CONSTRUCTOR[dtype] except KeyError: # pragma: no cover @@ -94,9 +92,7 @@ def polars_type_to_constructor( def numpy_type_to_constructor(dtype: type[np.dtype]) -> Callable[..., PySeries]: - """ - Get the right PySeries constructor for the given Polars dtype. 
- """ + """Get the right PySeries constructor for the given numpy dtype.""" try: return _NUMPY_TYPE_TO_CONSTRUCTOR[dtype] except KeyError: @@ -117,9 +113,7 @@ def numpy_type_to_constructor(dtype: type[np.dtype]) -> Callable[..., PySeries]: def py_type_to_constructor(dtype: type[Any]) -> Callable[..., PySeries]: - """ - Get the right PySeries constructor for the given Python dtype. - """ + """Get the right PySeries constructor for the given Python dtype.""" try: return _PY_TYPE_TO_CONSTRUCTOR[dtype] except KeyError: diff --git a/py-polars/polars/exceptions.py b/py-polars/polars/exceptions.py index 6fa454dba62c..ecdac75d1b09 100644 --- a/py-polars/polars/exceptions.py +++ b/py-polars/polars/exceptions.py @@ -14,38 +14,28 @@ # when there is no binary yet class ArrowError(Exception): # type: ignore[no-redef] - """Exception raised the underlying Arrow library encounters an error""" + """Exception raised when the underlying Arrow library encounters an error.""" class ComputeError(Exception): # type: ignore[no-redef] - """Exception raised when we couldn't finish the computation""" + """Exception raised when polars could not finish the computation.""" class NoDataError(Exception): # type: ignore[no-redef] - """ - Exception raised when an operation can not be performed on an empty data - structure - """ + """Exception raised when an operation can not be performed on an empty data structure.""" # noqa: E501 class NotFoundError(Exception): # type: ignore[no-redef] - """Exception raised when a specified column is not found""" + """Exception raised when a specified column is not found.""" class SchemaError(Exception): # type: ignore[no-redef] - """ - Exception raised when trying to combine data structures with mismatched schemas - """ + """Exception raised when trying to combine data structures with mismatched schemas.""" # noqa: E501 class ShapeError(Exception): # type: ignore[no-redef] - """ - Exception raised when trying to combine data structures with incompatible shapes - """ 
+ """Exception raised when trying to combine data structures with incompatible shapes.""" # noqa: E501 class DuplicateError(Exception): # type: ignore[no-redef] - """Exception raised when a column name is duplicated""" + """Exception raised when a column name is duplicated.""" class PanicException(Exception): # type: ignore[no-redef] - """ - Exception raised when an unexpected state causes a panic in the underlying Rust - library - """ + """Exception raised when an unexpected state causes a panic in the underlying Rust library.""" # noqa: E501 __all__ = [ diff --git a/py-polars/polars/internals/anonymous_scan.py b/py-polars/polars/internals/anonymous_scan.py index 57add0ba9e74..98d0a2a544c1 100644 --- a/py-polars/polars/internals/anonymous_scan.py +++ b/py-polars/polars/internals/anonymous_scan.py @@ -16,6 +16,8 @@ def _deser_and_exec(buf: bytes, with_columns: list[str] | None) -> pli.DataFrame: """ + Deserialize and execute the given function for the projected columns. + Called from polars-lazy. Polars-lazy provides the bytes of the pickled function and the projected columns. @@ -25,6 +27,7 @@ def _deser_and_exec(buf: bytes, with_columns: list[str] | None) -> pli.DataFrame Pickled function with_columns Columns that are projected + """ func = pickle.loads(buf) return func(with_columns) @@ -34,15 +37,18 @@ def _scan_ds_impl( ds: pa.dataset.dataset, with_columns: list[str] | None ) -> pli.DataFrame: """ - Takes the projected columns and materializes an arrow table. + Take the projected columns and materialize an arrow table. Parameters ---------- ds + pyarrow dataset with_columns + Columns that are projected Returns ------- + DataFrame """ if not _PYARROW_AVAILABLE: # pragma: no cover @@ -54,13 +60,16 @@ def _scan_ds_impl( def _scan_ds(ds: pa.dataset.dataset) -> pli.LazyFrame: """ - Pickle the partially applied function `_scan_ds_impl`. The bytes are then sent to - the polars logical plan. It can be deserialized once executed and ran. 
+ Pickle the partially applied function `_scan_ds_impl`. + + The bytes are then sent to the polars logical plan. They can be deserialized + and run once the plan is executed. Parameters ---------- ds pyarrow dataset + """ func = partial(_scan_ds_impl, ds) func_serialized = pickle.dumps(func) @@ -69,12 +78,15 @@ def _scan_ds(ds: pa.dataset.dataset) -> pli.LazyFrame: def _scan_ipc_impl(uri: str, with_columns: list[str] | None) -> pli.DataFrame: """ - Takes the projected columns and materializes an arrow table. + Take the projected columns and materialize an arrow table. Parameters ---------- uri + Source URI with_columns + Columns that are projected + """ import polars as pl @@ -97,12 +109,15 @@ def _scan_ipc_fsspec( def _scan_parquet_impl(uri: str, with_columns: list[str] | None) -> pli.DataFrame: """ - Takes the projected columns and materializes an arrow table. + Take the projected columns and materialize an arrow table. Parameters ---------- uri + Source URI with_columns + Columns that are projected + """ import polars as pl diff --git a/py-polars/polars/internals/construction.py b/py-polars/polars/internals/construction.py index 70c64aaab9a8..abad36d33b93 100644 --- a/py-polars/polars/internals/construction.py +++ b/py-polars/polars/internals/construction.py @@ -66,17 +66,13 @@ def series_to_pyseries(name: str, values: pli.Series) -> PySeries: - """ - Construct a PySeries from a Polars Series. - """ + """Construct a PySeries from a Polars Series.""" values.rename(name, in_place=True) return values.inner() def arrow_to_pyseries(name: str, values: pa.Array, rechunk: bool = True) -> PySeries: - """ - Construct a PySeries from an Arrow array. 
- """ + """Construct a PySeries from an Arrow array.""" array = coerce_arrow(values) if hasattr(array, "num_chunks"): if array.num_chunks > 1: @@ -97,9 +93,7 @@ def arrow_to_pyseries(name: str, values: pa.Array, rechunk: bool = True) -> PySe def numpy_to_pyseries( name: str, values: np.ndarray, strict: bool = True, nan_to_null: bool = False ) -> PySeries: - """ - Construct a PySeries from a numpy array. - """ + """Construct a PySeries from a numpy array.""" if not values.flags["C_CONTIGUOUS"]: values = np.array(values) @@ -122,6 +116,7 @@ def _get_first_non_none(values: Sequence[Any | None]) -> Any: Return the first value from a sequence that isn't None. If sequence doesn't contain non-None values, return None. + """ if values is not None: return next((v for v in values if v is not None), None) @@ -129,10 +124,11 @@ def _get_first_non_none(values: Sequence[Any | None]) -> Any: def sequence_from_anyvalue_or_object(name: str, values: Sequence[Any]) -> PySeries: """ - Last resort conversion. AnyValues are most flexible and if they fail we go for - object types - """ + Last resort conversion. + AnyValues are most flexible and if they fail we go for object types + + """ try: return PySeries.new_from_anyvalues(name, values) # raised if we cannot convert to Wrap @@ -146,9 +142,7 @@ def sequence_to_pyseries( dtype: PolarsDataType | None = None, strict: bool = True, ) -> PySeries: - """ - Construct a PySeries from a sequence. - """ + """Construct a PySeries from a sequence.""" dtype_: type | None = None nested_dtype: PolarsDataType | type | None = None temporal_unit: str | None = None @@ -299,6 +293,8 @@ def _pandas_series_to_arrow( Returns ------- + Arrow Array + """ dtype = values.dtype if dtype == "object" and len(values) > 0: @@ -317,9 +313,7 @@ def _pandas_series_to_arrow( def pandas_to_pyseries( name: str, values: pd.Series | pd.DatetimeIndex, nan_to_none: bool = True ) -> PySeries: - """ - Construct a PySeries from a pandas Series or DatetimeIndex. 
- """ + """Construct a PySeries from a pandas Series or DatetimeIndex.""" if not _PYARROW_AVAILABLE: # pragma: no cover raise ImportError( "'pyarrow' is required when constructing a PySeries from a pandas Series." @@ -340,9 +334,7 @@ def pandas_to_pyseries( def _handle_columns_arg( data: list[PySeries], columns: Sequence[str] | None = None ) -> list[PySeries]: - """ - Rename data according to columns argument. - """ + """Rename data according to columns argument.""" if not columns: return data else: @@ -357,9 +349,7 @@ def _handle_columns_arg( def _post_apply_columns(pydf: PyDataFrame, columns: ColumnsType) -> PyDataFrame: - """ - Apply 'columns' param _after_ PyDataFrame creation (if no alternative). - """ + """Apply 'columns' param _after_ PyDataFrame creation (if no alternative).""" pydf_columns, pydf_dtypes = pydf.columns(), pydf.dtypes() columns, dtypes = _unpack_columns(columns or pydf_columns) if columns != pydf_columns: @@ -381,8 +371,9 @@ def _unpack_columns( n_expected: int | None = None, ) -> tuple[list[str], dict[str, PolarsDataType]]: """ - Unpack column names and create dtype lookup for any (name,dtype) pairs or schema - dict input. + Unpack column names and create dtype lookup. + + Works for any (name, dtype) pairs or schema dict input. """ if isinstance(columns, dict): columns = list(columns.items()) @@ -408,9 +399,7 @@ def _unpack_columns( def dict_to_pydf( data: dict[str, Sequence[Any]], columns: ColumnsType | None = None ) -> PyDataFrame: - """ - Construct a PyDataFrame from a dictionary of sequences. - """ + """Construct a PyDataFrame from a dictionary of sequences.""" if columns is not None: # the columns arg may also set the dtype of the series columns, dtypes = _unpack_columns(columns, lookup_names=data.keys()) @@ -461,9 +450,7 @@ def sequence_to_pydf( columns: ColumnsType | None = None, orient: Literal["col", "row"] | None = None, ) -> PyDataFrame: - """ - Construct a PyDataFrame from a sequence. 
- """ + """Construct a PyDataFrame from a sequence.""" data_series: list[PySeries] if len(data) == 0: @@ -523,9 +510,7 @@ def numpy_to_pydf( columns: ColumnsType | None = None, orient: Literal["col", "row"] | None = None, ) -> PyDataFrame: - """ - Construct a PyDataFrame from a numpy ndarray. - """ + """Construct a PyDataFrame from a numpy ndarray.""" shape = data.shape n_columns = ( 0 @@ -584,9 +569,7 @@ def numpy_to_pydf( def arrow_to_pydf( data: pa.Table, columns: ColumnsType | None = None, rechunk: bool = True ) -> PyDataFrame: - """ - Construct a PyDataFrame from an Arrow Table. - """ + """Construct a PyDataFrame from an Arrow Table.""" if not _PYARROW_AVAILABLE: # pragma: no cover raise ImportError( "'pyarrow' is required when constructing a PyDataFrame from an Arrow Table." @@ -648,9 +631,7 @@ def arrow_to_pydf( def series_to_pydf(data: pli.Series, columns: ColumnsType | None = None) -> PyDataFrame: - """ - Construct a PyDataFrame from a Polars Series. - """ + """Construct a PyDataFrame from a Polars Series.""" data_series = [data.inner()] series_name = [s.name() for s in data_series] columns, dtypes = _unpack_columns(columns or series_name, n_expected=1) @@ -669,9 +650,7 @@ def pandas_to_pydf( rechunk: bool = True, nan_to_none: bool = True, ) -> PyDataFrame: - """ - Construct a PyDataFrame from a pandas DataFrame. - """ + """Construct a PyDataFrame from a pandas DataFrame.""" if not _PYARROW_AVAILABLE: # pragma: no cover raise ImportError( "'pyarrow' is required when constructing a PyDataFrame from a pandas" diff --git a/py-polars/polars/internals/expr.py b/py-polars/polars/internals/expr.py index 152018f480fe..5f016198ca1e 100644 --- a/py-polars/polars/internals/expr.py +++ b/py-polars/polars/internals/expr.py @@ -51,9 +51,7 @@ def wrap_expr(pyexpr: PyExpr) -> Expr: class Expr: - """ - Expressions that can be used in various contexts. 
- """ + """Expressions that can be used in various contexts.""" def __init__(self) -> None: self._pyexpr: PyExpr # pragma: no cover @@ -189,9 +187,7 @@ def __neg__(self) -> Expr: def __array_ufunc__( self, ufunc: Callable[..., Any], method: str, *inputs: Any, **kwargs: Any ) -> Expr: - """ - Numpy universal functions. - """ + """Numpy universal functions.""" if not _NUMPY_AVAILABLE: raise ImportError("'numpy' is required for this functionality.") out_type = ufunc(np.array([1])).dtype @@ -266,7 +262,7 @@ def to_physical(self) -> Expr: def any(self) -> Expr: """ - Check if any boolean value in a Boolean column is `True` + Check if any boolean value in a Boolean column is `True`. Returns ------- @@ -284,6 +280,7 @@ def any(self) -> Expr: ╞══════╪═══════╡ │ true ┆ false │ └──────┴───────┘ + """ return wrap_expr(self._pyexpr.any()) @@ -312,25 +309,20 @@ def all(self) -> Expr: ╞══════╪═══════╪═══════╡ │ true ┆ false ┆ false │ └──────┴───────┴───────┘ + """ return wrap_expr(self._pyexpr.all()) def sqrt(self) -> Expr: - """ - Compute the square root of the elements - """ + """Compute the square root of the elements.""" return self**0.5 def log10(self) -> Expr: - """ - Return the base 10 logarithm of the input array, element-wise. 
- """ + """Compute the base 10 logarithm of the input array, element-wise.""" return self.log(10.0) def exp(self) -> Expr: - """ - Return the exponential element-wise - """ + """Compute the exponential, element-wise.""" return wrap_expr(self._pyexpr.exp()) def alias(self, name: str) -> Expr: @@ -550,7 +542,6 @@ def keep_name(self) -> Expr: └─────┴───────────┘ """ - return wrap_expr(self._pyexpr.keep_name()) def prefix(self, prefix: str) -> Expr: @@ -840,6 +831,7 @@ def is_finite(self) -> Expr: ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ │ true ┆ false │ └──────┴───────┘ + """ return wrap_expr(self._pyexpr.is_finite()) @@ -872,6 +864,7 @@ def is_infinite(self) -> Expr: ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ │ false ┆ true │ └───────┴───────┘ + """ return wrap_expr(self._pyexpr.is_infinite()) @@ -1089,9 +1082,7 @@ def append(self, other: Expr, upcast: bool = True) -> Expr: return wrap_expr(self._pyexpr.append(other._pyexpr, upcast)) def rechunk(self) -> Expr: - """ - Create a single chunk of memory for this Series. - """ + """Create a single chunk of memory for this Series.""" return wrap_expr(self._pyexpr.rechunk()) def drop_nulls(self) -> Expr: @@ -1150,6 +1141,7 @@ def drop_nans(self) -> Expr: ├╌╌╌╌╌╌┤ │ 4.0 │ └──────┘ + """ return wrap_expr(self._pyexpr.drop_nans()) @@ -1190,6 +1182,7 @@ def cumsum(self, reverse: bool = False) -> Expr: ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤ │ 10 ┆ 4 │ └─────┴───────────┘ + """ return wrap_expr(self._pyexpr.cumsum(reverse)) @@ -1230,6 +1223,7 @@ def cumprod(self, reverse: bool = False) -> Expr: ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤ │ 24 ┆ 4 │ └─────┴───────────┘ + """ return wrap_expr(self._pyexpr.cumprod(reverse)) @@ -1265,6 +1259,7 @@ def cummin(self, reverse: bool = False) -> Expr: ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤ │ 1 ┆ 4 │ └─────┴───────────┘ + """ return wrap_expr(self._pyexpr.cummin(reverse)) @@ -1337,6 +1332,7 @@ def cumcount(self, reverse: bool = False) -> Expr: ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤ │ 3 ┆ 0 │ └─────┴───────────┘ + """ return wrap_expr(self._pyexpr.cumcount(reverse)) @@ -1365,6 +1361,7 @@ def floor(self) -> Expr: ├╌╌╌╌╌┤ 
│ 1.0 │ └─────┘ + """ return wrap_expr(self._pyexpr.floor()) @@ -1393,6 +1390,7 @@ def ceil(self) -> Expr: ├╌╌╌╌╌┤ │ 2.0 │ └─────┘ + """ return wrap_expr(self._pyexpr.ceil()) @@ -1423,6 +1421,7 @@ def round(self, decimals: int) -> Expr: ├╌╌╌╌╌┤ │ 1.2 │ └─────┘ + """ return wrap_expr(self._pyexpr.round(decimals)) @@ -1481,6 +1480,7 @@ def mode(self) -> Expr: ├╌╌╌╌╌┼╌╌╌╌╌┤ │ 1 ┆ 2 │ └─────┴─────┘ + """ return wrap_expr(self._pyexpr.mode()) @@ -1599,6 +1599,7 @@ def sort(self, reverse: bool = False, nulls_last: bool = False) -> Expr: ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ │ one ┆ [1, 2, 98] │ └───────┴────────────┘ + """ return wrap_expr(self._pyexpr.sort_with(reverse, nulls_last)) @@ -1637,6 +1638,7 @@ def arg_sort(self, reverse: bool = False) -> Expr: ├╌╌╌╌╌┤ │ 2 │ └─────┘ + """ return wrap_expr(self._pyexpr.arg_sort(reverse)) @@ -1660,6 +1662,7 @@ def arg_max(self) -> Expr: ╞═════╡ │ 2 │ └─────┘ + """ return wrap_expr(self._pyexpr.arg_max()) @@ -1739,6 +1742,7 @@ def sort_by( ├╌╌╌╌╌╌╌┤ │ two │ └───────┘ + """ if not isinstance(by, list): by = [by] @@ -1864,6 +1868,7 @@ def shift_and_fill( ├╌╌╌╌╌┤ │ 3 │ └─────┘ + """ fill_value = expr_to_lit_or_expr(fill_value, str_to_lit=True) return wrap_expr(self._pyexpr.shift_and_fill(periods, fill_value._pyexpr)) @@ -1954,6 +1959,7 @@ def fill_nan(self, fill_value: str | int | float | bool | Expr) -> Expr: ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ zero ┆ 6.0 │ └──────┴──────┘ + """ fill_value = expr_to_lit_or_expr(fill_value, str_to_lit=True) return wrap_expr(self._pyexpr.fill_nan(fill_value._pyexpr)) @@ -2012,6 +2018,7 @@ def backward_fill(self, limit: int | None = None) -> Expr: ├╌╌╌╌╌╌┼╌╌╌╌╌┤ │ null ┆ 6 │ └──────┴─────┘ + """ return wrap_expr(self._pyexpr.backward_fill(limit)) @@ -2071,6 +2078,7 @@ def std(self) -> Expr: ╞═════╡ │ 1.0 │ └─────┘ + """ return wrap_expr(self._pyexpr.std()) @@ -2110,6 +2118,7 @@ def max(self) -> Expr: ╞═════╡ │ 1 │ └─────┘ + """ return wrap_expr(self._pyexpr.max()) @@ -2194,6 +2203,7 @@ def median(self) -> Expr: ╞═════╡ │ 0.0 │ 
└─────┘ + """ return wrap_expr(self._pyexpr.median()) @@ -2213,6 +2223,7 @@ def product(self) -> Expr: ╞═════╡ │ 6 │ └─────┘ + """ return wrap_expr(self._pyexpr.product()) @@ -2252,6 +2263,7 @@ def null_count(self) -> "Expr": ╞═════╪═════╡ │ 2 ┆ 0 │ └─────┴─────┘ + """ return wrap_expr(self._pyexpr.null_count()) @@ -2346,6 +2358,7 @@ def first(self) -> Expr: ╞═════╡ │ 1 │ └─────┘ + """ return wrap_expr(self._pyexpr.first()) @@ -2450,7 +2463,6 @@ def over(self, expr: str | Expr | List[Expr | str]) -> Expr: └────────┘ """ - pyexprs = selection_to_pyexpr_list(expr) return wrap_expr(self._pyexpr.over(pyexprs)) @@ -2475,6 +2487,7 @@ def is_unique(self) -> Expr: ├╌╌╌╌╌╌╌┤ │ true │ └───────┘ + """ return wrap_expr(self._pyexpr.is_unique()) @@ -2534,6 +2547,7 @@ def is_duplicated(self) -> Expr: ├╌╌╌╌╌╌╌┤ │ false │ └───────┘ + """ return wrap_expr(self._pyexpr.is_duplicated()) @@ -2613,6 +2627,7 @@ def filter(self, predicate: Expr) -> Expr: ---------- predicate Boolean expression. + """ return wrap_expr(self._pyexpr.filter(predicate._pyexpr)) @@ -2624,6 +2639,7 @@ def where(self, predicate: Expr) -> Expr: ---------- predicate Boolean expression. + """ return self.filter(predicate) @@ -2650,6 +2666,8 @@ def map( Lambda/ function to apply. return_dtype Dtype of the output Series. + agg_list + Aggregate list Examples -------- @@ -2668,6 +2686,7 @@ def map( ╞══════╪════════╡ │ 1 ┆ 0 │ └──────┴────────┘ + """ if return_dtype is not None: return_dtype = py_type_to_dtype(return_dtype) @@ -2780,8 +2799,8 @@ def apply( ... pl.col("a").sum(), ... ) ... 
) # doctest: +IGNORE_RESULT - """ + """ # input x: Series of type list containing the group values def wrap_f(x: pli.Series) -> pli.Series: # pragma: no cover return x.apply(f, return_dtype=return_dtype) @@ -2846,7 +2865,6 @@ def flatten(self) -> Expr: └───────┘ """ - return wrap_expr(self._pyexpr.explode()) def explode(self) -> Expr: @@ -2910,17 +2928,13 @@ def take_every(self, n: int) -> Expr: return wrap_expr(self._pyexpr.take_every(n)) def head(self, n: int | Expr | None = None) -> Expr: - """ - Take the first n values. - """ + """Take the first n values.""" if isinstance(n, Expr): return self.slice(0, n) return wrap_expr(self._pyexpr.head(n)) def tail(self, n: int | None = None) -> Expr: - """ - Take the last n values. - """ + """Take the last n values.""" return wrap_expr(self._pyexpr.tail(n)) def pow(self, exponent: int | float | pli.Series | Expr) -> Expr: @@ -2945,6 +2959,7 @@ def pow(self, exponent: int | float | pli.Series | Expr) -> Expr: ├╌╌╌╌╌╌┤ │ 64.0 │ └──────┘ + """ exponent = expr_to_lit_or_expr(exponent) return wrap_expr(self._pyexpr.pow(exponent._pyexpr)) @@ -3171,12 +3186,13 @@ def reinterpret(self, signed: bool) -> Expr: ---------- signed If True, reinterpret as `pl.Int64`. Otherwise, reinterpret as `pl.UInt64`. + """ return wrap_expr(self._pyexpr.reinterpret(signed)) def inspect(self, fmt: str = "{}") -> Expr: """ - Prints the value that this expression evaluates to and passes on the value. + Print the value that this expression evaluates to and pass on the value. Examples -------- @@ -3211,9 +3227,7 @@ def inspect(s: pli.Series) -> pli.Series: # pragma: no cover return self.map(inspect, return_dtype=None, agg_list=True) def interpolate(self) -> Expr: - """ - Interpolate intermediate values. The interpolation method is linear. 
- """ + """Linearly interpolate intermediate values.""" return wrap_expr(self._pyexpr.interpolate()) def rolling_min( @@ -3226,7 +3240,8 @@ def rolling_min( closed: str = "left", ) -> Expr: """ - apply a rolling min (moving min) over the values in this array. + Apply a rolling min (moving min) over the values in this array. + A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weight` vector. The resulting values will be aggregated to their sum. @@ -3305,6 +3320,7 @@ def rolling_min( ├╌╌╌╌╌╌┤ │ 5.0 │ └──────┘ + """ window_size, min_periods = _prepare_rolling_window_args( window_size, min_periods @@ -3326,6 +3342,7 @@ def rolling_max( ) -> Expr: """ Apply a rolling max (moving max) over the values in this array. + A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weight` vector. The resulting values will be aggregated to their sum. @@ -3425,6 +3442,7 @@ def rolling_mean( ) -> Expr: """ Apply a rolling mean (moving mean) over the values in this array. + A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weight` vector. The resulting values will be aggregated to their sum. @@ -3522,6 +3540,7 @@ def rolling_sum( ) -> Expr: """ Apply a rolling sum (moving sum) over the values in this array. + A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weight` vector. The resulting values will be aggregated to their sum. @@ -3620,7 +3639,7 @@ def rolling_std( closed: str = "left", ) -> Expr: """ - Compute a rolling std dev + Compute a rolling standard deviation. A window of length `window_size` will traverse the array. 
The values that fill this window will (optionally) be multiplied with the weights given by the @@ -3762,7 +3781,7 @@ def rolling_median( closed: str = "left", ) -> Expr: """ - Compute a rolling median + Compute a rolling median. Parameters ---------- @@ -3831,7 +3850,7 @@ def rolling_quantile( closed: str = "left", ) -> Expr: """ - Compute a rolling quantile + Compute a rolling quantile. Parameters ---------- @@ -3909,7 +3928,8 @@ def rolling_apply( center: bool = False, ) -> Expr: """ - Allows a custom rolling window function. + Apply a custom rolling window function. + Prefer the specific rolling window functions over this one, as they are faster. Prefer: @@ -3974,7 +3994,7 @@ def rolling_apply( def rolling_skew(self, window_size: int, bias: bool = True) -> Expr: """ - Compute a rolling skew + Compute a rolling skew. Parameters ---------- @@ -3982,19 +4002,16 @@ def rolling_skew(self, window_size: int, bias: bool = True) -> Expr: Size of the rolling window bias If False, then the calculations are corrected for statistical bias. + """ return wrap_expr(self._pyexpr.rolling_skew(window_size, bias)) def abs(self) -> Expr: - """ - Take absolute values - """ + """Compute absolute values.""" return wrap_expr(self._pyexpr.abs()) def argsort(self, reverse: bool = False) -> Expr: - """ - alias for `arg_sort` - """ + """Alias for `arg_sort`.""" return self.arg_sort(reverse) def rank(self, method: str = "average", reverse: bool = False) -> Expr: @@ -4147,7 +4164,9 @@ def pct_change(self, n: int = 1) -> Expr: return wrap_expr(self._pyexpr.pct_change(n)) def skew(self, bias: bool = True) -> Expr: - r"""Compute the sample skewness of a data set. + r""" + Compute the sample skewness of a data set. + For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution. 
The @@ -4186,7 +4205,9 @@ def skew(self, bias: bool = True) -> Expr: return wrap_expr(self._pyexpr.skew(bias)) def kurtosis(self, fisher: bool = True, bias: bool = True) -> Expr: - """Compute the kurtosis (Fisher or Pearson) of a dataset. + """ + Compute the kurtosis (Fisher or Pearson) of a dataset. + Kurtosis is the fourth central moment divided by the square of the variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. @@ -4202,6 +4223,7 @@ def kurtosis(self, fisher: bool = True, bias: bool = True) -> Expr: Pearson's definition is used (normal ==> 3.0). bias : bool, optional If False, then the calculations are corrected for statistical bias. + """ return wrap_expr(self._pyexpr.kurtosis(fisher, bias)) @@ -4246,21 +4268,27 @@ def clip(self, min_val: int | float, max_val: int | float) -> Expr: def lower_bound(self) -> Expr: """ + Calculate the lower bound. + Returns a unit Series with the lowest value possible for the dtype of this expression. + """ return wrap_expr(self._pyexpr.lower_bound()) def upper_bound(self) -> Expr: """ + Calculate the upper bound. + Returns a unit Series with the highest value possible for the dtype of this expression. + """ return wrap_expr(self._pyexpr.upper_bound()) def sign(self) -> Expr: """ - Returns an element-wise indication of the sign of a number. + Return an element-wise indication of the sign of a number. Examples -------- @@ -4619,6 +4647,7 @@ def shuffle(self, seed: int | None = None) -> Expr: seed Seed initialization. If None given, the `random` module is used to generate a random seed. + """ if seed is None: seed = random.randint(0, 10000) @@ -4644,6 +4673,7 @@ def sample( Seed initialization. If None given a random seed is used. shuffle Shuffle the order of sampled data points. 
+ """ return wrap_expr( self._pyexpr.sample_frac(fraction, with_replacement, shuffle, seed) @@ -4844,7 +4874,7 @@ def value_counts(self, multithreaded: bool = False, sort: bool = False) -> Expr: def unique_counts(self) -> Expr: """ - Returns a count of the unique values in the order of appearance. + Return a count of the unique values in the order of appearance. This method differs from `value_counts` in that it does not return the values, only the counts and might be faster @@ -4885,6 +4915,7 @@ def log(self, base: float = math.e) -> Expr: ---------- base Given base, defaults to `e` + """ return wrap_expr(self._pyexpr.log(base)) @@ -4973,6 +5004,7 @@ def set_sorted(self, reverse: bool = False) -> Expr: ---------- reverse If the `Series` order is reversed, e.g. descending. + """ return self.map(lambda s: s.set_sorted(reverse)) @@ -4981,51 +5013,39 @@ def set_sorted(self, reverse: bool = False) -> Expr: @property def dt(self) -> ExprDateTimeNameSpace: - """ - Create an object namespace of all datetime related methods. - """ + """Create an object namespace of all datetime related methods.""" return ExprDateTimeNameSpace(self) @property def str(self) -> ExprStringNameSpace: - """ - Create an object namespace of all string related methods. - """ + """Create an object namespace of all string related methods.""" return ExprStringNameSpace(self) @property def arr(self) -> ExprListNameSpace: - """ - Create an object namespace of all list related methods. - """ + """Create an object namespace of all list related methods.""" return ExprListNameSpace(self) @property def cat(self) -> ExprCatNameSpace: - """ - Create an object namespace of all categorical related methods. - """ + """Create an object namespace of all categorical related methods.""" return ExprCatNameSpace(self) @property def struct(self) -> ExprStructNameSpace: - """ - Create an object namespace of all struct related methods. 
- """ + """Create an object namespace of all struct related methods.""" return ExprStructNameSpace(self) class ExprStructNameSpace: - """ - Namespace for struct related expressions - """ + """Namespace for struct related expressions.""" def __init__(self, expr: Expr): self._pyexpr = expr._pyexpr def field(self, name: str) -> Expr: """ - Retrieve one of the fields of this `Struct` as a new Series + Retrieve one of the fields of this `Struct` as a new Series. Parameters ---------- @@ -5109,9 +5129,7 @@ def rename_fields(self, names: List[str]) -> Expr: class ExprListNameSpace: - """ - Namespace for list related expressions - """ + """Namespace for list related expressions.""" def __init__(self, expr: Expr): self._pyexpr = expr._pyexpr @@ -5139,27 +5157,19 @@ def lengths(self) -> Expr: return wrap_expr(self._pyexpr.arr_lengths()) def sum(self) -> Expr: - """ - Sum all the arrays in the list - """ + """Sum all the arrays in the list.""" return wrap_expr(self._pyexpr.lst_sum()) def max(self) -> Expr: - """ - Compute the max value of the arrays in the list - """ + """Compute the max value of the arrays in the list.""" return wrap_expr(self._pyexpr.lst_max()) def min(self) -> Expr: - """ - Compute the min value of the arrays in the list - """ + """Compute the min value of the arrays in the list.""" return wrap_expr(self._pyexpr.lst_min()) def mean(self) -> Expr: - """ - Compute the mean value of the arrays in the list - """ + """Compute the mean value of the arrays in the list.""" return wrap_expr(self._pyexpr.lst_mean()) def sort(self, reverse: bool = False) -> Expr: @@ -5215,9 +5225,7 @@ def reverse(self) -> Expr: return wrap_expr(self._pyexpr.lst_reverse()) def unique(self) -> Expr: - """ - Get the unique/distinct values in the list - """ + """Get the unique/distinct values in the list.""" return wrap_expr(self._pyexpr.lst_unique()) def concat( @@ -5409,7 +5417,6 @@ def join(self, separator: str) -> Expr: └───────┘ """ - return 
wrap_expr(self._pyexpr.lst_join(separator)) def arg_min(self) -> Expr: @@ -5419,6 +5426,7 @@ def arg_min(self) -> Expr: Returns ------- Series of dtype UInt32/UInt64 (depending on compilation) + """ return wrap_expr(self._pyexpr.lst_arg_min()) @@ -5429,6 +5437,7 @@ def arg_max(self) -> Expr: Returns ------- Series of dtype UInt32/UInt64 (depending on compilation) + """ return wrap_expr(self._pyexpr.lst_arg_max()) @@ -5594,7 +5603,6 @@ def to_struct( {'col_name_0': 1, 'col_name_1': 2, 'col_name_2': None}] """ - return wrap_expr(self._pyexpr.lst_to_struct(n_field_strategy, name_generator)) def eval(self, expr: Expr, parallel: bool = False) -> Expr: @@ -5637,9 +5645,7 @@ def eval(self, expr: Expr, parallel: bool = False) -> Expr: class ExprStringNameSpace: - """ - Namespace for string related expressions - """ + """Namespace for string related expressions.""" def __init__(self, expr: Expr): self._pyexpr = expr._pyexpr @@ -5773,33 +5779,23 @@ def concat(self, delimiter: str = "-") -> Expr: return wrap_expr(self._pyexpr.str_concat(delimiter)) def to_uppercase(self) -> Expr: - """ - Transform to uppercase variant. - """ + """Transform to uppercase variant.""" return wrap_expr(self._pyexpr.str_to_uppercase()) def to_lowercase(self) -> Expr: - """ - Transform to lowercase variant. - """ + """Transform to lowercase variant.""" return wrap_expr(self._pyexpr.str_to_lowercase()) def strip(self) -> Expr: - """ - Remove leading and trailing whitespace. - """ + """Remove leading and trailing whitespace.""" return wrap_expr(self._pyexpr.str_strip()) def lstrip(self) -> Expr: - """ - Remove leading whitespace. - """ + """Remove leading whitespace.""" return wrap_expr(self._pyexpr.str_lstrip()) def rstrip(self) -> Expr: - """ - Remove trailing whitespace. 
- """ + """Remove trailing whitespace.""" return wrap_expr(self._pyexpr.str_rstrip()) def zfill(self, alignment: int) -> Expr: @@ -5966,6 +5962,7 @@ def contains(self, pattern: str, literal: bool = False) -> Expr: -------- starts_with : Check if string values start with a substring. ends_with : Check if string values end with a substring. + """ return wrap_expr(self._pyexpr.str_contains(pattern, literal)) @@ -6013,6 +6010,7 @@ def ends_with(self, sub: str) -> Expr: -------- contains : Check if string contains a substring that matches a regex. starts_with : Check if string values start with a substring. + """ return wrap_expr(self._pyexpr.str_ends_with(sub)) @@ -6060,6 +6058,7 @@ def starts_with(self, sub: str) -> Expr: -------- contains : Check if string contains a substring that matches a regex. ends_with : Check if string values end with a substring. + """ return wrap_expr(self._pyexpr.str_starts_with(sub)) @@ -6110,7 +6109,7 @@ def json_path_match(self, json_path: str) -> Expr: def decode(self, encoding: str, strict: bool = False) -> Expr: """ - Decodes a value using the provided encoding + Decode a value using the provided encoding. Parameters ---------- @@ -6137,6 +6136,7 @@ def decode(self, encoding: str, strict: bool = False) -> Expr: ├╌╌╌╌╌╌╌╌╌┤ │ null │ └─────────┘ + """ if encoding == "hex": return wrap_expr(self._pyexpr.str_hex_decode(strict)) @@ -6147,7 +6147,7 @@ def decode(self, encoding: str, strict: bool = False) -> Expr: def encode(self, encoding: str) -> Expr: """ - Encodes a value using the provided encoding + Encode a value using the provided encoding. Parameters ---------- @@ -6417,7 +6417,7 @@ def split_exact(self, by: str, n: int, inclusive: bool = False) -> Expr: return wrap_expr(self._pyexpr.str_split_exact(by, n)) def replace(self, pattern: str, value: str, literal: bool = False) -> Expr: - """ + r""" Replace first matching regex/literal substring with a new string value. 
Parameters @@ -6484,6 +6484,7 @@ def replace_all(self, pattern: str, value: str, literal: bool = False) -> Expr: ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ │ 2 ┆ 123-123 │ └─────┴─────────┘ + """ return wrap_expr(self._pyexpr.str_replace_all(pattern, value, literal)) @@ -6550,9 +6551,7 @@ def slice(self, start: int, length: int | None = None) -> Expr: class ExprDateTimeNameSpace: - """ - Namespace for datetime related expressions. - """ + """Namespace for datetime related expressions.""" def __init__(self, expr: Expr): self._pyexpr = expr._pyexpr @@ -6682,6 +6681,7 @@ def strftime(self, fmt: str) -> Expr: def year(self) -> Expr: """ Extract year from underlying Date representation. + Can be performed on Date and Datetime. Returns the year number in the calendar date. @@ -6689,12 +6689,14 @@ def year(self) -> Expr: Returns ------- Year as Int32 + """ return wrap_expr(self._pyexpr.year()) def quarter(self) -> Expr: """ Extract quarter from underlying Date representation. + Can be performed on Date and Datetime. Returns the quarter ranging from 1 to 4. @@ -6702,12 +6704,14 @@ def quarter(self) -> Expr: Returns ------- Quarter as UInt32 + """ return wrap_expr(self._pyexpr.quarter()) def month(self) -> Expr: """ Extract month from underlying Date representation. + Can be performed on Date and Datetime. Returns the month number starting from 1. @@ -6716,12 +6720,14 @@ def month(self) -> Expr: Returns ------- Month as UInt32 + """ return wrap_expr(self._pyexpr.month()) def week(self) -> Expr: """ Extract the week from the underlying Date representation. + Can be performed on Date and Datetime Returns the ISO week number starting from 1. @@ -6730,12 +6736,14 @@ def week(self) -> Expr: Returns ------- Week number as UInt32 + """ return wrap_expr(self._pyexpr.week()) def weekday(self) -> Expr: """ Extract the week day from the underlying Date representation. + Can be performed on Date and Datetime. 
Returns the weekday number where monday = 0 and sunday = 6 @@ -6743,12 +6751,14 @@ def weekday(self) -> Expr: Returns ------- Week day as UInt32 + """ return wrap_expr(self._pyexpr.weekday()) def day(self) -> Expr: """ Extract day from underlying Date representation. + Can be performed on Date and Datetime. Returns the day of month starting from 1. @@ -6757,12 +6767,14 @@ def day(self) -> Expr: Returns ------- Day as UInt32 + """ return wrap_expr(self._pyexpr.day()) def ordinal_day(self) -> Expr: """ Extract ordinal day from underlying Date representation. + Can be performed on Date and Datetime. Returns the day of year starting from 1. @@ -6771,12 +6783,14 @@ def ordinal_day(self) -> Expr: Returns ------- Day as UInt32 + """ return wrap_expr(self._pyexpr.ordinal_day()) def hour(self) -> Expr: """ Extract hour from underlying DateTime representation. + Can be performed on Datetime. Returns the hour number from 0 to 23. @@ -6784,12 +6798,14 @@ def hour(self) -> Expr: Returns ------- Hour as UInt32 + """ return wrap_expr(self._pyexpr.hour()) def minute(self) -> Expr: """ Extract minutes from underlying DateTime representation. + Can be performed on Datetime. Returns the minute number from 0 to 59. @@ -6797,12 +6813,14 @@ def minute(self) -> Expr: Returns ------- Minute as UInt32 + """ return wrap_expr(self._pyexpr.minute()) def second(self) -> Expr: """ Extract seconds from underlying DateTime representation. + Can be performed on Datetime. Returns the second number from 0 to 59. @@ -6810,12 +6828,14 @@ def second(self) -> Expr: Returns ------- Second as UInt32 + """ return wrap_expr(self._pyexpr.second()) def nanosecond(self) -> Expr: """ Extract seconds from underlying DateTime representation. + Can be performed on Datetime. Returns the number of nanoseconds since the whole non-leap second. 
@@ -6824,13 +6844,12 @@ def nanosecond(self) -> Expr: Returns ------- Nanosecond as UInt32 + """ return wrap_expr(self._pyexpr.nanosecond()) def to_python_datetime(self) -> Expr: - """ - Go from Date/Datetime to python DateTime objects - """ + """Go from Date/Datetime to python DateTime objects.""" return wrap_expr(self._pyexpr).map( lambda s: s.dt.to_python_datetime(), return_dtype=Object ) @@ -6843,6 +6862,7 @@ def epoch(self, tu: str = "us") -> Expr: ---------- tu One of {'ns', 'us', 'ms', 's', 'd'} + """ if tu in DTYPE_TEMPORAL_UNITS: return self.timestamp(tu) @@ -6864,6 +6884,7 @@ def epoch_days(self) -> Expr: Returns ------- Days as Int32 + """ return wrap_expr(self._pyexpr).cast(Date).cast(Int32) @@ -6880,6 +6901,7 @@ def epoch_milliseconds(self) -> Expr: Returns ------- Milliseconds as Int64 + """ return self.timestamp("ms") @@ -6894,6 +6916,7 @@ def epoch_seconds(self) -> Expr: Returns ------- Milliseconds as Int64 + """ return wrap_expr(self._pyexpr.dt_epoch_seconds()) @@ -6905,18 +6928,22 @@ def timestamp(self, tu: str = "us") -> Expr: ---------- tu One of {'ns', 'us', 'ms'} + """ return wrap_expr(self._pyexpr.timestamp(tu)) def with_time_unit(self, tu: str) -> Expr: """ - Set time unit a Series of dtype Datetime or Duration. This does not modify - underlying data, and should be used to fix an incorrect time unit. + Set time unit a Series of dtype Datetime or Duration. + + This does not modify underlying data, and should be used to fix an incorrect + time unit. Parameters ---------- tu Time unit for the `Datetime` Series: one of {"ns", "us", "ms"} + """ return wrap_expr(self._pyexpr.dt_with_time_unit(tu)) @@ -6928,6 +6955,7 @@ def cast_time_unit(self, tu: str) -> Expr: ---------- tu Time unit for the `Datetime` Series: any of {"ns", "us", "ms"} + """ return wrap_expr(self._pyexpr.dt_cast_time_unit(tu)) @@ -6939,13 +6967,13 @@ def and_time_unit(self, tu: str, dtype: type[DataType] = Datetime) -> Expr: .. deprecated:: Use :func:`with_time_unit` instead. 
- Parameters ---------- tu Time unit for the `Datetime` Series: any of {"ns", "us", "ms"} dtype Output data type. + """ return self.with_time_unit(tu) @@ -6987,6 +7015,7 @@ def days(self) -> Expr: Returns ------- A series of dtype Int64 + """ return wrap_expr(self._pyexpr.duration_days()) @@ -6997,6 +7026,7 @@ def hours(self) -> Expr: Returns ------- A series of dtype Int64 + """ return wrap_expr(self._pyexpr.duration_hours()) @@ -7007,6 +7037,7 @@ def minutes(self) -> Expr: Returns ------- A series of dtype Int64 + """ return wrap_expr(self._pyexpr.duration_minutes()) @@ -7017,6 +7048,7 @@ def seconds(self) -> Expr: Returns ------- A series of dtype Int64 + """ return wrap_expr(self._pyexpr.duration_seconds()) @@ -7027,6 +7059,7 @@ def milliseconds(self) -> Expr: Returns ------- A series of dtype Int64 + """ return wrap_expr(self._pyexpr.duration_milliseconds()) @@ -7037,6 +7070,7 @@ def nanoseconds(self) -> Expr: Returns ------- A series of dtype Int64 + """ return wrap_expr(self._pyexpr.duration_nanoseconds()) @@ -7067,6 +7101,7 @@ def offset_by(self, by: str) -> Expr: Returns ------- Date/Datetime expression + """ return wrap_expr(self._pyexpr.dt_offset_by(by)) @@ -7085,7 +7120,7 @@ def expr_to_lit_or_expr( str_to_lit: bool = True, ) -> Expr: """ - Helper function that converts args to expressions. + Convert args to expressions. 
Parameters ---------- @@ -7097,6 +7132,7 @@ def expr_to_lit_or_expr( Returns ------- + Expr """ if isinstance(expr, str) and not str_to_lit: @@ -7117,9 +7153,7 @@ def expr_to_lit_or_expr( class ExprCatNameSpace: - """ - Namespace for categorical related expressions - """ + """Namespace for categorical related expressions.""" def __init__(self, expr: Expr): self._pyexpr = expr._pyexpr diff --git a/py-polars/polars/internals/frame.py b/py-polars/polars/internals/frame.py index 5a14912da565..f672dc14108a 100644 --- a/py-polars/polars/internals/frame.py +++ b/py-polars/polars/internals/frame.py @@ -1,6 +1,4 @@ -""" -Module containing logic related to eager DataFrames -""" +"""Module containing logic related to eager DataFrames.""" from __future__ import annotations import os @@ -347,7 +345,7 @@ def __init__( def estimated_size(self) -> int: """ - Returns an estimation of the total (heap) allocated size of the `DataFrame` in + Return an estimation of the total (heap) allocated size of the `DataFrame` in bytes. This estimation is the sum of the size of its buffers, validity, including @@ -365,9 +363,7 @@ def estimated_size(self) -> int: @classmethod def _from_pydf(cls: type[DF], py_df: PyDataFrame) -> DF: - """ - Construct Polars DataFrame from FFI PyDataFrame object. 
- """ + """Construct Polars DataFrame from FFI PyDataFrame object.""" df = cls.__new__(cls) df._df = py_df return df @@ -402,6 +398,7 @@ def _from_dict( Returns ------- DataFrame + """ return cls._from_pydf(dict_to_pydf(data, columns=columns)) @@ -430,6 +427,7 @@ def _from_records( Returns ------- DataFrame + """ return cls._from_pydf(sequence_to_pydf(data, columns=columns, orient=orient)) @@ -458,6 +456,7 @@ def _from_numpy( Returns ------- DataFrame + """ return cls._from_pydf(numpy_to_pydf(data, columns=columns, orient=orient)) @@ -488,6 +487,7 @@ def _from_arrow( Returns ------- DataFrame + """ return cls._from_pydf(arrow_to_pydf(data, columns=columns, rechunk=rechunk)) @@ -517,6 +517,7 @@ def _from_pandas( Returns ------- DataFrame + """ # path for table without rows that keeps datatype if data.shape[0] == 0: @@ -563,9 +564,7 @@ def _read_csv( sample_size: int = 1024, eol_char: str = "\n", ) -> DF: - """ - see pl.read_csv - """ + """See pl.read_csv.""" self = cls.__new__(cls) path: str | None @@ -674,7 +673,8 @@ def _read_parquet( row_count_offset: int = 0, low_memory: bool = False, ) -> DF: - """Read into a DataFrame from a parquet file. + """ + Read into a DataFrame from a parquet file. See Also -------- @@ -734,12 +734,15 @@ def _read_avro( ---------- file Path to a file or a file-like object. + columns + Columns. n_rows Stop reading from Apache Avro file after reading ``n_rows``. Returns ------- DataFrame + """ if isinstance(file, (str, Path)): file = format_path(file) @@ -771,14 +774,18 @@ def _read_ipc( list of column names. n_rows Stop reading from IPC file after reading ``n_rows``. + row_count_name + Row count name. + row_count_offset + Row count offset. rechunk Make sure that all data is contiguous. Returns ------- DataFrame - """ + """ if isinstance(file, (str, Path)): file = format_path(file) @@ -1074,6 +1081,7 @@ def write_json( Write to Json Lines format to_string Ignore file argument and return a string. 
+ """ if isinstance(file, (str, Path)): file = format_path(file) @@ -1213,6 +1221,7 @@ def write_avro( - "uncompressed" - "snappy" - "deflate" + """ if isinstance(file, (str, Path)): file = format_path(file) @@ -1250,6 +1259,7 @@ def write_ipc( - "uncompressed" - "lz4" - "zstd" + """ if compression is None: compression = "uncompressed" @@ -1285,7 +1295,6 @@ def to_dicts(self) -> list[dict[str, Any]]: [{'foo': 1, 'bar': 4}, {'foo': 2, 'bar': 5}, {'foo': 3, 'bar': 6}] """ - pydf = self._df names = self.columns @@ -1457,6 +1466,7 @@ def write_parquet( At the moment C++ supports more features. kwargs Arguments are passed to ``pyarrow.parquet.write_table``. + """ if compression is None: compression = "uncompressed" @@ -1681,9 +1691,7 @@ def __repr__(self) -> str: return self.__str__() def __getattr__(self, item: Any) -> PySeries: - """ - Access columns as attribute. - """ + """Access columns as attribute.""" # it is important that we return an AttributeError here # this is used by ipython to check some private # `_ipython_canary_method_should_not_exist_` @@ -1833,9 +1841,7 @@ def __getitem__( | tuple ), ) -> DF | pli.Series: - """ - Does quite a lot. Read the comments. - """ + """Get item. Does quite a lot. Read the comments.""" if isinstance(item, pli.Expr): # pragma: no cover warnings.warn( "'using expressions in []' is deprecated. please use 'select'", @@ -2048,13 +2054,14 @@ def __len__(self) -> int: def _repr_html_(self) -> str: """ - Used by jupyter notebooks to get a html table. + Format output data in HTML for display in Jupyter Notebooks. 
Output rows and columns can be modified by setting the following ENVIRONMENT variables: * POLARS_FMT_MAX_COLS: set the number of columns * POLARS_FMT_MAX_ROWS: set the number of rows + """ max_cols = int(os.environ.get("POLARS_FMT_MAX_COLS", default=75)) max_rows = int(os.environ.get("POLARS_FMT_MAX_ROWS", default=25)) @@ -2353,6 +2360,7 @@ def columns(self, columns: Sequence[str]) -> None: columns A list with new names for the `DataFrame`. The length of the list should be equal to the width of the `DataFrame`. + """ self._df.set_column_names(columns) @@ -2389,6 +2397,7 @@ def dtypes(self) -> list[type[DataType]]: See Also -------- schema : Returns a {colname:dtype} mapping. + """ # noqa: E501 return self._df.dtypes() @@ -3213,7 +3222,6 @@ def groupby_rolling( └─────────────────────┴───────┴───────┴───────┘ """ - return RollingGroupBy(self, index_column, period, offset, closed, by) def groupby_dynamic( @@ -3228,11 +3236,12 @@ def groupby_dynamic( by: str | list[str] | pli.Expr | list[pli.Expr] | None = None, ) -> DynamicGroupBy: """ - Groups based on a time value (or index value of type Int32, Int64). Time windows - are calculated and rows are assigned to windows. Different from a normal groupby - is that a row can be member of multiple groups. The time/index window could be - seen as a rolling window, with a window size determined by dates/times/values - instead of slots in the DataFrame. + Group based on a time value (or index value of type Int32, Int64). + + Time windows are calculated and rows are assigned to windows. Different from a + normal groupby is that a row can be member of multiple groups. The time/index + window could be seen as a rolling window, with a window size determined by + dates/times/values instead of slots in the DataFrame. 
A window is defined by: @@ -3520,7 +3529,6 @@ def groupby_dynamic( └─────────────────┴─────────────────┴─────┴─────────────────┘ """ # noqa: E501 - return DynamicGroupBy( self, index_column, @@ -3789,7 +3797,7 @@ def join( asof_by_right: str | list[str] | None = None, ) -> DF: """ - SQL like joins. + Join in SQL-like fashion. Parameters ---------- @@ -3874,6 +3882,7 @@ def join( **Joining on columns with categorical data** See pl.StringCache(). + """ if how == "asof": # pragma: no cover warnings.warn( @@ -4009,6 +4018,7 @@ def apply( In this case it is better to use the following expression: >>> df.select(pl.col("foo") * 2 + pl.col("bar")) # doctest: +IGNORE_RESULT + """ out, is_df = self._df.apply(f, return_dtype, inference_size) if is_df: @@ -4723,6 +4733,7 @@ def pivot( Returns ------- + DataFrame Examples -------- @@ -5106,6 +5117,7 @@ def lazy(self: DF) -> pli.LazyFrame[DF]: Returns ------- LazyFrame + """ return self._lazyframe_class._from_pyldf(self._df.lazy()) @@ -5224,6 +5236,7 @@ def with_columns( ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │ └─────┴──────┴───────┴──────┴───────┘ + """ if exprs is not None and not isinstance(exprs, Sequence): exprs = [exprs] @@ -5987,6 +6000,7 @@ def take_every(self: DF, n: int) -> DF: ├╌╌╌╌╌┼╌╌╌╌╌┤ │ 3 ┆ 7 │ └─────┴─────┘ + """ return self.select(pli.col("*").take_every(n)) @@ -6322,6 +6336,7 @@ def __init__( Make sure that the order of the groups remain consistent. This is more expensive than a default groupby. Note that this only works in expression aggregations. + """ self._df = df self._dataframe_class = dataframe_class @@ -6342,6 +6357,7 @@ def _select(self, columns: str | list[str]) -> GBSelection[DF]: # pragma: no co ---------- columns One or multiple columns. 
+ """ warnings.warn( "accessing GroupBy by index is deprecated, consider using the `.agg`" @@ -6594,7 +6610,6 @@ def agg( └─────┴─────────┴──────────────┘ """ - # a single list comprehension would be cleaner, but mypy complains on different # lines for py3.7 vs py3.10 about typing errors, so this is the same logic, # but broken down into two small functions @@ -6763,9 +6778,7 @@ def tail(self, n: int = 5) -> DF: ) def _select_all(self) -> GBSelection[DF]: - """ - Select all columns for aggregation. - """ + """Select all columns for aggregation.""" return GBSelection( self._df, self.by, @@ -7165,9 +7178,7 @@ def agg_list(self) -> DF: class PivotOps(Generic[DF]): - """ - Utility class returned in a pivot operation. - """ + """Utility class returned in a pivot operation.""" def __init__( self, @@ -7184,74 +7195,56 @@ def __init__( self._dataframe_class = dataframe_class def first(self) -> DF: - """ - Get the first value per group. - """ + """Get the first value per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "first") ) def sum(self) -> DF: - """ - Get the sum per group. - """ + """Get the sum per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "sum") ) def min(self) -> DF: - """ - Get the minimal value per group. - """ + """Get the minimal value per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "min") ) def max(self) -> DF: - """ - Get the maximal value per group. - """ + """Get the maximal value per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "max") ) def mean(self) -> DF: - """ - Get the mean value per group. 
- """ + """Get the mean value per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "mean") ) def count(self) -> DF: - """ - Count the values per group. - """ + """Count the values per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "count") ) def median(self) -> DF: - """ - Get the median value per group. - """ + """Get the median value per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "median") ) def last(self) -> DF: - """ - Get the last value per group. - """ + """Get the last value per group.""" return self._dataframe_class._from_pydf( self._df.pivot(self.by, self.pivot_column, self.values_column, "last") ) class GBSelection(Generic[DF]): - """ - Utility class returned in a groupby operation. - """ + """Utility class returned in a groupby operation.""" def __init__( self, @@ -7266,41 +7259,31 @@ def __init__( self._dataframe_class = dataframe_class def first(self) -> DF: - """ - Aggregate the first values in the group. - """ + """Aggregate the first values in the group.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "first") ) def last(self) -> DF: - """ - Aggregate the last values in the group. - """ + """Aggregate the last values in the group.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "last") ) def sum(self) -> DF: - """ - Reduce the groups to the sum. - """ + """Reduce the groups to the sum.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "sum") ) def min(self) -> DF: - """ - Reduce the groups to the minimal value. - """ + """Reduce the groups to the minimal value.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "min") ) def max(self) -> DF: - """ - Reduce the groups to the maximal value. 
- """ + """Reduce the groups to the maximal value.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "max") ) @@ -7337,17 +7320,13 @@ def count(self) -> DF: ) def mean(self) -> DF: - """ - Reduce the groups to the mean values. - """ + """Reduce the groups to the mean values.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "mean") ) def n_unique(self) -> DF: - """ - Count the unique values per group. - """ + """Count the unique values per group.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "n_unique") ) @@ -7371,17 +7350,13 @@ def quantile(self, quantile: float, interpolation: str = "nearest") -> DF: ) def median(self) -> DF: - """ - Return the median per group. - """ + """Return the median per group.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "median") ) def agg_list(self) -> DF: - """ - Aggregate the groups into Series. - """ + """Aggregate the groups into Series.""" return self._dataframe_class._from_pydf( self._df.groupby(self.by, self.selection, "agg_list") ) @@ -7391,9 +7366,7 @@ def apply( func: Callable[[Any], Any], return_dtype: type[DataType] | None = None, ) -> DF: - """ - Apply a function over the groups. - """ + """Apply a function over the groups.""" df = self.agg_list() if self.selection is None: raise TypeError( diff --git a/py-polars/polars/internals/functions.py b/py-polars/polars/internals/functions.py index 45a5186c7184..178f55959dd4 100644 --- a/py-polars/polars/internals/functions.py +++ b/py-polars/polars/internals/functions.py @@ -32,6 +32,7 @@ def get_dummies(df: pli.DataFrame) -> pli.DataFrame: ---------- df DataFrame to convert. + """ return df.to_dummies() @@ -83,8 +84,7 @@ def concat( how: str = "vertical", ) -> pli.DataFrame | pli.Series | pli.LazyFrame | pli.Expr: """ - Aggregate all the Dataframes/Series in a List of DataFrames/Series to a single - DataFrame/Series. 
+ Aggregate multiple Dataframes/Series to a single DataFrame/Series. Parameters ---------- @@ -275,7 +275,7 @@ def cut( category_label: str = "category", ) -> pli.DataFrame: """ - Bin values into discrete values + Bin values into discrete values. .. warning:: This function is experimental and might change without it being considered a diff --git a/py-polars/polars/internals/io.py b/py-polars/polars/internals/io.py index 715053f4f02f..ce10a610d331 100644 --- a/py-polars/polars/internals/io.py +++ b/py-polars/polars/internals/io.py @@ -55,6 +55,8 @@ def _prepare_file_arg( file: str | list[str] | TextIO | Path | BinaryIO | bytes, **kwargs: Any ) -> ContextManager[str | BinaryIO | list[str] | list[BinaryIO]]: """ + Prepare file argument. + Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]). Returned value is always usable as a context. @@ -64,8 +66,8 @@ def _prepare_file_arg( When fsspec is installed, remote file(s) is (are) opened with `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`. - """ + """ # Small helper to use a variable as context @contextmanager def managed_file(file: Any) -> Iterator[Any]: @@ -109,6 +111,7 @@ def read_ipc_schema(file: str | BinaryIO | Path | bytes) -> dict[str, type[DataT Returns ------- Dictionary mapping column names to datatypes + """ if isinstance(file, (str, Path)): file = format_path(file) @@ -130,6 +133,7 @@ def read_parquet_schema( Returns ------- Dictionary mapping column names to datatypes + """ if isinstance(file, (str, Path)): file = format_path(file) diff --git a/py-polars/polars/internals/lazy_frame.py b/py-polars/polars/internals/lazy_frame.py index 5654cd243d3b..4763429f0476 100644 --- a/py-polars/polars/internals/lazy_frame.py +++ b/py-polars/polars/internals/lazy_frame.py @@ -1,5 +1,5 @@ """ -This module contains all expressions and classes needed for lazy computation/query +Module containing all expressions and classes needed for lazy computation/query execution. 
""" from __future__ import annotations @@ -80,9 +80,7 @@ def _prepare_groupby_inputs( class LazyFrame(Generic[DF]): - """ - Representation of a Lazy computation graph/ query. - """ + """Representation of a Lazy computation graph/query.""" def __init__(self) -> None: self._ldf: PyLazyFrame @@ -191,7 +189,6 @@ def scan_parquet( -------- scan_ipc, scan_csv """ - # try fsspec scanner if not pli._is_local_file(file): scan = pli._scan_parquet_fsspec(file, storage_options) @@ -272,6 +269,7 @@ def read_json( See Also -------- write_json + """ if isinstance(file, StringIO): file = BytesIO(file.getvalue().encode()) @@ -326,6 +324,7 @@ def write_json( See Also -------- read_json + """ if isinstance(file, (str, Path)): file = format_path(file) @@ -435,9 +434,7 @@ def __str__(self) -> str: """ def describe_plan(self) -> str: - """ - A string representation of the unoptimized query plan. - """ + """Create a string representation of the unoptimized query plan.""" return self._ldf.describe_plan() def describe_optimized_plan( @@ -448,10 +445,7 @@ def describe_optimized_plan( simplify_expression: bool = True, slice_pushdown: bool = True, ) -> str: - """ - A string representation of the optimized query plan. - """ - + """Create a string representation of the optimized query plan.""" ldf = self._ldf.optimization_toggle( type_coercion, predicate_pushdown, @@ -486,6 +480,7 @@ def show_graph( Return dot syntax. This cannot be combined with `show` figsize Passed to matplotlib if `show` == True. + """ if raw_output: show = False @@ -535,7 +530,7 @@ def show_graph( def inspect(self: LDF, fmt: str = "{}") -> LDF: """ - Prints the value that this node in the computation graph evaluates to and passes + Print the value that this node in the computation graph evaluates to and passes on the value. >>> df = pl.DataFrame({"foo": [1, 1, -2, 3]}).lazy() @@ -579,6 +574,7 @@ def sort( Sort in descending order. nulls_last Place null values last. Can only be used if sorted by a single column. 
+ """ if type(by) is str: return self._from_pyldf(self._ldf.sort(by, reverse, nulls_last)) @@ -631,6 +627,7 @@ def collect( Returns ------- DataFrame + """ if no_optimization: predicate_pushdown = False @@ -691,6 +688,7 @@ def fetch( Returns ------- DataFrame + """ if no_optimization: predicate_pushdown = False @@ -709,7 +707,7 @@ def fetch( def lazy(self: LDF) -> LDF: """ - Returns lazy representation, i.e. itself. + Return lazy representation, i.e. itself. Useful for writing code that expects either a :class:`DataFrame` or :class:`LazyFrame`. @@ -717,6 +715,7 @@ def lazy(self: LDF) -> LDF: Returns ------- LazyFrame + """ return self @@ -765,6 +764,7 @@ def dtypes(self) -> list[type[DataType]]: See Also -------- schema : Returns a {colname:dtype} mapping. + """ # noqa: E501 return self._ldf.dtypes() @@ -789,15 +789,14 @@ def schema(self) -> dict[str, type[DataType]]: return self._ldf.schema() def cache(self: LDF) -> LDF: - """ - Cache the result once the execution of the physical plan hits this node. - """ + """Cache the result once the execution of the physical plan hits this node.""" return self._from_pyldf(self._ldf.cache()) def cleared(self: LDF) -> LDF: """ - Create an empty copy of the current LazyFrame, with identical schema but no - data. + Create an empty copy of the current LazyFrame. + + The copy has an identical schema but no data. See Also -------- @@ -832,6 +831,7 @@ def clone(self: LDF) -> LDF: -------- cleared : Create an empty copy of the current LazyFrame, with identical schema but no data. + """ return self._from_pyldf(self._ldf.clone()) @@ -1107,11 +1107,12 @@ def groupby_dynamic( by: str | list[str] | pli.Expr | list[pli.Expr] | None = None, ) -> LazyGroupBy[LDF]: """ - Groups based on a time value (or index value of type Int32, Int64). Time windows - are calculated and rows are assigned to windows. Different from a normal groupby - is that a row can be member of multiple groups. 
The time/index window could be - seen as a rolling window, with a window size determined by dates/times/values - instead of slots in the DataFrame. + Group based on a time value (or index value of type Int32, Int64). + + Time windows are calculated and rows are assigned to windows. Different from a + normal groupby is that a row can be member of multiple groups. The time/index + window could be seen as a rolling window, with a window size determined by + dates/times/values instead of slots in the DataFrame. .. seealso:: @@ -1177,7 +1178,6 @@ def groupby_dynamic( Also group by this column/these columns """ - if offset is None: if period is None: offset = f"-{every}" @@ -1278,6 +1278,7 @@ def join_asof( force_parallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. + """ if not isinstance(other, LazyFrame): raise ValueError(f"Expected a `LazyFrame` as join table, got {type(other)}") @@ -1597,6 +1598,7 @@ def with_columns( ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┤ │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false ┆ foo │ └─────┴──────┴───────┴──────┴───────┴─────┘ + """ if named_exprs and not Config.with_columns_kwargs: raise RuntimeError( @@ -1696,15 +1698,14 @@ def rename(self: LDF, mapping: dict[str, str]) -> LDF: ---------- mapping Key value pairs that map from old name to new name. + """ existing = list(mapping.keys()) new = list(mapping.values()) return self._from_pyldf(self._ldf.rename(existing, new)) def reverse(self: LDF) -> LDF: - """ - Reverse the DataFrame. - """ + """Reverse the DataFrame.""" return self._from_pyldf(self._ldf.reverse()) def shift(self: LDF, periods: int) -> LDF: @@ -1864,12 +1865,13 @@ def limit(self: LDF, n: int = 5) -> LDF: ---------- n Number of rows. + """ return self.slice(0, n) def head(self: LDF, n: int = 5) -> LDF: """ - Gets the first `n` rows of the DataFrame. + Get the first `n` rows of the DataFrame. .. 
note:: Consider using the :func:`fetch` operation when you only want to test your @@ -1883,6 +1885,7 @@ def head(self: LDF, n: int = 5) -> LDF: ---------- n Number of rows. + """ return self.limit(n) @@ -1894,19 +1897,16 @@ def tail(self: LDF, n: int = 5) -> LDF: ---------- n Number of rows. + """ return self._from_pyldf(self._ldf.tail(n)) def last(self: LDF) -> LDF: - """ - Get the last row of the DataFrame. - """ + """Get the last row of the DataFrame.""" return self.tail(1) def first(self: LDF) -> LDF: - """ - Get the first row of the DataFrame. - """ + """Get the first row of the DataFrame.""" return self.slice(0, 1) def with_row_count(self: LDF, name: str = "row_nr", offset: int = 0) -> LDF: @@ -1967,6 +1967,7 @@ def take_every(self: LDF, n: int) -> LDF: ├╌╌╌╌╌┼╌╌╌╌╌┤ │ 3 ┆ 7 │ └─────┴─────┘ + """ return self.select(pli.col("*").take_every(n)) @@ -1978,6 +1979,7 @@ def fill_null(self: LDF, fill_value: int | str | pli.Expr) -> LDF: ---------- fill_value Value to fill the missing values with. + """ if not isinstance(fill_value, pli.Expr): fill_value = pli.lit(fill_value) @@ -1995,57 +1997,42 @@ def fill_nan(self: LDF, fill_value: int | str | float | pli.Expr) -> LDF: ---------- fill_value Value to fill the NaN values with. + """ if not isinstance(fill_value, pli.Expr): fill_value = pli.lit(fill_value) return self._from_pyldf(self._ldf.fill_nan(fill_value._pyexpr)) def std(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their standard deviation value. - """ + """Aggregate the columns in the DataFrame to their standard deviation value.""" return self._from_pyldf(self._ldf.std()) def var(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their variance value. - """ + """Aggregate the columns in the DataFrame to their variance value.""" return self._from_pyldf(self._ldf.var()) def max(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their maximum value. 
- """ + """Aggregate the columns in the DataFrame to their maximum value.""" return self._from_pyldf(self._ldf.max()) def min(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their minimum value. - """ + """Aggregate the columns in the DataFrame to their minimum value.""" return self._from_pyldf(self._ldf.min()) def sum(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their sum value. - """ + """Aggregate the columns in the DataFrame to their sum value.""" return self._from_pyldf(self._ldf.sum()) def mean(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their mean value. - """ + """Aggregate the columns in the DataFrame to their mean value.""" return self._from_pyldf(self._ldf.mean()) def median(self: LDF) -> LDF: - """ - Aggregate the columns in the DataFrame to their median value. - """ + """Aggregate the columns in the DataFrame to their median value.""" return self._from_pyldf(self._ldf.median()) def quantile(self: LDF, quantile: float, interpolation: str = "nearest") -> LDF: - """ - Aggregate the columns in the DataFrame to their quantile value. - """ + """Aggregate the columns in the DataFrame to their quantile value.""" return self._from_pyldf(self._ldf.quantile(quantile, interpolation)) def explode( @@ -2148,6 +2135,7 @@ def unique( Returns ------- DataFrame with unique rows + """ if subset is not None and not isinstance(subset, list): subset = [subset] @@ -2329,6 +2317,7 @@ def map( Allow projection pushdown optimization to pass this node. no_optimizations Turn off all optimizations past this point. 
+ """ if no_optimizations: predicate_pushdown = False @@ -2429,9 +2418,7 @@ def unnest(self: LDF, names: str | list[str]) -> LDF: class LazyGroupBy(Generic[LDF]): - """ - Created by `df.lazy().groupby("foo)"` - """ + """Created by `df.lazy().groupby("foo)"`.""" def __init__(self, lgb: PyLazyGroupBy, lazyframe_class: type[LDF]) -> None: self.lgb = lgb diff --git a/py-polars/polars/internals/lazy_functions.py b/py-polars/polars/internals/lazy_functions.py index c06a94e0253d..8f202944bf2b 100644 --- a/py-polars/polars/internals/lazy_functions.py +++ b/py-polars/polars/internals/lazy_functions.py @@ -68,7 +68,8 @@ def col( name: (str | list[str] | Sequence[PolarsDataType] | pli.Series | PolarsDataType), ) -> pli.Expr: """ - A column in a DataFrame. + Return an expression representing a column in a DataFrame. + Can be used to select: - a single column by name @@ -227,7 +228,6 @@ def element() -> pli.Expr: └─────┴─────┴─────────────┘ """ - return col("") @@ -258,6 +258,7 @@ def count(column: str | pli.Series | None = None) -> pli.Expr | int: * ``pl.Series`` : count the values in the series. * ``str`` : count the values in this column. * ``None`` : count the number of values in this context. + """ if column is None: return pli.wrap_expr(_count()) @@ -272,6 +273,7 @@ def to_list(name: str) -> pli.Expr: Aggregate to list. Re-exported as `pl.list()` + """ return col(name).list() @@ -287,9 +289,7 @@ def std(column: pli.Series) -> float | None: def std(column: str | pli.Series) -> pli.Expr | float | None: - """ - Get the standard deviation. - """ + """Get the standard deviation.""" if isinstance(column, pli.Series): return column.std() return col(column).std() @@ -306,9 +306,7 @@ def var(column: pli.Series) -> float | None: def var(column: str | pli.Series) -> pli.Expr | float | None: - """ - Get the variance. 
- """ + """Get the variance.""" if isinstance(column, pli.Series): return column.var() return col(column).var() @@ -335,6 +333,7 @@ def max(column: str | list[pli.Expr | str] | pli.Series) -> pli.Expr | Any: the input: - Union[str, Series] -> aggregate the maximum value of that column. - List[Expr] -> aggregate the maximum value horizontally. + """ if isinstance(column, pli.Series): return column.max() @@ -364,6 +363,7 @@ def min(column: str | list[pli.Expr | str] | pli.Series) -> pli.Expr | Any: the input: - Union[str, Series] -> aggregate the sum value of that column. - List[Expr] -> aggregate the sum value horizontally. + """ if isinstance(column, pli.Series): return column.min() @@ -393,6 +393,7 @@ def sum(column: str | list[pli.Expr | str] | pli.Series) -> pli.Expr | Any: the input: - Union[str, Series] -> aggregate the sum value of that column. - List[Expr] -> aggregate the sum value horizontally. + """ if isinstance(column, pli.Series): return column.sum() @@ -416,9 +417,7 @@ def mean(column: pli.Series) -> float: def mean(column: str | pli.Series) -> pli.Expr | float: - """ - Get the mean value. - """ + """Get the mean value.""" if isinstance(column, pli.Series): return column.mean() return col(column).mean() @@ -435,9 +434,7 @@ def avg(column: pli.Series) -> float: def avg(column: str | pli.Series) -> pli.Expr | float: - """ - Alias for mean. - """ + """Alias for mean.""" return mean(column) @@ -452,9 +449,7 @@ def median(column: pli.Series) -> float | int: def median(column: str | pli.Series) -> pli.Expr | float | int: - """ - Get the median value. 
- """ + """Get the median value.""" if isinstance(column, pli.Series): return column.median() return col(column).median() @@ -505,7 +500,6 @@ def first(column: str | pli.Series | None = None) -> pli.Expr | Any: - Series -> Take first value in `Series` """ - if column is None: return pli.wrap_expr(_first()) @@ -538,13 +532,11 @@ def last(column: str | pli.Series | None = None) -> pli.Expr: Depending on the input type this function does different things: - input: - - None -> expression to take last column of a context. - str -> syntactic sugar for `pl.col(..).last()` - Series -> Take last value in `Series` - """ + """ if column is None: return pli.wrap_expr(_last()) @@ -576,6 +568,7 @@ def head(column: str | pli.Series, n: int | None = None) -> pli.Expr | pli.Serie Column name or Series. n Number of rows to take. + """ if isinstance(column, pli.Series): return column.head(n) @@ -602,6 +595,7 @@ def tail(column: str | pli.Series, n: int | None = None) -> pli.Expr | pli.Serie Column name or Series. n Number of rows to take. + """ if isinstance(column, pli.Series): return column.tail(n) @@ -613,7 +607,7 @@ def lit( dtype: type[DataType] | None = None, ) -> pli.Expr: """ - A literal value. + Return an expression representing a literal value. Parameters ---------- @@ -703,6 +697,7 @@ def spearman_rank_corr( Column name or Expression. b Column name or Expression. + """ if isinstance(a, str): a = col(a) @@ -724,6 +719,7 @@ def pearson_corr( Column name or Expression. b Column name or Expression. + """ if isinstance(a, str): a = col(a) @@ -745,6 +741,7 @@ def cov( Column name or Expression. b Column name or Expression. 
+ """ if isinstance(a, str): a = col(a) @@ -774,6 +771,7 @@ def map( Returns ------- Expr + """ exprs = pli.selection_to_pyexpr_list(exprs) return pli.wrap_expr(_map_mul(exprs, f, return_dtype, apply_groups=False)) @@ -807,6 +805,7 @@ def apply( Returns ------- Expr + """ exprs = pli.selection_to_pyexpr_list(exprs) return pli.wrap_expr(_map_mul(exprs, f, return_dtype, apply_groups=True)) @@ -834,6 +833,7 @@ def map_binary( Function to apply. return_dtype Output type of the udf. + """ if isinstance(a, str): a = col(a) @@ -860,6 +860,7 @@ def fold( Fn(acc, value) -> new_value exprs Expressions to aggregate over. May also be a wildcard expression. + """ # in case of pl.col("*") acc = pli.expr_to_lit_or_expr(acc, str_to_lit=True) @@ -871,9 +872,7 @@ def fold( def any(name: str | list[pli.Expr]) -> pli.Expr: - """ - Evaluate columnwise or elementwise with a bitwise OR operation. - """ + """Evaluate columnwise or elementwise with a bitwise OR operation.""" if isinstance(name, list): return fold(lit(False), lambda a, b: a.cast(bool) | b.cast(bool), name).alias( "any" @@ -985,14 +984,13 @@ def exclude( def all(name: str | list[pli.Expr] | None = None) -> pli.Expr: """ - This function is two things + Do one of two things. * function can do a columnwise or elementwise AND operation * a wildcard column selection Parameters ---------- - name If given this function will apply a bitwise & on the columns. @@ -1024,16 +1022,12 @@ def all(name: str | list[pli.Expr] | None = None) -> pli.Expr: def groups(column: str) -> pli.Expr: - """ - Syntactic sugar for `pl.col("foo").agg_groups()`. - """ + """Syntactic sugar for `pl.col("foo").agg_groups()`.""" return col(column).agg_groups() def quantile(column: str, quantile: float, interpolation: str = "nearest") -> pli.Expr: - """ - Syntactic sugar for `pl.col("foo").quantile(..)`. 
- """ + """Syntactic sugar for `pl.col("foo").quantile(..)`.""" return col(column).quantile(quantile, interpolation) @@ -1095,6 +1089,7 @@ def arange( Step size of the range. eager If eager evaluation is `True`, a Series is returned instead of an Expr. + """ low = pli.expr_to_lit_or_expr(low, str_to_lit=False) high = pli.expr_to_lit_or_expr(high, str_to_lit=False) @@ -1123,6 +1118,7 @@ def argsort_by( Columns use to determine the ordering. reverse Default is ascending. + """ if isinstance(exprs, str) or not isinstance(exprs, Sequence): exprs = [exprs] @@ -1235,8 +1231,8 @@ def _datetime( Returns ------- Expr of type `pl.Datetime` - """ + """ year_expr = pli.expr_to_lit_or_expr(year, str_to_lit=False) month_expr = pli.expr_to_lit_or_expr(month, str_to_lit=False) day_expr = pli.expr_to_lit_or_expr(day, str_to_lit=False) @@ -1282,6 +1278,7 @@ def _date( Returns ------- Expr of type pl.Date + """ return _datetime(year, month, day).cast(Date).alias("date") @@ -1338,7 +1335,7 @@ def concat_str(exprs: Sequence[pli.Expr | str] | pli.Expr, sep: str = "") -> pli def format(fstring: str, *args: pli.Expr | str) -> pli.Expr: """ - String format utility for expressions + Format expressions as a string. Parameters ---------- @@ -1454,11 +1451,14 @@ def collect_all( slice_pushdown: bool = False, ) -> list[pli.DataFrame]: """ - Collect multiple LazyFrames at the same time. This runs all the computation graphs - in parallel on Polars threadpool. + Collect multiple LazyFrames at the same time. + + This runs all the computation graphs in parallel on Polars threadpool. Parameters ---------- + lazy_frames + A list of LazyFrames to collect. type_coercion Do type coercion optimization. 
predicate_pushdown @@ -1482,6 +1482,7 @@ def collect_all( Returns ------- List[DataFrame] + """ if no_optimization: predicate_pushdown = False @@ -1581,7 +1582,7 @@ def struct( eager: bool = False, ) -> pli.Expr | pli.Series: """ - Collect several columns into a Series of dtype Struct + Collect several columns into a Series of dtype Struct. Parameters ---------- @@ -1633,7 +1634,6 @@ def struct( └─────┴───────┴─────┴─────────────┘ """ - if eager: return pli.select(struct(exprs, eager=False)).to_series() exprs = pli.selection_to_pyexpr_list(exprs) @@ -1693,6 +1693,7 @@ def repeat( Run eagerly and collect into a `Series` name Only used in `eager` mode. As expression, us `alias` + """ if eager: if name is None: @@ -1734,6 +1735,8 @@ def arg_where( ---------- condition Boolean expression to evaluate + eager + Whether to apply this function eagerly (as opposed to lazily). Examples -------- @@ -1749,6 +1752,7 @@ def arg_where( 1 3 ] + """ if eager: if not isinstance(condition, pli.Series): diff --git a/py-polars/polars/internals/series.py b/py-polars/polars/internals/series.py index 71314ff2267e..fd940c58c5f8 100644 --- a/py-polars/polars/internals/series.py +++ b/py-polars/polars/internals/series.py @@ -110,6 +110,7 @@ def get_ffi_func( Returns ------- ffi function, or None if not found + """ ffi_name = dtype_to_ffiname(dtype) fname = name.replace("<>", ffi_name) @@ -250,9 +251,7 @@ def _repeat( @classmethod def _from_arrow(cls, name: str, values: pa.Array, rechunk: bool = True) -> Series: - """ - Construct a Series from an Arrow Array. - """ + """Construct a Series from an Arrow Array.""" return cls._from_pyseries(arrow_to_pyseries(name, values, rechunk)) @classmethod @@ -262,9 +261,7 @@ def _from_pandas( values: pd.Series | pd.DatetimeIndex, nan_to_none: bool = True, ) -> Series: - """ - Construct a Series from a pandas Series or DatetimeIndex. 
- """ + """Construct a Series from a pandas Series or DatetimeIndex.""" return cls._from_pyseries( pandas_to_pyseries(name, values, nan_to_none=nan_to_none) ) @@ -621,7 +618,7 @@ def __setitem__( def estimated_size(self) -> int: """ - Returns an estimation of the total (heap) allocated size of the `Series` in + Return an estimation of the total (heap) allocated size of the `Series` in bytes. This estimation is the sum of the size of its buffers, validity, including @@ -634,6 +631,7 @@ def estimated_size(self) -> int: this function returns the visible size of the buffer, not its total capacity. FFI buffers are included in this estimation. + """ return self._s.estimated_size() @@ -661,6 +659,7 @@ def any(self) -> bool: Returns ------- Boolean literal + """ return self.to_frame().select(pli.col(self.name).any()).to_series()[0] @@ -671,35 +670,28 @@ def all(self) -> bool: Returns ------- Boolean literal + """ return self.to_frame().select(pli.col(self.name).all()).to_series()[0] def log(self, base: float = math.e) -> Series: - """ - Compute the logarithm to a given base - """ + """Compute the logarithm to a given base.""" return self.to_frame().select(pli.col(self.name).log(base)).to_series() def log10(self) -> Series: - """ - Return the base 10 logarithm of the input array, element-wise. - """ + """Compute the base 10 logarithm of the input array, element-wise.""" return self.log(10.0) def exp(self) -> Series: - """ - Return the exponential element-wise - """ + """Compute the exponential, element-wise.""" return self.to_frame().select(pli.col(self.name).exp()).to_series() def drop_nulls(self) -> Series: - """ - Create a new Series that copies data from this Series without null values. 
- """ + """Create a new Series that copies data from this Series without null values.""" return wrap_s(self._s.drop_nulls()) def drop_nans(self) -> Series: - """ """ + """Drop NaN values.""" return self.filter(self.is_not_nan()) def to_frame(self) -> pli.DataFrame: @@ -752,6 +744,7 @@ def inner_dtype(self) -> type[DataType] | None: Returns ------- DataType + """ return self._s.inner_dtype() @@ -877,9 +870,7 @@ def mean(self) -> int | float: return self._s.mean() def product(self) -> int | float: - """ - Reduce this Series to the product value. - """ + """Reduce this Series to the product value.""" return self.to_frame().select(pli.col(self.name).product()).to_series()[0] def min(self) -> int | float | date | datetime | timedelta: @@ -1050,7 +1041,7 @@ def value_counts(self, sort: bool = False) -> pli.DataFrame: def unique_counts(self) -> Series: """ - Returns a count of the unique values in the order of appearance. + Return a count of the unique values in the order of appearance. Examples -------- @@ -1063,6 +1054,7 @@ def unique_counts(self) -> Series: 2 3 ] + """ return pli.select(pli.lit(self).unique_counts()).to_series() @@ -1138,14 +1130,12 @@ def cumulative_eval( @property def name(self) -> str: - """ - Get the name of this Series. - """ + """Get the name of this Series.""" return self._s.name() def alias(self, name: str) -> Series: """ - Returns a copy of the Series with a new alias/name. + Return a copy of the Series with a new alias/name. Parameters ---------- @@ -1156,6 +1146,7 @@ def alias(self, name: str) -> Series: -------- >>> srs = pl.Series("x", [1, 2, 3]) >>> new_aliased_srs = srs.alias("y") + """ s = self.clone() s._s.rename(name) @@ -1204,15 +1195,11 @@ def rename(self, name: str, in_place: bool = False) -> Series | None: return self.alias(name) def chunk_lengths(self) -> list[int]: - """ - Get the length of each individual chunk. 
- """ + """Get the length of each individual chunk.""" return self._s.chunk_lengths() def n_chunks(self) -> int: - """ - Get the number of chunks that this Series contains. - """ + """Get the number of chunks that this Series contains.""" return self._s.n_chunks() def cumsum(self, reverse: bool = False) -> Series: @@ -1602,21 +1589,15 @@ def argsort(self, reverse: bool = False, nulls_last: bool = False) -> Series: return wrap_s(self._s.argsort(reverse, nulls_last)) def arg_unique(self) -> Series: - """ - Get unique index as Series. - """ + """Get unique index as Series.""" return wrap_s(self._s.arg_unique()) def arg_min(self) -> int | None: - """ - Get the index of the minimal value. - """ + """Get the index of the minimal value.""" return self._s.arg_min() def arg_max(self) -> int | None: - """ - Get the index of the maximal value. - """ + """Get the index of the maximal value.""" return self._s.arg_max() def unique(self, maintain_order: bool = False) -> Series: @@ -1671,16 +1652,16 @@ def take(self, indices: np.ndarray | list[int] | pli.Expr) -> Series: return wrap_s(self._s.take(indices)) def null_count(self) -> int: - """ - Count the null values in this Series. - """ + """Count the null values in this Series.""" return self._s.null_count() def has_validity(self) -> bool: """ - Returns True if the Series has a validity bitmask. If there is none, it means - that there are no null values. Use this to swiftly assert a Series does not have - null values. + Return True if the Series has a validity bitmask. + + If there is none, it means that there are no null values. + Use this to swiftly assert a Series does not have null values. 
+ """ return self._s.has_validity() @@ -1904,6 +1885,7 @@ def arg_true(self) -> Series: Returns ------- UInt32 Series + """ return pli.arg_where(self, eager=True) @@ -1938,6 +1920,7 @@ def is_first(self) -> Series: Returns ------- Boolean Series + """ return wrap_s(self._s.is_first()) @@ -1989,6 +1972,7 @@ def explode(self) -> Series: Returns ------- Exploded Series of same dtype + """ return wrap_s(self._s.explode()) @@ -2035,9 +2019,7 @@ def len(self) -> int: @property def shape(self) -> tuple[int]: - """ - Shape of this Series. - """ + """Shape of this Series.""" return (self._s.len(),) def __len__(self) -> int: @@ -2162,6 +2144,7 @@ def rechunk(self, in_place: bool = False) -> Series | None: ---------- in_place In place or not. + """ opt_s = self._s.rechunk(in_place) if in_place: @@ -2184,6 +2167,7 @@ def reverse(self) -> Series: 2 1 ] + """ return wrap_s(self._s.reverse()) @@ -2253,7 +2237,7 @@ def is_boolean(self) -> bool: def is_utf8(self) -> bool: """ - Checks if this Series datatype is a Utf8. + Check if this Series datatype is a Utf8. Examples -------- @@ -2305,9 +2289,7 @@ def __array_ufunc__( *inputs: Any, **kwargs: Any, ) -> Series: - """ - Numpy universal functions. - """ + """Numpy universal functions.""" if not _NUMPY_AVAILABLE: raise ImportError("'numpy' is required for this functionality.") @@ -2412,6 +2394,7 @@ def to_numpy( of nulls, or for non-primitive types). kwargs kwargs will be sent to pyarrow.Array.to_numpy + """ def convert_to_date(arr: np.ndarray) -> np.ndarray: @@ -2457,9 +2440,7 @@ def to_arrow(self) -> pa.Array: return self._s.to_arrow() def to_pandas(self) -> pd.Series: - """ - Convert this Series to a pandas Series - """ + """Convert this Series to a pandas Series.""" if not _PYARROW_AVAILABLE: # pragma: no cover raise ImportError( "'pyarrow' is required for converting a 'polars' Series to a 'pandas'" @@ -2481,6 +2462,7 @@ def set(self, filter: Series, value: int | float | str) -> Series: Boolean mask. 
value Value to replace the the masked values with. + """ f = get_ffi_func("set_with_mask_<>", self.dtype, self._s) if f is None: @@ -2519,8 +2501,8 @@ def set_at_idx( Returns ------- the series mutated - """ + """ if self.is_numeric() or self.is_datelike(): idx = Series("", idx) if isinstance(value, (int, float, bool)): @@ -2590,6 +2572,7 @@ def clone(self) -> "Series": -------- cleared : Create an empty copy of the current Series, with identical schema but no data. + """ return wrap_s(self._s.clone()) @@ -2600,9 +2583,7 @@ def __deepcopy__(self, memo: None = None) -> Series: return self.clone() def fill_nan(self, fill_value: str | int | float | bool | pli.Expr) -> Series: - """ - Fill floating point NaN value with a fill value - """ + """Fill floating point NaN value with a fill value.""" return ( self.to_frame().select(pli.col(self.name).fill_nan(fill_value)).to_series() ) @@ -2666,6 +2647,7 @@ def floor(self) -> Series: the float value. Only works on floating point Series + """ return wrap_s(self._s.floor()) @@ -2675,6 +2657,7 @@ def ceil(self) -> Series: the float value. Only works on floating point Series + """ return self.to_frame().select(pli.col(self.name).ceil()).to_series() @@ -2698,6 +2681,7 @@ def round(self, decimals: int) -> Series: ---------- decimals number of decimals to round by. + """ return wrap_s(self._s.round(decimals)) @@ -2716,6 +2700,7 @@ def dot(self, other: Series) -> float | None: ---------- other Series to compute dot product with + """ return self._s.dot(other._s) @@ -2738,7 +2723,7 @@ def mode(self) -> Series: def sign(self) -> Series: """ - Returns an element-wise indication of the sign of a number. + Return an element-wise indication of the sign of a number. Examples -------- @@ -3028,6 +3013,7 @@ def apply( Returns ------- Series + """ if return_dtype is None: pl_return_dtype = None @@ -3064,6 +3050,7 @@ def shift(self, periods: int = 1) -> Series: ---------- periods Number of places to shift (may be negative). 
+ """ return wrap_s(self._s.shift(periods)) @@ -3078,6 +3065,7 @@ def shift_and_fill(self, periods: int, fill_value: int | pli.Expr) -> Series: Number of places to shift (may be negative). fill_value Fill None values with the result of this expression. + """ return self.to_frame().select( pli.col(self.name).shift_and_fill(periods, fill_value) @@ -3136,7 +3124,8 @@ def rolling_min( center: bool = False, ) -> Series: """ - apply a rolling min (moving min) over the values in this array. + Apply a rolling min (moving min) over the values in this array. + A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weight` vector. The resulting values will be aggregated to their sum. @@ -3418,9 +3407,9 @@ def rolling_apply( center: bool = False, ) -> pli.Series: """ - Allows a custom rolling window function. - Prefer the specific rolling window functions over this one, as they are faster. - Prefer: + Apply a custom rolling window function. + + Prefer the specific rolling window functions over this one, as they are faster: * rolling_min * rolling_max @@ -3556,6 +3545,7 @@ def rolling_skew(self, window_size: int, bias: bool = True) -> Series: Size of the rolling window bias If False, then the calculations are corrected for statistical bias. + """ return self.to_frame().select( pli.col(self.name).rolling_skew(window_size, bias) @@ -3742,6 +3732,7 @@ def reinterpret(self, signed: bool = True) -> Series: ---------- signed If True, reinterpret as `pl.Int64`. Otherwise, reinterpret as `pl.UInt64`. 
+ """ return wrap_s(self._s.reinterpret(signed)) @@ -3767,9 +3758,7 @@ def interpolate(self) -> Series: return wrap_s(self._s.interpolate()) def abs(self) -> Series: - """ - Take absolute values - """ + """Compute absolute values.""" return wrap_s(self._s.abs()) def rank(self, method: str = "average", reverse: bool = False) -> Series: @@ -3842,6 +3831,7 @@ def diff(self, n: int = 1, null_behavior: str = "ignore") -> Series: number of slots to shift null_behavior {'ignore', 'drop'} + """ return wrap_s(self._s.diff(n, null_behavior)) @@ -3888,11 +3878,14 @@ def pct_change(self, n: int = 1) -> Series: 3.0 3.0 ] + """ return self.to_frame().select(pli.col(self.name).pct_change(n)).to_series() def skew(self, bias: bool = True) -> float | None: - r"""Compute the sample skewness of a data set. + r""" + Compute the sample skewness of a data set. + For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution. The @@ -3931,7 +3924,9 @@ def skew(self, bias: bool = True) -> float | None: return self._s.skew(bias) def kurtosis(self, fisher: bool = True, bias: bool = True) -> float | None: - """Compute the kurtosis (Fisher or Pearson) of a dataset. + """ + Compute the kurtosis (Fisher or Pearson) of a dataset. + Kurtosis is the fourth central moment divided by the square of the variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. @@ -3947,6 +3942,7 @@ def kurtosis(self, fisher: bool = True, bias: bool = True) -> float | None: Pearson's definition is used (normal ==> 3.0). bias : bool, optional If False, then the calculations are corrected for statistical bias. 
+ """ return self._s.kurtosis(fisher, bias) @@ -4000,6 +3996,7 @@ def reshape(self, dims: tuple[int, ...]) -> Series: Returns ------- Series + """ return wrap_s(self._s.reshape(dims)) @@ -4011,6 +4008,7 @@ def shuffle(self, seed: int = 0) -> Series: ---------- seed Seed initialization + """ return wrap_s(self._s.shuffle(seed)) @@ -4199,14 +4197,13 @@ def set_sorted(self, reverse: bool = False) -> Series: ---------- reverse If the `Series` order is reversed, e.g. descending. + """ return wrap_s(self._s.set_sorted(reverse)) @property def time_unit(self) -> str | None: - """ - Get the time unit of underlying Datetime Series as {"ns", "us", "ms"} - """ + """Get the time unit of underlying Datetime Series as {"ns", "us", "ms"}.""" return self._s.time_unit() # Below are the namespaces defined. Do not move these up in the definition of @@ -4215,37 +4212,27 @@ def time_unit(self) -> str | None: @property def dt(self) -> DateTimeNameSpace: - """ - Create an object namespace of all datetime related methods. - """ + """Create an object namespace of all datetime related methods.""" return DateTimeNameSpace(self) @property def arr(self) -> ListNameSpace: - """ - Create an object namespace of all list related methods. - """ + """Create an object namespace of all list related methods.""" return ListNameSpace(self) @property def str(self) -> StringNameSpace: - """ - Create an object namespace of all string related methods. - """ + """Create an object namespace of all string related methods.""" return StringNameSpace(self) @property def cat(self) -> CatNameSpace: - """ - Create an object namespace of all categorical related methods. - """ + """Create an object namespace of all categorical related methods.""" return CatNameSpace(self) @property def struct(self) -> StructNameSpace: - """ - Create an object namespace of all struct related methods. 
- """ + """Create an object namespace of all struct related methods.""" return StructNameSpace(self) @@ -4254,27 +4241,24 @@ def __init__(self, s: Series): self.s = s def to_frame(self) -> pli.DataFrame: - """ - Convert this Struct Series to a DataFrame - """ + """Convert this Struct Series to a DataFrame.""" return pli.wrap_df(self.s._s.struct_to_frame()) def field(self, name: str) -> Series: """ - Retrieve one of the fields of this `Struct` as a new Series + Retrieve one of the fields of this `Struct` as a new Series. Parameters ---------- name Name of the field + """ return pli.select(pli.lit(self.s).struct.field(name)).to_series() @property def fields(self) -> list[str]: - """ - Get the names of the fields - """ + """Get the names of the fields.""" return self.s._s.struct_fields() def rename_fields(self, names: list[str]) -> Series: @@ -4285,14 +4269,13 @@ def rename_fields(self, names: list[str]) -> Series: ---------- names New names in the order of the struct's fields + """ return pli.select(pli.lit(self.s).struct.rename_fields(names)).to_series() class StringNameSpace: - """ - Series.str namespace. - """ + """Series.str namespace.""" def __init__(self, series: Series): self._s = series._s @@ -4366,7 +4349,6 @@ def strptime( └────────────┘ """ - s = wrap_s(self._s) return ( s.to_frame() @@ -4485,6 +4467,7 @@ def ends_with(self, sub: str) -> Series: -------- contains : Check if string contains a substring that matches a regex. starts_with : Check if string values start with a substring. + """ s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).str.ends_with(sub)).to_series() @@ -4514,13 +4497,14 @@ def starts_with(self, sub: str) -> Series: -------- contains : Check if string contains a substring that matches a regex. ends_with : Check if string values end with a substring. 
+ """ s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).str.starts_with(sub)).to_series() def decode(self, encoding: str, strict: bool = False) -> Series: """ - Decodes a value using the provided encoding. + Decode a value using the provided encoding. Parameters ---------- @@ -4542,6 +4526,7 @@ def decode(self, encoding: str, strict: bool = False) -> Series: "bar" null ] + """ if encoding == "hex": return wrap_s(self._s.str_hex_decode(strict)) @@ -4552,7 +4537,7 @@ def decode(self, encoding: str, strict: bool = False) -> Series: def encode(self, encoding: str) -> Series: """ - Encodes a value using the provided encoding + Encode a value using the provided encoding. Parameters ---------- @@ -4574,6 +4559,7 @@ def encode(self, encoding: str) -> Series: "626172" null ] + """ if encoding == "hex": return wrap_s(self._s.str_hex_encode()) @@ -4820,7 +4806,7 @@ def split_exact(self, by: str, n: int, inclusive: bool = False) -> Series: ) def replace(self, pattern: str, value: str, literal: bool = False) -> Series: - """ + r""" Replace first matching regex/literal substring with a new string value. Parameters @@ -4877,27 +4863,22 @@ def replace_all(self, pattern: str, value: str, literal: bool = False) -> Series "-bc-bc" "123-123" ] + """ return wrap_s(self._s.str_replace_all(pattern, value, literal)) def strip(self) -> Series: - """ - Remove leading and trailing whitespace. - """ + """Remove leading and trailing whitespace.""" s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).str.strip()).to_series() def lstrip(self) -> Series: - """ - Remove leading whitespace. - """ + """Remove leading whitespace.""" s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).str.lstrip()).to_series() def rstrip(self) -> Series: - """ - Remove trailing whitespace. 
- """ + """Remove trailing whitespace.""" s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).str.rstrip()).to_series() @@ -4912,6 +4893,7 @@ def zfill(self, alignment: int) -> Series: ---------- alignment Fill the value up to this length. + """ s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).str.zfill(alignment)).to_series() @@ -4983,15 +4965,11 @@ def rjust(self, width: int, fillchar: str = " ") -> Series: ) def to_lowercase(self) -> Series: - """ - Modify the strings to their lowercase equivalent. - """ + """Modify the strings to their lowercase equivalent.""" return wrap_s(self._s.str_to_lowercase()) def to_uppercase(self) -> Series: - """ - Modify the strings to their uppercase equivalent. - """ + """Modify the strings to their uppercase equivalent.""" return wrap_s(self._s.str_to_uppercase()) def slice(self, start: int, length: int | None = None) -> Series: @@ -5041,9 +5019,7 @@ def slice(self, start: int, length: int | None = None) -> Series: class ListNameSpace: - """ - Series.arr namespace. 
- """ + """Series.arr namespace.""" def __init__(self, series: Series): self._s = series._s @@ -5067,45 +5043,31 @@ def lengths(self) -> Series: return wrap_s(self._s.arr_lengths()) def sum(self) -> Series: - """ - Sum all the arrays in the list - """ + """Sum all the arrays in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.sum()).to_series() def max(self) -> Series: - """ - Compute the max value of the arrays in the list - """ + """Compute the max value of the arrays in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.max()).to_series() def min(self) -> Series: - """ - Compute the min value of the arrays in the list - """ + """Compute the min value of the arrays in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.min()).to_series() def mean(self) -> Series: - """ - Compute the mean value of the arrays in the list - """ + """Compute the mean value of the arrays in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.mean()).to_series() def sort(self, reverse: bool = False) -> Series: - """ - Sort the arrays in the list - """ + """Sort the arrays in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.sort(reverse)).to_series() def reverse(self) -> Series: - """ - Reverse the arrays in the list - """ + """Reverse the arrays in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.reverse()).to_series() def unique(self) -> Series: - """ - Get the unique/distinct values in the list - """ + """Get the unique/distinct values in the list.""" return pli.select(pli.lit(wrap_s(self._s)).arr.unique()).to_series() def concat(self, other: list[Series] | Series | list[Any]) -> Series: @@ -5116,6 +5078,7 @@ def concat(self, other: list[Series] | Series | list[Any]) -> Series: ---------- other Columns to concat into a List Series + """ s = wrap_s(self._s) return s.to_frame().select(pli.col(s.name).arr.concat(other)).to_series() @@ -5131,6 +5094,7 @@ def get(self, index: int) -> Series: ---------- index Index to return per 
sublist + """ return pli.select(pli.lit(wrap_s(self._s)).arr.get(index)).to_series() @@ -5163,15 +5127,11 @@ def join(self, separator: str) -> Series: return pli.select(pli.lit(wrap_s(self._s)).arr.join(separator)).to_series() def first(self) -> Series: - """ - Get the first value of the sublists. - """ + """Get the first value of the sublists.""" return self.get(0) def last(self) -> Series: - """ - Get the last value of the sublists. - """ + """Get the last value of the sublists.""" return self.get(-1) def contains(self, item: float | str | bool | int | date | datetime) -> Series: @@ -5186,6 +5146,7 @@ def contains(self, item: float | str | bool | int | date | datetime) -> Series: Returns ------- Boolean mask + """ s = pli.Series("", [item]) s_list = wrap_s(self._s) @@ -5199,6 +5160,7 @@ def arg_min(self) -> Series: Returns ------- Series of dtype UInt32/UInt64 (depending on compilation) + """ return pli.select(pli.lit(wrap_s(self._s)).arr.arg_min()).to_series() @@ -5209,6 +5171,7 @@ def arg_max(self) -> Series: Returns ------- Series of dtype UInt32/UInt64 (depending on compilation) + """ return pli.select(pli.lit(wrap_s(self._s)).arr.arg_max()).to_series() @@ -5233,6 +5196,7 @@ def diff(self, n: int = 1, null_behavior: str = "ignore") -> Series: [null, 1, ... 1] [null, -8, -1] ] + """ return pli.select( pli.lit(wrap_s(self._s)).arr.diff(n, null_behavior) @@ -5375,9 +5339,7 @@ def eval(self, expr: pli.Expr, parallel: bool = False) -> Series: class DateTimeNameSpace: - """ - Series.dt namespace. 
- """ + """Series.dt namespace.""" def __init__(self, series: Series): self._s = series._s @@ -5508,6 +5470,7 @@ def strftime(self, fmt: str) -> Series: Returns ------- Utf8 Series + """ return wrap_s(self._s.strftime(fmt)) @@ -5521,6 +5484,7 @@ def year(self) -> Series: Returns ------- Year as Int32 + """ return wrap_s(self._s.year()) @@ -5534,6 +5498,7 @@ def quarter(self) -> Series: Returns ------- Quarter as UInt32 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.quarter()).to_series() @@ -5548,6 +5513,7 @@ def month(self) -> Series: Returns ------- Month as UInt32 + """ return wrap_s(self._s.month()) @@ -5562,6 +5528,7 @@ def week(self) -> Series: Returns ------- Week number as UInt32 + """ return wrap_s(self._s.week()) @@ -5575,6 +5542,7 @@ def weekday(self) -> Series: Returns ------- Week day as UInt32 + """ return wrap_s(self._s.weekday()) @@ -5589,6 +5557,7 @@ def day(self) -> Series: Returns ------- Day as UInt32 + """ return wrap_s(self._s.day()) @@ -5603,6 +5572,7 @@ def ordinal_day(self) -> Series: Returns ------- Day as UInt32 + """ return wrap_s(self._s.ordinal_day()) @@ -5616,6 +5586,7 @@ def hour(self) -> Series: Returns ------- Hour as UInt32 + """ return wrap_s(self._s.hour()) @@ -5629,6 +5600,7 @@ def minute(self) -> Series: Returns ------- Minute as UInt32 + """ return wrap_s(self._s.minute()) @@ -5642,6 +5614,7 @@ def second(self) -> Series: Returns ------- Second as UInt32 + """ return wrap_s(self._s.second()) @@ -5656,6 +5629,7 @@ def nanosecond(self) -> Series: Returns ------- Nanosecond as UInt32 + """ return wrap_s(self._s.nanosecond()) @@ -5667,6 +5641,7 @@ def timestamp(self, tu: str = "us") -> Series: ---------- tu One of {'ns', 'us', 'ms'} + """ return wrap_s(self._s.timestamp(tu)) @@ -5683,30 +5658,22 @@ def to_python_datetime(self) -> Series: ) def min(self) -> date | datetime | timedelta: - """ - Return minimum as python DateTime - """ + """Return minimum as python DateTime.""" # we can ignore types because we are certain we get 
a logical type return wrap_s(self._s).min() # type: ignore[return-value] def max(self) -> date | datetime | timedelta: - """ - Return maximum as python DateTime - """ + """Return maximum as python DateTime.""" return wrap_s(self._s).max() # type: ignore[return-value] def median(self) -> date | datetime | timedelta: - """ - Return median as python DateTime - """ + """Return median as python DateTime.""" s = wrap_s(self._s) out = int(s.median()) return _to_python_datetime(out, s.dtype, s.time_unit) def mean(self) -> date | datetime: - """ - Return mean as python DateTime - """ + """Return mean as python DateTime.""" s = wrap_s(self._s) out = int(s.mean()) return _to_python_datetime(out, s.dtype, s.time_unit) @@ -5719,6 +5686,7 @@ def epoch(self, tu: str = "us") -> Series: ---------- tu One of {'ns', 'us', 'ms', 's', 'd'} + """ if tu in DTYPE_TEMPORAL_UNITS: return self.timestamp(tu) @@ -5740,6 +5708,7 @@ def epoch_days(self) -> Series: Returns ------- Days as Int32 + """ return wrap_s(self._s).cast(Date).cast(Int32) @@ -5756,6 +5725,7 @@ def epoch_milliseconds(self) -> Series: Returns ------- Milliseconds as Int64 + """ return self.timestamp("ms") @@ -5770,6 +5740,7 @@ def epoch_seconds(self) -> Series: Returns ------- Milliseconds as Int64 + """ return wrap_s(self._s.dt_epoch_seconds()) @@ -5782,6 +5753,7 @@ def with_time_unit(self, tu: str) -> Series: ---------- tu Time unit for the `Datetime` Series: any of {"ns", "us", "ms"} + """ return pli.select(pli.lit(wrap_s(self._s)).dt.with_time_unit(tu)).to_series() @@ -5793,6 +5765,7 @@ def cast_time_unit(self, tu: str) -> Series: ---------- tu Time unit for the `Datetime` Series: any of {"ns", "us", "ms"} + """ return pli.select(pli.lit(wrap_s(self._s)).dt.cast_time_unit(tu)).to_series() @@ -5807,6 +5780,7 @@ def and_time_unit(self, tu: str) -> Series: ---------- tu Time unit for the `Datetime` Series: any of {"ns", "us", "ms"} + """ return self.with_time_unit(tu) @@ -5844,6 +5818,7 @@ def days(self) -> Series: Returns 
------- A series of dtype Int64 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.days()).to_series() @@ -5854,6 +5829,7 @@ def hours(self) -> Series: Returns ------- A series of dtype Int64 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.hours()).to_series() @@ -5864,6 +5840,7 @@ def minutes(self) -> Series: Returns ------- A series of dtype Int64 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.minutes()).to_series() @@ -5874,6 +5851,7 @@ def seconds(self) -> Series: Returns ------- A series of dtype Int64 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.seconds()).to_series() @@ -5884,6 +5862,7 @@ def milliseconds(self) -> Series: Returns ------- A series of dtype Int64 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.milliseconds()).to_series() @@ -5894,6 +5873,7 @@ def nanoseconds(self) -> Series: Returns ------- A series of dtype Int64 + """ return pli.select(pli.lit(wrap_s(self._s)).dt.nanoseconds()).to_series() @@ -5924,14 +5904,13 @@ def offset_by(self, by: str) -> Series: Returns ------- Date/Datetime expression + """ return pli.select(pli.lit(wrap_s(self._s)).dt.offset_by(by)).to_series() class CatNameSpace: - """ - Namespace for categorical related series. - """ + """Namespace for categorical related series.""" def __init__(self, s: Series): self._s = s @@ -5974,14 +5953,13 @@ def set_ordering(self, ordering: str) -> Series: ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ z ┆ 3 │ └──────┴──────┘ + """ return pli.select(pli.lit(self._s).cat.set_ordering(ordering)).to_series() class SeriesIter: - """ - Utility class that allows slow iteration over a `Series`. 
- """ + """Utility class that allows slow iteration over a `Series`.""" def __init__(self, length: int, s: Series): self.len = length diff --git a/py-polars/polars/internals/slice.py b/py-polars/polars/internals/slice.py index 117e624ffea7..8b27ffea6656 100644 --- a/py-polars/polars/internals/slice.py +++ b/py-polars/polars/internals/slice.py @@ -7,8 +7,10 @@ class PolarsSlice: """ - Apply python slice object to Polars DataFrame or Series, - with full support for negative indexing and/or stride. + Apply Python slice object to Polars DataFrame or Series. + + Has full support for negative indexing and/or stride. + """ stop: int @@ -23,38 +25,28 @@ def __init__(self, obj: FrameOrSeries): @staticmethod def _as_original(lazy: "pli.LazyFrame", original: FrameOrSeries) -> FrameOrSeries: - """ - Return lazy variant back to its original type. - """ + """Return lazy variant back to its original type.""" frame = lazy.collect() return frame if isinstance(original, pli.DataFrame) else frame.to_series() @staticmethod def _lazify(obj: FrameOrSeries) -> "pli.LazyFrame": - """ - Make lazy to ensure efficent/consistent handling. - """ + """Make lazy to ensure efficient/consistent handling.""" return obj.lazy() if isinstance(obj, pli.DataFrame) else obj.to_frame().lazy() def _slice_positive(self, obj: "pli.LazyFrame") -> "pli.LazyFrame": - """ - Logic for slices with positive stride. - """ + """Logic for slices with positive stride.""" # note: at this point stride is guaranteed to be > 1 return obj.slice(self.start, self.slice_length).take_every(self.stride) def _slice_negative(self, obj: "pli.LazyFrame") -> "pli.LazyFrame": - """ - Logic for slices with negative stride. 
- """ + """Logic for slices with negative stride.""" stride = abs(self.stride) lazyslice = obj.slice(self.stop + 1, self.slice_length).reverse() return lazyslice.take_every(stride) if (stride > 1) else lazyslice def _slice_setup(self, s: slice) -> None: - """ - Normalise slice bounds, identify unbounded and/or zero-length slices. - """ + """Normalise slice bounds, identify unbounded and/or zero-length slices.""" # can normalise slice indices as we know object size obj_len = len(self.obj) start, stop, stride = slice(s.start, s.stop, s.step).indices(obj_len) @@ -83,9 +75,7 @@ def _slice_setup(self, s: slice) -> None: self.start, self.stop, self.stride = start, stop, stride def apply(self, s: slice) -> FrameOrSeries: - """ - Apply a slice operation, taking advantage of any potential fast paths. - """ + """Apply a slice operation, taking advantage of any potential fast paths.""" # normalise slice self._slice_setup(s) @@ -114,8 +104,11 @@ def apply(self, s: slice) -> FrameOrSeries: class LazyPolarsSlice: """ - Apply python slice object to Polars LazyFrame. Only slices with efficient - computation paths mapping directly to existing lazy methods are supported. + Apply Python slice object to Polars LazyFrame. + + Only slices with efficient computation paths mapping directly to existing lazy + methods are supported. + """ obj: "pli.LazyFrame" @@ -125,9 +118,12 @@ def __init__(self, obj: "pli.LazyFrame"): def apply(self, s: slice) -> "pli.LazyFrame": """ - Apply a slice operation. Note that LazyFrame is designed primarily for efficient - computation and does not know its own length so, unlike DataFrame, certain slice - patterns (such as those requiring negative stop/step) may not be supported. + Apply a slice operation. + + Note that LazyFrame is designed primarily for efficient computation and does not + know its own length so, unlike DataFrame, certain slice patterns (such as those + requiring negative stop/step) may not be supported. 
+ """ start = s.start or 0 step = s.step or 1 diff --git a/py-polars/polars/internals/whenthen.py b/py-polars/polars/internals/whenthen.py index 09d49b9cb329..9eb88d6c67f1 100644 --- a/py-polars/polars/internals/whenthen.py +++ b/py-polars/polars/internals/whenthen.py @@ -13,17 +13,13 @@ class WhenThenThen: - """ - Utility class. See the `when` function. - """ + """Utility class. See the `when` function.""" def __init__(self, pywhenthenthen: Any): self.pywhenthenthen = pywhenthenthen def when(self, predicate: pli.Expr | bool) -> WhenThenThen: - """ - Start another "when, then, otherwise" layer. - """ + """Start another "when, then, otherwise" layer.""" predicate = pli.expr_to_lit_or_expr(predicate) return WhenThenThen(self.pywhenthenthen.when(predicate._pyexpr)) @@ -46,6 +42,7 @@ def then( -------- when : Start another when, then, otherwise layer. otherwise : Values to return in case of the predicate being `False`. + """ expr_ = pli.expr_to_lit_or_expr(expr) return WhenThenThen(self.pywhenthenthen.then(expr_._pyexpr)) @@ -63,23 +60,20 @@ def otherwise( -------- when : Start another when, then, otherwise layer. then : Values to return in case of the predicate being `True`. + """ expr = pli.expr_to_lit_or_expr(expr) return pli.wrap_expr(self.pywhenthenthen.otherwise(expr._pyexpr)) class WhenThen: - """ - Utility class. See the `when` function. - """ + """Utility class. See the `when` function.""" def __init__(self, pywhenthen: Any): self._pywhenthen = pywhenthen def when(self, predicate: pli.Expr | bool) -> WhenThenThen: - """ - Start another "when, then, otherwise" layer. - """ + """Start another "when, then, otherwise" layer.""" predicate = pli.expr_to_lit_or_expr(predicate) return WhenThenThen(self._pywhenthen.when(predicate._pyexpr)) @@ -91,15 +85,14 @@ def otherwise(self, expr: pli.Expr | int | float | str | None) -> pli.Expr: -------- when : Start another when, then, otherwise layer. then : Values to return in case of the predicate being `True`. 
+ """ expr = pli.expr_to_lit_or_expr(expr) return pli.wrap_expr(self._pywhenthen.otherwise(expr._pyexpr)) class When: - """ - Utility class. See the `when` function. - """ + """Utility class. See the `when` function.""" def __init__(self, pywhen: pywhen): self._pywhen = pywhen @@ -123,6 +116,7 @@ def then( -------- when : Start another when, then, otherwise layer. otherwise : Values to return in case of the predicate being `False`. + """ expr = pli.expr_to_lit_or_expr(expr) pywhenthen = self._pywhen.then(expr._pyexpr) @@ -179,6 +173,7 @@ def when(expr: pli.Expr | bool) -> When: -------- then : Values to return in case of the predicate being `True`. otherwise : Values to return in case of the predicate being `False`. + """ expr = pli.expr_to_lit_or_expr(expr) pw = pywhen(expr._pyexpr) diff --git a/py-polars/polars/string_cache.py b/py-polars/polars/string_cache.py index f05717f1115b..80849e47781b 100644 --- a/py-polars/polars/string_cache.py +++ b/py-polars/polars/string_cache.py @@ -13,9 +13,12 @@ class StringCache: """ Context manager that allows data sources to share the same categorical features. + This will temporarily cache the string categories until the context manager is finished. + Examples + -------- >>> with pl.StringCache(): ... df1 = pl.DataFrame( ... [ @@ -53,6 +56,7 @@ class StringCache: ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ red ┆ 1 ┆ e │ └────────┴───────┴──────┘ + """ def __init__(self) -> None: @@ -73,7 +77,10 @@ def __exit__( def toggle_string_cache(toggle: bool) -> None: """ - Turn on/off the global string cache. This ensures that casts to Categorical types - have the categories when string values are equal. + Turn on/off the global string cache. + + This ensures that casts to Categorical types have the same categories when string + values are equal. 
+ """ pytoggle_string_cache(toggle) diff --git a/py-polars/polars/testing.py b/py-polars/polars/testing.py index 5e786d73fa84..8fba019ea855 100644 --- a/py-polars/polars/testing.py +++ b/py-polars/polars/testing.py @@ -110,8 +110,8 @@ def assert_frame_equal( >>> df1 = pl.DataFrame({"a": [1, 2, 3]}) >>> df2 = pl.DataFrame({"a": [2, 3, 4]}) >>> pl.testing.assert_frame_equal(df1, df2) # doctest: +SKIP - """ + """ if isinstance(left, pli.LazyFrame) and isinstance(right, pli.LazyFrame): left, right = left.collect(), right.collect() obj = "pli.LazyFrame" @@ -187,6 +187,7 @@ def assert_series_equal( >>> s1 = pl.Series([1, 2, 3]) >>> s2 = pl.Series([2, 3, 4]) >>> pl.testing.assert_series_equal(s1, s2) # doctest: +SKIP + """ obj = "Series" @@ -218,9 +219,7 @@ def _assert_series_inner( rtol: float, obj: str, ) -> None: - """ - Compares Series dtype + values - """ + """Compare Series dtype + values.""" try: can_be_subtracted = hasattr(dtype_to_py_type(left.dtype), "__sub__") except NotImplementedError: @@ -275,8 +274,11 @@ def raise_assert_detail( def _getattr_multi(obj: object, op: str) -> Any: """ - Allows `op` to be multiple layers deep, i.e. op="str.lengths" will mean we first - get the attribute "str", and then the attribute "lengths" + Allow `op` to be multiple layers deep. + + For example, op="str.lengths" will mean we first get the attribute "str", and then + the attribute "lengths". + """ op_list = op.split(".") return reduce(lambda o, m: getattr(o, m), op_list, obj) @@ -286,14 +288,14 @@ def verify_series_and_expr_api( input: pli.Series, expected: pli.Series | None, op: str, *args: Any, **kwargs: Any ) -> None: """ - Small helper function to test element-wise functions for both the series and - expressions api. + Test element-wise functions for both the series and expressions API. 
Examples -------- >>> s = pl.Series([1, 3, 2]) >>> expected = pl.Series([1, 2, 3]) >>> verify_series_and_expr_api(s, expected, "sort") + """ expr = _getattr_multi(pli.col("*"), op)(*args, **kwargs) result_expr: pli.Series = input.to_frame().select(expr)[ # type: ignore[assignment] @@ -308,9 +310,7 @@ def verify_series_and_expr_api( def is_categorical_dtype(data_type: Any) -> bool: - """ - Check if the input is a polars Categorical dtype. - """ + """Check if the input is a polars Categorical dtype.""" return ( type(data_type) is type and issubclass(data_type, Categorical) @@ -363,16 +363,14 @@ def is_categorical_dtype(data_type: Any) -> bool: strategy_dtypes = list(dtype_strategy_mapping) def between(draw: Callable, type_: type, min_: Any, max_: Any) -> Any: - """ - Draw a value in a given range from a type-inferred strategy. - """ + """Draw a value in a given range from a type-inferred strategy.""" strategy_init = from_type(type_).function # type: ignore[attr-defined] return draw(strategy_init(min_, max_)) @dataclass class column: """ - Define a column for use with `dataframes` strategy. + Define a column for use with the @dataframes strategy. Parameters ---------- @@ -396,6 +394,7 @@ class column: column(name='unique_small_ints', dtype=, strategy=None, null_probability=None, unique=True) >>> pl.testing.column(name="ccy", strategy=sampled_from(["GBP", "EUR", "JPY"])) column(name='ccy', dtype=, strategy=sampled_from(['GBP', 'EUR', 'JPY']), null_probability=None, unique=False) + """ # noqa: E501 name: str @@ -450,6 +449,8 @@ def columns( unique: bool = False, ) -> list[column]: """ + Define multiple columns for use with the @dataframes strategy. + Generate a fixed sequence of `column` objects suitable for passing to the @dataframes strategy, or using standalone (note that this function is not itself a strategy). @@ -495,6 +496,7 @@ def columns( >>> @given(dataframes(columns(["x", "y", "z"], unique=True))) ... def test_unique_xyz(df: pl.DataFrame) -> None: ... 
assert_something(df) + """ # create/assign named columns if cols is None: @@ -602,7 +604,7 @@ def series( 6414 -63290 ] - >>> + """ selectable_dtypes = [ dtype diff --git a/py-polars/tests/io/test_parquet.py b/py-polars/tests/io/test_parquet.py index 53b2e72a713c..74de72bbf96f 100644 --- a/py-polars/tests/io/test_parquet.py +++ b/py-polars/tests/io/test_parquet.py @@ -90,9 +90,7 @@ def test_select_projection() -> None: def test_parquet_chunks() -> None: - """ - This failed in https://github.com/pola-rs/polars/issues/545 - """ + # This failed in https://github.com/pola-rs/polars/issues/545 cases = [ 1048576, 1048577, @@ -116,9 +114,7 @@ def test_parquet_chunks() -> None: def test_parquet_datetime() -> None: - """ - This failed because parquet writers cast datetime to Date - """ + # This failed because parquet writers cast datetime to Date f = io.BytesIO() data = { "datetime": [ # unix timestamp in ms diff --git a/py-polars/tests/run_doc_examples.py b/py-polars/tests/run_doc_examples.py index 63b1e8469f12..463d13f8fd3e 100644 --- a/py-polars/tests/run_doc_examples.py +++ b/py-polars/tests/run_doc_examples.py @@ -14,7 +14,8 @@ 3. if you would not like code to run: add `#doctest: +SKIP`. You may still add example output. -Notes: +Notes +----- * Doctest does not have a built-in IGNORE_RESULT directive. We have a number of tests where we want to ensure that the code runs, but the output may be random by design, or not interesting for us to check. To allow for this behaviour, a custom output checker diff --git a/py-polars/tests/test_df.py b/py-polars/tests/test_df.py index a86541ce477c..99721d50a938 100644 --- a/py-polars/tests/test_df.py +++ b/py-polars/tests/test_df.py @@ -1081,7 +1081,8 @@ def test_hash_rows() -> None: def test_reproducible_hash_with_seeds() -> None: - """Tests the reproducibility of DataFrame.hash_rows, Series.hash, and Expr.hash. + """ + Test the reproducibility of DataFrame.hash_rows, Series.hash, and Expr.hash. cf. 
 issue #3966, hashes must always be reproducible across sessions when using the same seeds. @@ -1609,7 +1610,7 @@ def test_getattr() -> None: def test_get_item() -> None: - """test all the methods to use [] on a dataframe""" + """Test all the methods to use [] on a dataframe.""" df = pl.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [3, 4, 5, 6]}) # expression @@ -1788,8 +1789,7 @@ def test_join_suffixes() -> None: def test_preservation_of_subclasses() -> None: - """Tests for DataFrame inheritance.""" - + """Test for DataFrame inheritance.""" # We should be able to inherit from polars.DataFrame class SubClassedDataFrame(pl.DataFrame): pass diff --git a/py-polars/tests/test_lazy.py b/py-polars/tests/test_lazy.py index 6c59176e5be1..b08e43d5a593 100644 --- a/py-polars/tests/test_lazy.py +++ b/py-polars/tests/test_lazy.py @@ -1248,8 +1248,7 @@ def test_self_join() -> None: def test_preservation_of_subclasses() -> None: - """Tests for LazyFrame inheritance.""" - + """Test for LazyFrame inheritance.""" # We should be able to inherit from polars.LazyFrame class SubClassedLazyFrame(pl.LazyFrame): pass @@ -1383,9 +1382,8 @@ def test_explode_inner_lists_3985() -> None: def test_lazy_method() -> None: - """ - We want to support `.lazy()` on a Lazy DataFrame as to allow more generic user code. - """ + # We want to support `.lazy()` on a Lazy DataFrame so as to allow more generic user + # code. 
df = pl.DataFrame({"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 3, 4, 5, 6]}) lazy_df = df.lazy() diff --git a/py-polars/tests/test_series.py b/py-polars/tests/test_series.py index 7ccb7b3176de..bb3c18395a36 100644 --- a/py-polars/tests/test_series.py +++ b/py-polars/tests/test_series.py @@ -517,7 +517,7 @@ def test_set_invalid_key(key: Any) -> None: ], ) def test_set_key_series(key: pl.Series) -> None: - """only UInt32/UInt64/bool are allowed""" + """Only UInt32/UInt64/bool are allowed.""" s = pl.Series("a", [1, 2, 3]) s[key] = 4 assert_series_equal(s, pl.Series("a", [1, 4, 4])) diff --git a/py-polars/tests/test_struct.py b/py-polars/tests/test_struct.py index c62b2bc94d8c..eb802124b31b 100644 --- a/py-polars/tests/test_struct.py +++ b/py-polars/tests/test_struct.py @@ -180,9 +180,11 @@ def test_struct_cols() -> None: """Test that struct columns can be imported and work as expected.""" def build_struct_df(data: list) -> DataFrame: - """Build Polars df from list of dicts. + """ + Build Polars df from list of dicts. Can't import directly because of issue #3145. + """ arrow_df = pa.Table.from_pylist(data) polars_df = pl.from_arrow(arrow_df)