From 597a9fbd79090071546c0c542ba87ad42ed044e3 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 29 May 2023 10:15:42 +0200 Subject: [PATCH 1/2] chore: type-check friendlier exports --- python/deltalake/__init__.py | 15 +++++--- python/deltalake/_internal.pyi | 62 +++++++++++++++++++++++++++++----- python/src/lib.rs | 4 +-- 3 files changed, 65 insertions(+), 16 deletions(-) diff --git a/python/deltalake/__init__.py b/python/deltalake/__init__.py index 38f803c1ee..fb99ed7563 100644 --- a/python/deltalake/__init__.py +++ b/python/deltalake/__init__.py @@ -1,5 +1,10 @@ -from ._internal import PyDeltaTableError, RawDeltaTable, __version__, rust_core_version -from .data_catalog import DataCatalog -from .schema import DataType, Field, Schema -from .table import DeltaTable, Metadata -from .writer import write_deltalake +from ._internal import PyDeltaTableError as PyDeltaTableError +from ._internal import __version__ as __version__ +from ._internal import rust_core_version as rust_core_version +from .data_catalog import DataCatalog as DataCatalog +from .schema import DataType as DataType +from .schema import Field as Field +from .schema import Schema as Schema +from .table import DeltaTable as DeltaTable +from .table import Metadata as Metadata +from .writer import write_deltalake as write_deltalake diff --git a/python/deltalake/_internal.pyi b/python/deltalake/_internal.pyi index 5e7d6db94c..67f0d4def8 100644 --- a/python/deltalake/_internal.pyi +++ b/python/deltalake/_internal.pyi @@ -95,18 +95,34 @@ class Field: *, nullable: bool = True, metadata: Optional[Dict[str, Any]] = None, - ) -> None: ... + ) -> None: + """A named field, with a data type, nullability, and optional metadata.""" name: str + """The field name.""" type: DataType + """The field data type.""" nullable: bool + """The field nullability.""" metadata: Dict[str, Any] + """The field metadata.""" - def to_json(self) -> str: ... + def to_json(self) -> str: + """Get the JSON representation of the Field. 
+ + :rtype: str + """ @staticmethod - def from_json(json: str) -> "Field": ... - def to_pyarrow(self) -> pa.Field: ... + def from_json(json: str) -> "Field": + """Create a new Field from a JSON string. + + :param json: A json string representing the Field. + :rtype: Field + """ + def to_pyarrow(self) -> pa.Field: + """Convert field to a pyarrow.Field.""" @staticmethod - def from_pyarrow(type: pa.Field) -> "Field": ... + def from_pyarrow(type: pa.Field) -> "Field": + """Create a new field from pyarrow.Field.""" class StructType: def __init__(self, fields: List[Field]) -> None: ... @@ -124,13 +140,41 @@ class Schema: def __init__(self, fields: List[Field]) -> None: ... fields: List[Field] invariants: List[Tuple[str, str]] + """The list of invariants defined on the table. + + The first string in each tuple is the field path, the second is the SQL of the invariant. + """ - def to_json(self) -> str: ... + def to_json(self) -> str: + """Get the JSON representation of the schema. + + :rtype: str + """ @staticmethod - def from_json(json: str) -> "Schema": ... - def to_pyarrow(self, as_large_types: bool = False) -> pa.Schema: ... + def from_json(json: str) -> "Schema": + """Create a new Schema from a JSON string. + + :param json: a JSON string + :rtype: Schema + """ + def to_pyarrow(self, as_large_types: bool = False) -> pa.Schema: + """Return equivalent PyArrow schema. + + Note: this conversion is lossy as the Invariants are not stored in pyarrow.Schema. + + :param as_large_types: get schema with all variable size types (list, + binary, string) as large variants (with int64 indices). This is for + compatibility with systems like Polars that only support the large + versions of Arrow types. + :rtype: pyarrow.Schema + """ @staticmethod - def from_pyarrow(type: pa.Schema) -> "Schema": ... + def from_pyarrow(type: pa.Schema) -> "Schema": + """Create a new Schema from a pyarrow.Schema. 
+ + :param type: a PyArrow schema + :rtype: Schema + """ class ObjectInputFile: @property diff --git a/python/src/lib.rs b/python/src/lib.rs index cdcaefa2cc..8a215039f3 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -312,7 +312,7 @@ impl RawDeltaTable { Ok(metrics.files_deleted) } - // Run the optimize command on the Delta Table: merge small files into a large file by bin-packing. + /// Run the optimize command on the Delta Table: merge small files into a large file by bin-packing. #[pyo3(signature = (partition_filters = None, target_size = None))] pub fn optimize( &mut self, @@ -334,7 +334,7 @@ impl RawDeltaTable { Ok(serde_json::to_string(&metrics).unwrap()) } - // Run the History command on the Delta Table: Returns provenance information, including the operation, user, and so on, for each write to a table. + /// Run the History command on the Delta Table: Returns provenance information, including the operation, user, and so on, for each write to a table. pub fn history(&mut self, limit: Option<usize>) -> PyResult<Vec<String>> { let history = rt()? .block_on(self._table.history(limit)) From 00acb91b0fe0a75416ef6b8bc234b491cab49953 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 29 May 2023 14:11:01 +0200 Subject: [PATCH 2/2] ci: remove unused ruff omits --- python/pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index cf7ea7c787..013d54a409 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -80,12 +80,8 @@ select = [ # isort "I" ] -unfixable = ["F401"] ignore = ["E501"] -[tool.ruff.per-file-ignores] -"deltalake/__init__.py" = ["F401"] - [tool.ruff.isort] known-first-party = ["deltalake"]