From 37df95418404b867a40e7483fcc2858797be50c8 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 25 Jun 2024 04:18:31 +0200 Subject: [PATCH] feat: api letsql api methods (#105) - replace the ibis api methods in the library - add register_dataframe to support arbitrary expressions over datafusion expressions --- python/letsql/__init__.py | 10 +- python/letsql/backends/datafusion/__init__.py | 72 +- python/letsql/backends/let/__init__.py | 53 +- python/letsql/backends/let/tests/conftest.py | 11 +- python/letsql/backends/let/tests/test_api.py | 60 + .../letsql/backends/let/tests/test_cache.py | 9 +- .../letsql/backends/let/tests/test_execute.py | 14 +- python/letsql/common/caching.py | 2 +- .../common/utils/dask_normalize_function.py | 1 + python/letsql/config.py | 112 +- python/letsql/expr/api.py | 1517 +++++++++++++++++ python/letsql/internal.py | 4 +- python/letsql/tests/test_aggregation.py | 9 +- python/letsql/tests/test_api.py | 19 +- python/letsql/tests/test_array.py | 40 +- python/letsql/tests/test_client.py | 56 +- python/letsql/tests/test_generic.py | 68 +- python/letsql/tests/test_join.py | 24 +- python/letsql/tests/test_numeric.py | 47 +- python/letsql/tests/test_param.py | 25 + python/letsql/tests/test_set_ops.py | 10 +- python/letsql/tests/test_string.py | 26 +- python/letsql/tests/test_struct.py | 14 +- python/letsql/tests/test_temporal.py | 34 +- python/letsql/tests/test_wrapper.py | 11 +- src/context.rs | 10 + src/dataframe.rs | 2 +- 27 files changed, 2009 insertions(+), 251 deletions(-) create mode 100644 python/letsql/backends/let/tests/test_api.py create mode 100644 python/letsql/expr/api.py create mode 100644 python/letsql/tests/test_param.py diff --git a/python/letsql/__init__.py b/python/letsql/__init__.py index 47114652..28cfc103 100644 --- a/python/letsql/__init__.py +++ b/python/letsql/__init__.py @@ -4,6 +4,8 @@ from letsql import examples from letsql.config import options +from letsql.expr import api +from letsql.expr.api import * # noqa: F403 from letsql.backends.let import Backend @@ -12,7 +14,13 @@ except ModuleNotFoundError: import importlib_metadata -__all__ = ["examples", "connect", "options"] +__all__ = [ # noqa: PLE0604 + "api", + "examples", + "connect", + "options", + *api.__all__, +] def connect() -> Backend: diff --git a/python/letsql/backends/datafusion/__init__.py b/python/letsql/backends/datafusion/__init__.py index ffa40404..3116e2bc 100644 --- a/python/letsql/backends/datafusion/__init__.py +++ b/python/letsql/backends/datafusion/__init__.py @@ -30,7 +30,13 @@ import letsql.internal as df from letsql.backends.datafusion.compiler import DataFusionCompiler from letsql.backends.datafusion.provider import IbisTableProvider -from letsql.internal import SessionConfig, SessionContext, TableProvider, Table +from letsql.internal import ( + SessionConfig, + SessionContext, + TableProvider, + Table, + DataFrame, +) if TYPE_CHECKING: import pandas as pd @@ -46,35 +52,11 @@ class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, def version(self): return letsql.__version__ - def do_connect( - self, config: Mapping[str, str | Path] | SessionContext | None = None - ) -> None: - """Create a Datafusion backend for use with Ibis. - - Parameters - ---------- - config - Mapping of table names to files. 
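The replacement do_connect below drops the config-mapping and SessionContext branches; every connection now builds a SessionConfig with the PostgreSQL SQL dialect and the information schema enabled. A minimal sketch of the simplified flow, using only the public letsql entry point:

>>> import letsql as ls
>>> con = ls.connect()  # no table-to-file mapping required
>>> con.list_tables()  # doctest: +SKIP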
- - Examples - -------- - >>> import letsql as ls - >>> config = {"t": "path/to/file.parquet", "s": "path/to/file.csv"} - >>> ls.connect(config) - - """ - if isinstance(config, SessionContext): - (self.con, config) = (config, None) - else: - if config is not None and not isinstance(config, Mapping): - raise TypeError("Input to datafusion.connect must be a mapping") - if SessionConfig is not None: - df_config = SessionConfig( - {"datafusion.sql_parser.dialect": "PostgreSQL"} - ).with_information_schema(True) - else: - df_config = None - self.con = SessionContext(config=df_config) + def do_connect(self, config: Mapping[str, str | Path] | None = None) -> None: + df_config = SessionConfig( + {"datafusion.sql_parser.dialect": "PostgreSQL"} + ).with_information_schema(True) + self.con = SessionContext(config=df_config) self._register_builtin_udfs() @@ -185,9 +167,6 @@ def raw_sql(self, query: str | sge.Expression) -> Any: ---------- query Raw SQL string - kwargs - Backend specific query arguments - """ with contextlib.suppress(AttributeError): query = query.sql(dialect=self.dialect, pretty=True) @@ -305,29 +284,6 @@ def register( kwargs Datafusion-specific keyword arguments - Examples - -------- - Register a csv: - - >>> import ibis - >>> conn = ibis.datafusion.connect(config) - >>> conn.register("path/to/data.csv", "my_table") - >>> conn.table("my_table") - - Register a PyArrow table: - - >>> import pyarrow as pa - >>> tab = pa.table({"x": [1, 2, 3]}) - >>> conn.register(tab, "my_table") - >>> conn.table("my_table") - - Register a PyArrow dataset: - - >>> import pyarrow.dataset as ds - >>> dataset = ds.dataset("path/to/table") - >>> conn.register(dataset, "my_table") - >>> conn.table("my_table") - """ import pandas as pd @@ -380,6 +336,10 @@ def register( self.con.deregister_table(table_ident) self.con.register_table(table_ident, source) return self.table(table_name) + elif isinstance(source, DataFrame): + self.con.deregister_table(table_ident) + self.con.register_dataframe(table_ident, source) + return self.table(table_name) else: raise ValueError(f"Unknown `source` type {type(source)}") diff --git a/python/letsql/backends/let/__init__.py b/python/letsql/backends/let/__init__.py index d6941a6b..83afd323 100644 --- a/python/letsql/backends/let/__init__.py +++ b/python/letsql/backends/let/__init__.py @@ -8,7 +8,6 @@ import pyarrow_hotfix # noqa: F401 from ibis import BaseBackend from ibis.expr import types as ir -from ibis.expr import operations as ops from ibis.expr.schema import SchemaLike from ibis.backends.datafusion import Backend as IbisDataFusionBackend from sqlglot import exp, parse_one @@ -24,7 +23,6 @@ replace_cache_table, ) from letsql.expr.translate import sql_to_ibis -from letsql.internal import SessionContext def _get_datafusion_table(con, table_name, database="public"): @@ -33,6 +31,16 @@ def _get_datafusion_table(con, table_name, database="public"): return public.table(table_name) +def _get_datafusion_dataframe(con, expr, **kwargs): + con._register_udfs(expr) + con._register_in_memory_tables(expr) + + table_expr = expr.as_table() + raw_sql = con.compile(table_expr, **kwargs) + + return con.con.sql(raw_sql) + + class Backend(DataFusionBackend): name = "let" @@ -51,17 +59,13 @@ def register( table_or_expr = source.op() backend = source._find_backend(use_default=False) - if backend == self: - table_or_expr = self._sources.get_table_or_op(table_or_expr) - original_backend = self._sources.get_backend(table_or_expr) - is_a_datafusion_backed_table = isinstance( - original_backend, 
(DataFusionBackend, IbisDataFusionBackend) - ) and isinstance(table_or_expr, ops.DatabaseTable) - if is_a_datafusion_backed_table: - source = _get_datafusion_table( - original_backend.con, table_or_expr.name - ) - table_or_expr = None + if isinstance(backend, Backend): + if backend is self: + table_or_expr = self._sources.get_table_or_op(table_or_expr) + backend = self._sources.get_backend(table_or_expr) + + if isinstance(backend, (DataFusionBackend, IbisDataFusionBackend)): + source = _get_datafusion_dataframe(backend, source) registered_table = super().register(source, table_name=table_name, **kwargs) self._sources[registered_table.op()] = table_or_expr or registered_table.op() @@ -99,7 +103,7 @@ def read_delta( return registered_table def execute(self, expr: ir.Expr, **kwargs: Any): - not_multi_engine = self._get_source(expr) != self + not_multi_engine = self._get_source(expr) is not self if ( not_multi_engine ): # this means is a single source that is not the letsql backend @@ -112,14 +116,25 @@ def replace_table(node, _, **_kwargs): expr = self._register_and_transform_cache_tables(expr) backend = self._get_source(expr) - if backend == self: + if backend is self: backend = super() return backend.execute(expr, **kwargs) - def do_connect( - self, config: Mapping[str, str | Path] | SessionContext | None = None - ) -> None: + def do_connect(self, config: Mapping[str, str | Path] | None = None) -> None: + """Creates a connection. + + Parameters + ---------- + config + Mapping of table names to files. + + Examples + -------- + >>> import letsql as ls + >>> con = ls.connect() + + """ super().do_connect(config=config) def _get_source(self, expr: ir.Expr): @@ -161,7 +176,7 @@ def fn(node, _, **kwargs): uncached_to_expr = uncached.to_expr() node = storage.set_default(uncached_to_expr, uncached) table = node.to_expr() - if node.source == self: + if node.source is self: table = _get_datafusion_table(self.con, node.name) self.register(table, table_name=node.name) return node diff --git a/python/letsql/backends/let/tests/conftest.py b/python/letsql/backends/let/tests/conftest.py index 76b723ae..1a86dc9f 100644 --- a/python/letsql/backends/let/tests/conftest.py +++ b/python/letsql/backends/let/tests/conftest.py @@ -49,7 +49,12 @@ def dirty(pg): def remove_unexpected_tables(dirty): for table in dirty.list_tables(): if table not in expected_tables: - dirty.drop_table(table) + dirty.drop_table(table, force=True) + + for table in dirty.list_tables(): + if table not in expected_tables: + dirty.drop_view(table, force=True) + if sorted(dirty.list_tables()) != sorted(expected_tables): raise ValueError @@ -72,7 +77,9 @@ def dirty_ls_con(): def ls_con(dirty_ls_con): yield dirty_ls_con for table_name in dirty_ls_con.list_tables(): - dirty_ls_con.drop_table(table_name) + dirty_ls_con.drop_table(table_name, force=True) + for table_name in dirty_ls_con.list_tables(): + dirty_ls_con.drop_view(table_name, force=True) @pytest.fixture(scope="session") diff --git a/python/letsql/backends/let/tests/test_api.py b/python/letsql/backends/let/tests/test_api.py new file mode 100644 index 00000000..c1baeb36 --- /dev/null +++ b/python/letsql/backends/let/tests/test_api.py @@ -0,0 +1,60 @@ +from pathlib import Path + +import pytest + + +import letsql as ls + + +@pytest.fixture(scope="session") +def csv_dir(): + root = Path(__file__).absolute().parents[5] + data_dir = root / "ci" / "ibis-testing-data" / "csv" + return data_dir + + +@pytest.fixture(scope="session") +def parquet_dir(): + root = Path(__file__).absolute().parents[5] 
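+    # parents[5] climbs from python/letsql/backends/let/tests/test_api.py
+    # up to the repository root, where the ci/ibis-testing-data checkout lives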
+ data_dir = root / "ci" / "ibis-testing-data" / "parquet" + return data_dir + + +def test_register_read_csv(con, csv_dir): + api_batting = con.register( + ls.read_csv(csv_dir / "batting.csv"), table_name="api_batting" + ) + result = api_batting.execute() + + assert result is not None + + +def test_register_read_parquet(con, parquet_dir): + api_batting = con.register( + ls.read_parquet(parquet_dir / "batting.parquet"), table_name="api_batting" + ) + result = api_batting.execute() + + assert result is not None + + +def test_executed_on_original_backend(ls_con, parquet_dir, csv_dir, mocker): + con = ls.config._backend_init() + spy = mocker.spy(con, "execute") + + table_name = "batting" + parquet_table = ls.read_parquet(parquet_dir / "batting.parquet")[ + lambda t: t.yearID == 2015 + ].pipe(ls_con.register, f"parquet-{table_name}") + + csv_table = ls.read_csv(csv_dir / "batting.csv")[lambda t: t.yearID == 2014].pipe( + ls_con.register, f"csv-{table_name}" + ) + + expr = parquet_table.join( + csv_table, + "playerID", + ) + + assert expr.execute() is not None + assert spy.call_count == 1 diff --git a/python/letsql/backends/let/tests/test_cache.py b/python/letsql/backends/let/tests/test_cache.py index 4efc8e51..3f480802 100644 --- a/python/letsql/backends/let/tests/test_cache.py +++ b/python/letsql/backends/let/tests/test_cache.py @@ -541,14 +541,15 @@ def test_read_csv_compute_and_cache(con, csv_dir, tmp_path): assert expr.execute() is not None -def test_multi_engine_cache(pg, ls_con, tmp_path): - db_con = ibis.duckdb.connect() +@pytest.mark.parametrize("other_con", [letsql.connect(), ibis.duckdb.connect()]) +def test_multi_engine_cache(pg, ls_con, tmp_path, other_con): + other_con = ibis.duckdb.connect() table_name = "batting" pg_t = pg.table(table_name)[lambda t: t.yearID > 2014].pipe( ls_con.register, f"pg-{table_name}" ) - db_t = db_con.register(pg.table(table_name).to_pyarrow(), f"{table_name}")[ + db_t = other_con.register(pg.table(table_name).to_pyarrow(), f"{table_name}")[ lambda t: t.stint == 1 ].pipe(ls_con.register, f"db-{table_name}") @@ -625,7 +626,7 @@ def test_replace_table_matching_kwargs(pg, ls_con, tmp_path): def test_cache_default_path_set(pg, ls_con, tmp_path): - letsql.options.cache_default_path = tmp_path + letsql.options.cache.default_path = tmp_path storage = ParquetCacheStorage( source=ls_con, diff --git a/python/letsql/backends/let/tests/test_execute.py b/python/letsql/backends/let/tests/test_execute.py index 4630cd84..1d211e32 100644 --- a/python/letsql/backends/let/tests/test_execute.py +++ b/python/letsql/backends/let/tests/test_execute.py @@ -7,6 +7,7 @@ import pytest from pytest import param +import letsql from letsql.tests.util import ( assert_frame_equal, ) @@ -341,6 +342,7 @@ def test_multiple_execution_letsql_register_table(con, csv_dir): @pytest.mark.parametrize( "other_con", [ + letsql.connect(), ibis.datafusion.connect(), ibis.duckdb.connect(), ibis.postgres.connect( @@ -497,7 +499,14 @@ def test_register_arbitrary_expression_multiple_tables(con, duckdb_con): assert_frame_equal(result, expected, check_like=True) -def test_multiple_pipes(ls_con, pg): +@pytest.mark.parametrize( + "new_con", + [ + letsql.connect(), + ibis.duckdb.connect(), + ], +) +def test_multiple_pipes(ls_con, pg, new_con): """This test address the issue reported on bug #69 link: https://github.com/letsql/letsql/issues/69 @@ -506,12 +515,11 @@ def test_multiple_pipes(ls_con, pg): In this test (and the rest) ls_con is a clean (no tables) letsql connection """ - duckdb_con = 
ibis.duckdb.connect() table_name = "batting" pg_t = pg.table(table_name)[lambda t: t.yearID == 2015].pipe( ls_con.register, f"pg-{table_name}" ) - db_t = duckdb_con.register(pg_t.to_pyarrow(), f"{table_name}")[ + db_t = new_con.register(pg_t.to_pyarrow(), f"{table_name}")[ lambda t: t.yearID == 2014 ].pipe(ls_con.register, f"db-{table_name}") diff --git a/python/letsql/common/caching.py b/python/letsql/common/caching.py index b4e8f02d..3ce86938 100644 --- a/python/letsql/common/caching.py +++ b/python/letsql/common/caching.py @@ -114,7 +114,7 @@ class ParquetCacheStorage(CacheStorage): path = field( validator=instance_of(pathlib.Path), converter=abs_path_converter, - factory=functools.partial(letsql.options.get, "cache_default_path"), + factory=functools.partial(letsql.options.get, "cache.default_path"), ) def __attrs_post_init__(self): diff --git a/python/letsql/common/utils/dask_normalize_function.py b/python/letsql/common/utils/dask_normalize_function.py index a936bac5..af96391f 100644 --- a/python/letsql/common/utils/dask_normalize_function.py +++ b/python/letsql/common/utils/dask_normalize_function.py @@ -65,6 +65,7 @@ def unwrap(obj, attr_name): normalize_code = normalize_by_attrs(CODE_ATTRS) dask.base.normalize_token.register(types.CodeType, normalize_code) +dask.base.normalize_token.register(property, normalize_code) @dask.base.normalize_token.register(toolz.curry) diff --git a/python/letsql/config.py b/python/letsql/config.py index 34327c44..c3f8d68c 100644 --- a/python/letsql/config.py +++ b/python/letsql/config.py @@ -1,22 +1,124 @@ import pathlib -from typing import Union +from typing import Union, Optional, Any +import ibis from ibis.config import Config -class Options(Config): - """LETSQL configuration options +class Cache(Config): + """LETSQL cache configuration options Attributes ---------- - cache_default_path : str + default_path : str """ - cache_default_path: Union[str, pathlib.Path] = pathlib.Path( + default_path: Union[str, pathlib.Path] = pathlib.Path( "~/.local/share/letsql" ).expanduser() +class Interactive(Config): + """Options controlling the interactive repr.""" + + @property + def max_rows(self) -> int: + return ibis.options.repr.interactive.max_rows + + @max_rows.setter + def max_rows(self, value: int): + ibis.options.repr.interactive.max_rows = value + + @property + def max_columns(self) -> Optional[int]: + return ibis.options.repr.interactive.max_columns + + @max_columns.setter + def max_columns(self, value: Optional[int]): + ibis.options.repr.interactive.max_columns = value + + @property + def max_length(self) -> int: + return ibis.options.repr.interactive.max_length + + @max_length.setter + def max_length(self, value: int): + ibis.options.repr.interactive.max_length = value + + @property + def max_string(self) -> int: + return ibis.options.repr.interactive.max_string + + @max_string.setter + def max_string(self, value: int): + ibis.options.repr.interactive.max_string = value + + @property + def max_depth(self) -> int: + return ibis.options.repr.interactive.max_depth + + @max_depth.setter + def max_depth(self, value: int): + ibis.options.repr.interactive.max_depth = value + + @property + def show_types(self) -> bool: + return ibis.options.repr.interactive.show_types + + @show_types.setter + def show_types(self, value: bool): + ibis.options.repr.interactive.show_types = value + + +class Repr(Config): + """Expression printing options. + + Attributes + ---------- + interactive : Interactive + Options controlling the interactive repr. 
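+
+    Examples
+    --------
+    >>> import letsql
+    >>> letsql.options.repr.interactive.max_rows = 10  # forwards to ibis.options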
+ """ + + interactive: Interactive = Interactive() + + +class Options(Config): + """LETSQL configuration options + + Attributes + ---------- + cache : Cache + Options controlling caching. + backend : Optional[letsql.backends.let.Backend] + The backend to use for execution. + repr : Repr + Options controlling expression printing. + """ + + cache: Cache = Cache() + backend: Optional[Any] = None + repr: Repr = Repr() + + @property + def interactive(self) -> bool: + """Show the first few rows of computing an expression when in a repl.""" + return ibis.options.interactive + + @interactive.setter + def interactive(self, value: bool): + ibis.options.interactive = value + + options = Options() + + +def _backend_init(): + if (backend := options.backend) is not None: + return backend + + import letsql + + options.backend = con = letsql.connect() + return con diff --git a/python/letsql/expr/api.py b/python/letsql/expr/api.py new file mode 100644 index 00000000..2c8d4702 --- /dev/null +++ b/python/letsql/expr/api.py @@ -0,0 +1,1517 @@ +"""LETSQL expression API definitions.""" + +from __future__ import annotations + +import datetime +import functools +from typing import TYPE_CHECKING, Any, Union, overload + +import ibis +import ibis.expr.builders as bl +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir +from ibis import api +from ibis.common.deferred import Deferred, _, deferrable +from ibis.expr.schema import Schema +from ibis.expr.types import ( + Column, + DateValue, + Scalar, + Table, + TimestampValue, + TimeValue, + Value, + array, + literal, + map, + null, + struct, +) + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + from pathlib import Path + + from ibis.expr.schema import SchemaLike + +__all__ = ( + "Column", + "Deferred", + "Scalar", + "Schema", + "Table", + "Value", + "_", + "aggregate", + "and_", + "array", + "asc", + "asof_join", + "case", + "coalesce", + "cross_join", + "cume_dist", + "date", + "deferred", + "dense_rank", + "desc", + "difference", + "e", + "following", + "greatest", + "ifelse", + "infer_dtype", + "infer_schema", + "intersect", + "interval", + "join", + "least", + "literal", + "map", + "memtable", + "now", + "ntile", + "null", + "or_", + "param", + "percent_rank", + "pi", + "preceding", + "random", + "range", + "rank", + "read_csv", + "read_parquet", + "row_number", + "schema", + "struct", + "table", + "time", + "today", + "timestamp", + "union", + "uuid", +) + +infer_dtype = dt.infer +infer_schema = sch.infer +aggregate = ir.Table.aggregate +cross_join = ir.Table.cross_join +join = ir.Table.join +asof_join = ir.Table.asof_join + +e = ops.E().to_expr() +pi = ops.Pi().to_expr() + +deferred = _ + + +def param(type: Union[dt.DataType, str]) -> ir.Scalar: + """Create a deferred parameter of a given type. + + Parameters + ---------- + type + The type of the unbound parameter, e.g., double, int64, date, etc. + + Returns + ------- + Scalar + A scalar expression backend by a parameter + + Examples + -------- + >>> from datetime import date + >>> import letsql + >>> start = letsql.param("date") + >>> t = letsql.memtable( + ... { + ... "date_col": [date(2013, 1, 1), date(2013, 1, 2), date(2013, 1, 3)], + ... "value": [1.0, 2.0, 3.0], + ... }, + ... 
) + >>> expr = t.filter(t.date_col >= start).value.sum() + >>> expr.execute(params={start: date(2013, 1, 1)}) + 6.0 + >>> expr.execute(params={start: date(2013, 1, 2)}) + 5.0 + >>> expr.execute(params={start: date(2013, 1, 3)}) + 3.0 + + """ + return api.param(type) + + +def schema( + pairs: SchemaLike | None = None, + names: Iterable[str] | None = None, + types: Iterable[str | dt.DataType] | None = None, +) -> sch.Schema: + """Validate and return a [`Schema`](./schemas.qmd#ibis.expr.schema.Schema) object. + + Parameters + ---------- + pairs + List or dictionary of name, type pairs. Mutually exclusive with `names` + and `types` arguments. + names + Field names. Mutually exclusive with `pairs`. + types + Field types. Mutually exclusive with `pairs`. + + Returns + ------- + Schema + An ibis schema + + Examples + -------- + >>> from letsql import schema + >>> sc = schema([("foo", "string"), ("bar", "int64"), ("baz", "boolean")]) + >>> sc = schema(names=["foo", "bar", "baz"], types=["string", "int64", "boolean"]) + >>> sc = schema(dict(foo="string")) # no-op + + """ + return api.schema(pairs=pairs, names=names, types=types) + + +def table( + schema: SchemaLike | None = None, + name: str | None = None, + catalog: str | None = None, + database: str | None = None, +) -> ir.Table: + """Create a table literal or an abstract table without data. + + Ibis uses the word database to refer to a collection of tables, and the word + catalog to refer to a collection of databases. You can use a combination of + `catalog` and `database` to specify a hierarchical location for table. + + Parameters + ---------- + schema + A schema for the table + name + Name for the table. One is generated if this value is `None`. + catalog + A collection of database. + database + A collection of tables. Required if catalog is not `None`. + + Returns + ------- + Table + A table expression + + Examples + -------- + Create a table with no data backing it + + >>> import letsql + >>> letsql.options.interactive = False + >>> t = letsql.table(schema=dict(a="int", b="string"), name="t") + >>> t + UnboundTable: t + a int64 + b string + + + Create a table with no data backing it in a specific location + + >>> import letsql + >>> letsql.options.interactive = False + >>> t = letsql.table(schema=dict(a="int"), name="t", catalog="cat", database="db") + >>> t + UnboundTable: cat.db.t + a int64 + """ + return api.table(schema=schema, name=name, catalog=catalog, database=database) + + +def memtable( + data, + *, + columns: Iterable[str] | None = None, + schema: SchemaLike | None = None, + name: str | None = None, +) -> Table: + """Construct an ibis table expression from in-memory data. + + Parameters + ---------- + data + A table-like object (`pandas.DataFrame`, `pyarrow.Table`, or + `polars.DataFrame`), or any data accepted by the `pandas.DataFrame` + constructor (e.g. a list of dicts). + + Note that ibis objects (e.g. `MapValue`) may not be passed in as part + of `data` and will result in an error. + + Do not depend on the underlying storage type (e.g., pyarrow.Table), + it's subject to change across non-major releases. + columns + Optional [](`typing.Iterable`) of [](`str`) column names. If provided, + must match the number of columns in `data`. + schema + Optional [`Schema`](./schemas.qmd#ibis.expr.schema.Schema). + The functions use `data` to infer a schema if not passed. + name + Optional name of the table. + + Returns + ------- + Table + A table expression backed by in-memory data. 
+ + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = False + >>> t = letsql.memtable([{"a": 1}, {"a": 2}]) + >>> t + InMemoryTable + data: + PandasDataFrameProxy: + a + 0 1 + 1 2 + + >>> t = letsql.memtable([{"a": 1, "b": "foo"}, {"a": 2, "b": "baz"}]) + >>> t + InMemoryTable + data: + PandasDataFrameProxy: + a b + 0 1 foo + 1 2 baz + + Create a table literal without column names embedded in the data and pass + `columns` + + >>> t = letsql.memtable([(1, "foo"), (2, "baz")], columns=["a", "b"]) + >>> t + InMemoryTable + data: + PandasDataFrameProxy: + a b + 0 1 foo + 1 2 baz + + Create a table literal without column names embedded in the data. Ibis + generates column names if none are provided. + + >>> t = letsql.memtable([(1, "foo"), (2, "baz")]) + >>> t + InMemoryTable + data: + PandasDataFrameProxy: + col0 col1 + 0 1 foo + 1 2 baz + + """ + return api.memtable(data, columns=columns, schema=schema, name=name) + + +def desc(expr: ir.Column | str) -> ir.Value: + """Create a descending sort key from `expr` or column name. + + Parameters + ---------- + expr + The expression or column name to use for sorting + + See Also + -------- + [`Value.desc()`](./expression-generic.qmd#ibis.expr.types.generic.Value.desc) + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.examples.penguins.fetch() + >>> t[["species", "year"]].order_by(letsql.desc("year")).head() + ┏━━━━━━━━━┳━━━━━━━┓ + ┃ species ┃ year ┃ + ┡━━━━━━━━━╇━━━━━━━┩ + │ string │ int64 │ + ├─────────┼───────┤ + │ Adelie │ 2009 │ + │ Adelie │ 2009 │ + │ Adelie │ 2009 │ + │ Adelie │ 2009 │ + │ Adelie │ 2009 │ + └─────────┴───────┘ + + Returns + ------- + ir.ValueExpr + An expression + + """ + return api.desc(expr) + + +def asc(expr: ir.Column | str) -> ir.Value: + """Create an ascending sort key from `asc` or column name. + + Parameters + ---------- + expr + The expression or column name to use for sorting + + See Also + -------- + [`Value.asc()`](./expression-generic.qmd#ibis.expr.types.generic.Value.asc) + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.examples.penguins.fetch() + >>> t[["species", "year"]].order_by(letsql.asc("year")).head() + ┏━━━━━━━━━┳━━━━━━━┓ + ┃ species ┃ year ┃ + ┡━━━━━━━━━╇━━━━━━━┩ + │ string │ int64 │ + ├─────────┼───────┤ + │ Adelie │ 2007 │ + │ Adelie │ 2007 │ + │ Adelie │ 2007 │ + │ Adelie │ 2007 │ + │ Adelie │ 2007 │ + └─────────┴───────┘ + + Returns + ------- + ir.ValueExpr + An expression + + """ + return api.asc(expr) + + +def preceding(value) -> ir.Value: + return api.preceding(value) + + +def following(value) -> ir.Value: + return api.following(value) + + +def and_(*predicates: ir.BooleanValue) -> ir.BooleanValue: + """Combine multiple predicates using `&`. + + Parameters + ---------- + predicates + Boolean value expressions + + Returns + ------- + BooleanValue + A new predicate that evaluates to True if all composing predicates are + True. If no predicates were provided, returns True. + + """ + return api.and_(*predicates) + + +def or_(*predicates: ir.BooleanValue) -> ir.BooleanValue: + """Combine multiple predicates using `|`. + + Parameters + ---------- + predicates + Boolean value expressions + + Returns + ------- + BooleanValue + A new predicate that evaluates to True if any composing predicates are + True. If no predicates were provided, returns False. 
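+
+    Examples
+    --------
+    >>> import letsql
+    >>> t = letsql.memtable({"a": [1, 2, 3]})
+    >>> expr = t.filter(letsql.or_(t.a == 1, t.a > 2))  # rows where a is 1 or above 2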
+ + """ + return api.or_(*predicates) + + +def random() -> ir.FloatingScalar: + """Return a random floating point number in the range [0.0, 1.0). + + Similar to [](`random.random`) in the Python standard library. + + ::: {.callout-note} + ## Repeated use of `random` + + `ibis.random()` will generate a column of distinct random numbers even if + the same instance of `ibis.random()` is re-used. + + When Ibis compiles an expression to SQL, each place where `random` is used + will render as a separate call to the given backend's random number + generator. + + >>> import letsql + >>> r_a = letsql.random() # doctest: +SKIP + + Returns + ------- + FloatingScalar + Random float value expression + + """ + return api.random() + + +def uuid() -> ir.UUIDScalar: + """Return a random UUID version 4 value. + + Similar to [('uuid.uuid4`) in the Python standard library. + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> letsql.uuid() # doctest: +SKIP + UUID('e57e927b-aed2-483b-9140-dc32a26cad95') + + Returns + ------- + UUIDScalar + Random UUID value expression + """ + return api.uuid() + + +def case() -> bl.SearchedCaseBuilder: + """Begin constructing a case expression. + + Use the `.when` method on the resulting object followed by `.end` to create a + complete case expression. + + Returns + ------- + SearchedCaseBuilder + A builder object to use for constructing a case expression. + + See Also + -------- + [`Value.case()`](./expression-generic.qmd#ibis.expr.types.generic.Value.case) + + Examples + -------- + >>> import letsql + >>> from ibis import _ + >>> letsql.options.interactive = True + >>> t = letsql.memtable( + ... { + ... "left": [1, 2, 3, 4], + ... "symbol": ["+", "-", "*", "/"], + ... "right": [5, 6, 7, 8], + ... } + ... ) + >>> t.mutate( + ... result=( + ... letsql.case() + ... .when(_.symbol == "+", _.left + _.right) + ... .when(_.symbol == "-", _.left - _.right) + ... .when(_.symbol == "*", _.left * _.right) + ... .when(_.symbol == "/", _.left / _.right) + ... .end() + ... ) + ... ) + ┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓ + ┃ left ┃ symbol ┃ right ┃ result ┃ + ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩ + │ int64 │ string │ int64 │ float64 │ + ├───────┼────────┼───────┼─────────┤ + │ 1 │ + │ 5 │ 6.0 │ + │ 2 │ - │ 6 │ -4.0 │ + │ 3 │ * │ 7 │ 21.0 │ + │ 4 │ / │ 8 │ 0.5 │ + └───────┴────────┴───────┴─────────┘ + + """ + return api.case() + + +def now() -> ir.TimestampScalar: + """Return an expression that will compute the current timestamp. + + Returns + ------- + TimestampScalar + An expression representing the current timestamp. + + """ + return api.now() + + +def today() -> ir.DateScalar: + """Return an expression that will compute the current date. + + Returns + ------- + DateScalar + An expression representing the current date. + + """ + return api.today() + + +def rank() -> ir.IntegerColumn: + """Compute position of first element within each equal-value group in sorted order. + + Equivalent to SQL's `RANK()` window function. 
+ + Returns + ------- + Int64Column + The min rank + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"values": [1, 2, 1, 2, 3, 2]}) + >>> t.mutate(rank=letsql.rank().over(order_by=t.values)) + ┏━━━━━━━━┳━━━━━━━┓ + ┃ values ┃ rank ┃ + ┡━━━━━━━━╇━━━━━━━┩ + │ int64 │ int64 │ + ├────────┼───────┤ + │ 1 │ 0 │ + │ 1 │ 0 │ + │ 2 │ 2 │ + │ 2 │ 2 │ + │ 2 │ 2 │ + │ 3 │ 5 │ + └────────┴───────┘ + + """ + return api.rank() + + +def dense_rank() -> ir.IntegerColumn: + """Position of first element within each group of equal values. + + Values are returned in sorted order and duplicate values are ignored. + + Equivalent to SQL's `DENSE_RANK()`. + + Returns + ------- + IntegerColumn + The rank + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"values": [1, 2, 1, 2, 3, 2]}) + >>> t.mutate(rank=letsql.dense_rank().over(order_by=t.values)) + ┏━━━━━━━━┳━━━━━━━┓ + ┃ values ┃ rank ┃ + ┡━━━━━━━━╇━━━━━━━┩ + │ int64 │ int64 │ + ├────────┼───────┤ + │ 1 │ 0 │ + │ 1 │ 0 │ + │ 2 │ 1 │ + │ 2 │ 1 │ + │ 2 │ 1 │ + │ 3 │ 2 │ + └────────┴───────┘ + + """ + return api.dense_rank() + + +def percent_rank() -> ir.FloatingColumn: + """Return the relative rank of the values in the column. + + Returns + ------- + FloatingColumn + The percent rank + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"values": [1, 2, 1, 2, 3, 2]}) + >>> t.mutate(pct_rank=letsql.percent_rank().over(order_by=t.values)) + ┏━━━━━━━━┳━━━━━━━━━━┓ + ┃ values ┃ pct_rank ┃ + ┡━━━━━━━━╇━━━━━━━━━━┩ + │ int64 │ float64 │ + ├────────┼──────────┤ + │ 1 │ 0.0 │ + │ 1 │ 0.0 │ + │ 2 │ 0.4 │ + │ 2 │ 0.4 │ + │ 2 │ 0.4 │ + │ 3 │ 1.0 │ + └────────┴──────────┘ + + """ + return api.percent_rank() + + +def cume_dist() -> ir.FloatingColumn: + """Return the cumulative distribution over a window. + + Returns + ------- + FloatingColumn + The cumulative distribution + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"values": [1, 2, 1, 2, 3, 2]}) + >>> t.mutate(dist=letsql.cume_dist().over(order_by=t.values)) + ┏━━━━━━━━┳━━━━━━━━━━┓ + ┃ values ┃ dist ┃ + ┡━━━━━━━━╇━━━━━━━━━━┩ + │ int64 │ float64 │ + ├────────┼──────────┤ + │ 1 │ 0.333333 │ + │ 1 │ 0.333333 │ + │ 2 │ 0.833333 │ + │ 2 │ 0.833333 │ + │ 2 │ 0.833333 │ + │ 3 │ 1.000000 │ + └────────┴──────────┘ + + """ + return api.cume_dist() + + +def ntile(buckets: int | ir.IntegerValue) -> ir.IntegerColumn: + """Return the integer number of a partitioning of the column values. + + Parameters + ---------- + buckets + Number of buckets to partition into + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"values": [1, 2, 1, 2, 3, 2]}) + >>> t.mutate(ntile=letsql.ntile(2).over(order_by=t.values)) + ┏━━━━━━━━┳━━━━━━━┓ + ┃ values ┃ ntile ┃ + ┡━━━━━━━━╇━━━━━━━┩ + │ int64 │ int64 │ + ├────────┼───────┤ + │ 1 │ 0 │ + │ 1 │ 0 │ + │ 2 │ 0 │ + │ 2 │ 1 │ + │ 2 │ 1 │ + │ 3 │ 1 │ + └────────┴───────┘ + + """ + return api.ntile(buckets) + + +def row_number() -> ir.IntegerColumn: + """Return an analytic function expression for the current row number. 
+ + ::: {.callout-note} + `row_number` is normalized across backends to start at 0 + ::: + + Returns + ------- + IntegerColumn + A column expression enumerating rows + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"values": [1, 2, 1, 2, 3, 2]}) + >>> t.mutate(rownum=letsql.row_number()) + ┏━━━━━━━━┳━━━━━━━━┓ + ┃ values ┃ rownum ┃ + ┡━━━━━━━━╇━━━━━━━━┩ + │ int64 │ int64 │ + ├────────┼────────┤ + │ 1 │ 0 │ + │ 2 │ 1 │ + │ 1 │ 2 │ + │ 2 │ 3 │ + │ 3 │ 4 │ + │ 2 │ 5 │ + └────────┴────────┘ + + """ + return api.row_number() + + +def read_csv( + sources: str | Path | Sequence[str | Path], + table_name: str | None = None, + **kwargs: Any, +) -> ir.Table: + """Lazily load a CSV or set of CSVs. + + This function delegates to the `read_csv` method on the current default + backend (DuckDB or `ibis.config.default_backend`). + + Parameters + ---------- + sources + A filesystem path or URL or list of same. Supports CSV and TSV files. + table_name + A name to refer to the table. If not provided, a name will be generated. + kwargs + Backend-specific keyword arguments for the file type. For the DuckDB + backend used by default, please refer to: + + * CSV/TSV: https://duckdb.org/docs/data/csv/overview.html#parameters. + + Returns + ------- + ir.Table + Table expression representing a file + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> lines = '''a,b + ... 1,d + ... 2, + ... ,f + ... ''' + >>> with open("/tmp/lines.csv", mode="w") as f: + ... nbytes = f.write(lines) # nbytes is unused + >>> t = letsql.read_csv("/tmp/lines.csv") + >>> t + ┏━━━━━━━┳━━━━━━━━┓ + ┃ a ┃ b ┃ + ┡━━━━━━━╇━━━━━━━━┩ + │ int64 │ string │ + ├───────┼────────┤ + │ 1 │ d │ + │ 2 │ NULL │ + │ NULL │ f │ + └───────┴────────┘ + + """ + from letsql.config import _backend_init + + con = _backend_init() + return con.read_csv(sources, table_name=table_name, **kwargs) + + +def read_parquet( + sources: str | Path | Sequence[str | Path], + table_name: str | None = None, + **kwargs: Any, +) -> ir.Table: + """Lazily load a parquet file or set of parquet files. + + This function delegates to the `read_parquet` method on the current default + backend (DuckDB or `ibis.config.default_backend`). + + Parameters + ---------- + sources + A filesystem path or URL or list of same. + table_name + A name to refer to the table. If not provided, a name will be generated. + kwargs + Backend-specific keyword arguments for the file type. For the DuckDB + backend used by default, please refer to: + + * Parquet: https://duckdb.org/docs/data/parquet + + Returns + ------- + ir.Table + Table expression representing a file + + Examples + -------- + >>> import letsql + >>> import pandas as pd + >>> letsql.options.interactive = True + >>> df = pd.DataFrame({"a": [1, 2, 3], "b": list("ghi")}) + >>> df + a b + 0 1 g + 1 2 h + 2 3 i + >>> df.to_parquet("/tmp/data.parquet") + >>> t = letsql.read_parquet("/tmp/data.parquet") + >>> t + ┏━━━━━━━┳━━━━━━━━┓ + ┃ a ┃ b ┃ + ┡━━━━━━━╇━━━━━━━━┩ + │ int64 │ string │ + ├───────┼────────┤ + │ 1 │ g │ + │ 2 │ h │ + │ 3 │ i │ + └───────┴────────┘ + + """ + from letsql.config import _backend_init + + con = _backend_init() + return con.read_parquet(sources, table_name=table_name, **kwargs) + + +def union(table: ir.Table, *rest: ir.Table, distinct: bool = False) -> ir.Table: + """Compute the set union of multiple table expressions. + + The input tables must have identical schemas. 
+ + Parameters + ---------- + table + A table expression + *rest + Additional table expressions + distinct + Only return distinct rows + + Returns + ------- + Table + A new table containing the union of all input tables. + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t1 = letsql.memtable({"a": [1, 2]}) + >>> t1 + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 1 │ + │ 2 │ + └───────┘ + >>> t2 = letsql.memtable({"a": [2, 3]}) + >>> t2 + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 2 │ + │ 3 │ + └───────┘ + >>> letsql.union(t1, t2) # union all by default + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 1 │ + │ 2 │ + │ 2 │ + │ 3 │ + └───────┘ + >>> letsql.union(t1, t2, distinct=True).order_by("a") + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 1 │ + │ 2 │ + │ 3 │ + └───────┘ + + """ + return table.union(*rest, distinct=distinct) if rest else table + + +def intersect(table: ir.Table, *rest: ir.Table, distinct: bool = True) -> ir.Table: + """Compute the set intersection of multiple table expressions. + + The input tables must have identical schemas. + + Parameters + ---------- + table + A table expression + *rest + Additional table expressions + distinct + Only return distinct rows + + Returns + ------- + Table + A new table containing the intersection of all input tables. + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t1 = letsql.memtable({"a": [1, 2]}) + >>> t1 + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 1 │ + │ 2 │ + └───────┘ + >>> t2 = letsql.memtable({"a": [2, 3]}) + >>> t2 + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 2 │ + │ 3 │ + └───────┘ + >>> letsql.intersect(t1, t2) + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 2 │ + └───────┘ + + """ + return table.intersect(*rest, distinct=distinct) if rest else table + + +def difference(table: ir.Table, *rest: ir.Table, distinct: bool = True) -> ir.Table: + """Compute the set difference of multiple table expressions. + + The input tables must have identical schemas. + + Parameters + ---------- + table + A table expression + *rest + Additional table expressions + distinct + Only diff distinct rows not occurring in the calling table + + Returns + ------- + Table + The rows present in `self` that are not present in `tables`. + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t1 = letsql.memtable({"a": [1, 2]}) + >>> t1 + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 1 │ + │ 2 │ + └───────┘ + >>> t2 = letsql.memtable({"a": [2, 3]}) + >>> t2 + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 2 │ + │ 3 │ + └───────┘ + >>> letsql.difference(t1, t2) + ┏━━━━━━━┓ + ┃ a ┃ + ┡━━━━━━━┩ + │ int64 │ + ├───────┤ + │ 1 │ + └───────┘ + + """ + return table.difference(*rest, distinct=distinct) if rest else table + + +@deferrable +def ifelse(condition: Any, true_expr: Any, false_expr: Any) -> ir.Value: + """Construct a ternary conditional expression. 
+ + Parameters + ---------- + condition + A boolean expression + true_expr + Expression to return if `condition` evaluates to `True` + false_expr + Expression to return if `condition` evaluates to `False` or `NULL` + + Returns + ------- + Value : ir.Value + The value of `true_expr` if `condition` is `True` else `false_expr` + + See Also + -------- + [`BooleanValue.ifelse()`](./expression-numeric.qmd#ibis.expr.types.logical.BooleanValue.ifelse) + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> t = letsql.memtable({"condition": [True, False, True, None]}) + >>> letsql.ifelse(t.condition, "yes", "no") + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ IfElse(condition, 'yes', 'no') ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ string │ + ├────────────────────────────────┤ + │ yes │ + │ no │ + │ yes │ + │ no │ + └────────────────────────────────┘ + + """ + return api.ifelse(condition, true_expr, false_expr) + + +@deferrable +def coalesce(*args: Any) -> ir.Value: + """Return the first non-null value from `args`. + + Parameters + ---------- + args + Arguments from which to choose the first non-null value + + Returns + ------- + Value + Coalesced expression + + See Also + -------- + [`Value.coalesce()`](#ibis.expr.types.generic.Value.coalesce) + [`Value.fill_null()`](#ibis.expr.types.generic.Value.fill_null) + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> letsql.coalesce(None, 4, 5) + 4 + + """ + return api.coalesce(*args) + + +@deferrable +def greatest(*args: Any) -> ir.Value: + """Compute the largest value among the supplied arguments. + + Parameters + ---------- + args + Arguments to choose from + + Returns + ------- + Value + Maximum of the passed arguments + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> letsql.greatest(None, 4, 5) + 5 + + """ + return api.greatest(*args) + + +@deferrable +def least(*args: Any) -> ir.Value: + """Compute the smallest value among the supplied arguments. + + Parameters + ---------- + args + Arguments to choose from + + Returns + ------- + Value + Minimum of the passed arguments + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + >>> letsql.least(None, 4, 5) + 4 + + """ + return api.least(*args) + + +@functools.singledispatch +def range(start, stop, step) -> ir.ArrayValue: + """Generate a range of values. + + Integer ranges are supported, as well as timestamp ranges. + + ::: {.callout-note} + `start` is inclucive and `stop` is exclusive, just like Python's builtin + [`range`](range). + + When `step` equals 0, however, this function will return an empty array. + + Python's `range` will raise an exception when `step` is zero. + ::: + + Parameters + ---------- + start + Lower bound of the range, inclusive. + stop + Upper bound of the range, exclusive. + step + Step value. Optional, defaults to 1. + + Returns + ------- + ArrayValue + An array of values + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + + Range using only a stop argument + + >>> letsql.range(5) + [0, 1, ... +3] + + Simple range using start and stop + + >>> letsql.range(1, 5) + [1, 2, ... +2] + + + Generate an empty range + + >>> letsql.range(0) + [] + + Negative step values are supported + + >>> letsql.range(10, 4, -2) + [10, 8, ... +1] + + + `ibis.range` behaves the same as Python's range ... + + >>> letsql.range(0, 7, -1) + [] + + ... 
except when the step is zero, in which case `ibis.range` returns an + empty array + + >>> letsql.range(0, 5, 0) + [] + + Because the resulting expression is array, you can unnest the values + + >>> letsql.range(5).unnest().name("numbers") + ┏━━━━━━━━━┓ + ┃ numbers ┃ + ┡━━━━━━━━━┩ + │ int8 │ + ├─────────┤ + │ 0 │ + │ 1 │ + │ 2 │ + │ 3 │ + │ 4 │ + └─────────┘ + + """ + raise NotImplementedError() + + +@range.register(int) +@range.register(ir.IntegerValue) +def _int_range( + start: int, + stop: int | ir.IntegerValue | None = None, + step: int | ir.IntegerValue | None = None, +) -> ir.ArrayValue: + if stop is None: + stop = start + start = 0 + if step is None: + step = 1 + return ops.IntegerRange(start=start, stop=stop, step=step).to_expr() + + +@overload +def timestamp(value_or_year: Any, /, timezone: str | None = None) -> TimestampValue: ... + + +@overload +def timestamp( + value_or_year: int | ir.IntegerValue | Deferred, + month: int | ir.IntegerValue | Deferred, + day: int | ir.IntegerValue | Deferred, + hour: int | ir.IntegerValue | Deferred, + minute: int | ir.IntegerValue | Deferred, + second: int | ir.IntegerValue | Deferred, + /, + timezone: str | None = None, +) -> TimestampValue: ... + + +@deferrable +def timestamp( + value_or_year, + month=None, + day=None, + hour=None, + minute=None, + second=None, + /, + timezone=None, +): + """Construct a timestamp scalar or column. + + Parameters + ---------- + value_or_year + Either a string value or `datetime.datetime` to coerce to a timestamp, + or an integral value representing the timestamp year component. + month + The timestamp month component; required if `value_or_year` is a year. + day + The timestamp day component; required if `value_or_year` is a year. + hour + The timestamp hour component; required if `value_or_year` is a year. + minute + The timestamp minute component; required if `value_or_year` is a year. + second + The timestamp second component; required if `value_or_year` is a year. + timezone + The timezone name, or none for a timezone-naive timestamp. + + Returns + ------- + TimestampValue + A timestamp expression + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + + Create a timestamp scalar from a string + + >>> letsql.timestamp("2023-01-02T03:04:05") + Timestamp('2023-01-02 03:04:05') + + + Create a timestamp scalar from components + + >>> letsql.timestamp(2023, 1, 2, 3, 4, 5) + Timestamp('2023-01-02 03:04:05') + + + Create a timestamp column from components + + >>> t = letsql.memtable({"y": [2001, 2002], "m": [1, 4], "d": [2, 5], "h": [3, 6]}) + >>> letsql.timestamp(t.y, t.m, t.d, t.h, 0, 0).name("timestamp") + ┏━━━━━━━━━━━━━━━━━━━━━┓ + ┃ timestamp ┃ + ┡━━━━━━━━━━━━━━━━━━━━━┩ + │ timestamp │ + ├─────────────────────┤ + │ 2001-01-02 03:00:00 │ + │ 2002-04-05 06:00:00 │ + └─────────────────────┘ + + """ + return ibis.timestamp(value_or_year, month, day, hour, minute, second, timezone) + + +@overload +def date( + value_or_year: int | ir.IntegerValue | Deferred, + month: int | ir.IntegerValue | Deferred, + day: int | ir.IntegerValue | Deferred, + /, +) -> DateValue: ... + + +@overload +def date(value_or_year: Any, /) -> DateValue: ... + + +@deferrable +def date(value_or_year, month=None, day=None, /): + return ibis.date(value_or_year, month, day) + + +@overload +def time( + value_or_hour: int | ir.IntegerValue | Deferred, + minute: int | ir.IntegerValue | Deferred, + second: int | ir.IntegerValue | Deferred, + /, +) -> TimeValue: ... 
+ + +@overload +def time(value_or_hour: Any, /) -> TimeValue: ... + + +@deferrable +def time(value_or_hour, minute=None, second=None, /): + """Return a time literal if `value` is coercible to a time. + + Parameters + ---------- + value_or_hour + Either a string value or `datetime.time` to coerce to a time, or + an integral value representing the time hour component. + minute + The time minute component; required if `value_or_hour` is an hour. + second + The time second component; required if `value_or_hour` is an hour. + + Returns + ------- + TimeValue + A time expression + + Examples + -------- + >>> import letsql + >>> letsql.options.interactive = True + + Create a time scalar from a string + + >>> letsql.time("01:02:03") + datetime.time(1, 2, 3) + + + Create a time scalar from hour, minute, and second + + >>> letsql.time(1, 2, 3) + datetime.time(1, 2, 3) + + + Create a time column from hour, minute, and second + + >>> t = ibis.memtable({"h": [1, 4], "m": [2, 5], "s": [3, 6]}) + >>> ibis.time(t.h, t.m, t.s).name("time") + ┏━━━━━━━━━━┓ + ┃ time ┃ + ┡━━━━━━━━━━┩ + │ time │ + ├──────────┤ + │ 01:02:03 │ + │ 04:05:06 │ + └──────────┘ + + """ + return ibis.time(value_or_hour, minute, second) + + +def interval( + value: int | datetime.timedelta | None = None, + unit: str = "s", + *, + years: int | None = None, + quarters: int | None = None, + months: int | None = None, + weeks: int | None = None, + days: int | None = None, + hours: int | None = None, + minutes: int | None = None, + seconds: int | None = None, + milliseconds: int | None = None, + microseconds: int | None = None, + nanoseconds: int | None = None, +) -> ir.IntervalScalar: + """Return an interval literal expression. + + Parameters + ---------- + value + Interval value. + unit + Unit of `value` + years + Number of years + quarters + Number of quarters + months + Number of months + weeks + Number of weeks + days + Number of days + hours + Number of hours + minutes + Number of minutes + seconds + Number of seconds + milliseconds + Number of milliseconds + microseconds + Number of microseconds + nanoseconds + Number of nanoseconds + + Returns + ------- + IntervalScalar + An interval expression + + """ + return ibis.interval( + value, + unit, + years=years, + quarters=quarters, + months=months, + weeks=weeks, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + milliseconds=milliseconds, + microseconds=microseconds, + nanoseconds=nanoseconds, + ) diff --git a/python/letsql/internal.py b/python/letsql/internal.py index fd4bff4e..0a9d6a6c 100644 --- a/python/letsql/internal.py +++ b/python/letsql/internal.py @@ -3,7 +3,7 @@ import pyarrow as pa -from ._internal import ( +from letsql._internal import ( AggregateUDF, ContextProvider, LogicalPlan, @@ -16,6 +16,7 @@ SessionState, # noqa: F401 TableProvider, Table, + DataFrame, ) __all__ = [ @@ -34,6 +35,7 @@ "TableProvider", "AbstractTableProvider", "Table", + "DataFrame", ] diff --git a/python/letsql/tests/test_aggregation.py b/python/letsql/tests/test_aggregation.py index 0aa76c37..1de8664d 100644 --- a/python/letsql/tests/test_aggregation.py +++ b/python/letsql/tests/test_aggregation.py @@ -10,6 +10,7 @@ from ibis import literal as L from pytest import param +import letsql from letsql.tests.util import assert_frame_equal, reduction_tolerance @@ -215,7 +216,7 @@ def test_aggregate_grouped(alltypes, df, result_fn, expected_fn): id="is_in", ), param( - lambda _: ibis._.string_col.isin(["1", "7"]), + lambda _: letsql._.string_col.isin(["1", "7"]), lambda t: 
t.string_col.isin(["1", "7"]), id="is_in_deferred", ), @@ -413,9 +414,11 @@ def test_agg_name_in_output_column(alltypes): def test_grouped_case(con): - table = ibis.memtable({"key": [1, 1, 2, 2], "value": [10, 30, 20, 40]}) + table = letsql.memtable({"key": [1, 1, 2, 2], "value": [10, 30, 20, 40]}) - case_expr = ibis.case().when(table.value < 25, table.value).else_(ibis.null()).end() + case_expr = ( + letsql.case().when(table.value < 25, table.value).else_(letsql.null()).end() + ) expr = table.group_by("key").aggregate(mx=case_expr.max()).order_by("key") result = con.execute(expr) diff --git a/python/letsql/tests/test_api.py b/python/letsql/tests/test_api.py index cbe0c451..5e165dc1 100644 --- a/python/letsql/tests/test_api.py +++ b/python/letsql/tests/test_api.py @@ -1,16 +1,18 @@ from __future__ import annotations +from typing import Callable + import ibis.expr.types as ir import pytest from pytest import param +import letsql from letsql.tests.conftest import TEST_TABLES def test_list_tables(con): tables = con.list_tables() assert isinstance(tables, list) - # only table that is guaranteed to be in all backends key = "functional_alltypes" assert key in tables or key.upper() in tables assert all(isinstance(table, str) for table in tables) @@ -83,10 +85,23 @@ def test_limit_chain(alltypes, expr_fn): param(lambda t: t.join(t.view(), [("id", "int_col")]), id="self join"), ], ) -def test_unbind(alltypes, expr_fn): +def test_unbind(alltypes, expr_fn: Callable): + letsql.options.interactive = False + expr = expr_fn(alltypes) assert expr.unbind() != expr assert expr.unbind().schema() == expr.schema() assert "Unbound" not in repr(expr) assert "Unbound" in repr(expr.unbind()) + + +@pytest.mark.parametrize( + ("extension", "method"), + [("parquet", letsql.read_parquet), ("csv", letsql.read_csv)], +) +def test_read(data_dir, extension, method): + table = method( + data_dir / extension / f"batting.{extension}", table_name=f"batting-{extension}" + ) + assert table.execute() is not None diff --git a/python/letsql/tests/test_array.py b/python/letsql/tests/test_array.py index 8b6aa33f..54e0c8eb 100644 --- a/python/letsql/tests/test_array.py +++ b/python/letsql/tests/test_array.py @@ -1,12 +1,12 @@ from __future__ import annotations -import ibis import ibis.expr.types as ir import numpy as np import pandas as pd import pytest - from pytest import param + +import letsql from letsql.tests.util import assert_series_equal, assert_frame_equal @@ -24,7 +24,7 @@ def flatten_data(): def test_array_column(alltypes, df): - expr = ibis.array([alltypes["double_col"], alltypes["double_col"]]) + expr = letsql.array([alltypes["double_col"], alltypes["double_col"]]) assert isinstance(expr, ir.ArrayColumn) result = expr.execute() @@ -36,7 +36,7 @@ def test_array_column(alltypes, df): def test_array_scalar(con): - expr = ibis.array([1.0, 2.0, 3.0]) + expr = letsql.array([1.0, 2.0, 3.0]) assert isinstance(expr, ir.ArrayScalar) result = con.execute(expr.name("tmp")) @@ -46,7 +46,7 @@ def test_array_scalar(con): def test_array_repeat(con): - expr = ibis.array([1.0, 2.0]) * 2 + expr = letsql.array([1.0, 2.0]) * 2 result = con.execute(expr.name("tmp")) expected = np.array([1.0, 2.0, 1.0, 2.0]) @@ -55,8 +55,8 @@ def test_array_repeat(con): def test_array_concat(con): - left = ibis.literal([1, 2, 3]) - right = ibis.literal([2, 1]) + left = letsql.literal([1, 2, 3]) + right = letsql.literal([2, 1]) expr = left + right result = con.execute(expr.name("tmp")) expected = np.array([1, 2, 3, 2, 1]) @@ -64,8 +64,8 @@ def 
test_array_concat(con): def test_array_concat_variadic(con): - left = ibis.literal([1, 2, 3]) - right = ibis.literal([2, 1]) + left = letsql.literal([1, 2, 3]) + right = letsql.literal([2, 1]) expr = left.concat(right, right, right) result = con.execute(expr.name("tmp")) expected = np.array([1, 2, 3, 2, 1, 2, 1, 2, 1]) @@ -74,7 +74,7 @@ def test_array_concat_variadic(con): def test_array_radd_concat(con): left = [1] - right = ibis.literal([2]) + right = letsql.literal([2]) expr = left + right result = con.execute(expr.name("tmp")) expected = np.array([1, 2]) @@ -83,13 +83,13 @@ def test_array_radd_concat(con): def test_array_length(con): - expr = ibis.literal([1, 2, 3]).length() + expr = letsql.literal([1, 2, 3]).length() assert con.execute(expr.name("tmp")) == 3 def test_list_literal(con): arr = [1, 2, 3] - expr = ibis.literal(arr) + expr = letsql.literal(arr) result = con.execute(expr.name("tmp")) assert np.array_equal(result, arr) @@ -97,7 +97,7 @@ def test_list_literal(con): def test_np_array_literal(con): arr = np.array([1, 2, 3]) - expr = ibis.literal(arr) + expr = letsql.literal(arr) result = con.execute(expr.name("tmp")) assert np.array_equal(result, arr) @@ -113,7 +113,7 @@ def test_array_contains(con, array_types): @pytest.mark.skip(reason="failing in datafusion 34+ version") def test_array_position(con): - t = ibis.memtable({"a": [[1], [], [42, 42], []]}) + t = letsql.memtable({"a": [[1], [], [42, 42], []]}) expr = t.a.index(42) result = con.execute(expr) expected = pd.Series([-1, -1, 0, -1], dtype="object") @@ -121,7 +121,7 @@ def test_array_position(con): def test_array_remove(con): - t = ibis.memtable({"a": [[3, 2], [], [42, 2], [2, 2], []]}) + t = letsql.memtable({"a": [[3, 2], [], [42, 2], [2, 2], []]}) expr = t.a.remove(2) result = con.execute(expr) expected = pd.Series([[3], [], [42], [], []], dtype="object") @@ -139,7 +139,7 @@ def test_array_remove(con): ) def test_array_flatten(con, flatten_data, column, expected): data = flatten_data[column] - t = ibis.memtable({column: data["data"]}, schema={column: data["type"]}) + t = letsql.memtable({column: data["data"]}, schema={column: data["type"]}) expr = t[column].flatten() result = con.execute(expr) assert_series_equal( @@ -164,14 +164,14 @@ def test_array_flatten(con, flatten_data, column, expected): ], ) def test_range_start_stop_step(con, start, stop, step): - expr = ibis.range(start, stop, step) + expr = letsql.range(start, stop, step) result = con.execute(expr) assert list(result) == list(range(start, stop, step)) @pytest.mark.parametrize("n", [-2, 0, 2]) def test_range_single_argument(con, n): - expr = ibis.range(n) + expr = letsql.range(n) result = con.execute(expr) assert list(result) == list(range(n)) @@ -187,7 +187,7 @@ def test_range_single_argument(con, n): ], ) def test_array_unique(con, data, expected): - t = ibis.memtable(data) + t = letsql.memtable(data) expr = t.a.unique() result = con.execute(expr) assert_series_equal(result, pd.Series(expected, dtype="object")) @@ -276,7 +276,7 @@ def test_unnest_no_nulls(array_types): def test_unnest_default_name(array_types): df = array_types.execute() expr = ( - array_types.x.cast("!array") + ibis.array([1]).cast("!array") + array_types.x.cast("!array") + letsql.array([1]).cast("!array") ).unnest() assert expr.get_name().startswith("ArrayConcat(") diff --git a/python/letsql/tests/test_client.py b/python/letsql/tests/test_client.py index 31586bf1..71a1c79a 100644 --- a/python/letsql/tests/test_client.py +++ b/python/letsql/tests/test_client.py @@ -10,6 +10,7 @@ import 
rich.console from pytest import param +import letsql from letsql.tests.util import assert_frame_equal if TYPE_CHECKING: @@ -18,7 +19,7 @@ @pytest.fixture def new_schema(): - return ibis.schema([("a", "string"), ("b", "bool"), ("c", "int32")]) + return letsql.schema([("a", "string"), ("b", "bool"), ("c", "int32")]) def _create_temp_table_with_schema(con, temp_table_name, schema, data=None): @@ -43,19 +44,19 @@ def _create_temp_table_with_schema(con, temp_table_name, schema, data=None): ("expr", "expected"), [ param( - ibis.memtable([(1, 2.0, "3")], columns=list("abc")), + letsql.memtable([(1, 2.0, "3")], columns=list("abc")), pd.DataFrame([(1, 2.0, "3")], columns=list("abc")), id="simple", ), param( - ibis.memtable([(1, 2.0, "3")]), + letsql.memtable([(1, 2.0, "3")]), pd.DataFrame([(1, 2.0, "3")], columns=["col0", "col1", "col2"]), id="simple_auto_named", ), param( - ibis.memtable( + letsql.memtable( [(1, 2.0, "3")], - schema=ibis.schema(dict(a="int8", b="float32", c="string")), + schema=letsql.schema(dict(a="int8", b="float32", c="string")), ), pd.DataFrame([(1, 2.0, "3")], columns=list("abc")).astype( {"a": "int8", "b": "float32"} @@ -63,7 +64,7 @@ def _create_temp_table_with_schema(con, temp_table_name, schema, data=None): id="simple_schema", ), param( - ibis.memtable( + letsql.memtable( pd.DataFrame({"a": [1], "b": [2.0], "c": ["3"]}).astype( {"a": "int8", "b": "float32"} ) @@ -74,7 +75,7 @@ def _create_temp_table_with_schema(con, temp_table_name, schema, data=None): id="dataframe", ), param( - ibis.memtable([dict(a=1), dict(a=2)]), + letsql.memtable([dict(a=1), dict(a=2)]), pd.DataFrame({"a": [1, 2]}), id="list_of_dicts", ), @@ -86,7 +87,7 @@ def test_in_memory_table(con, expr, expected): def test_filter_memory_table(con): - t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) + t = letsql.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) expr = t.filter(t.x > 1) expected = pd.DataFrame({"x": [3, 5], "y": [4, 6]}) result = con.execute(expr) @@ -94,14 +95,14 @@ def test_filter_memory_table(con): def test_agg_memory_table(con): - t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) + t = letsql.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) expr = t.x.count() result = con.execute(expr) assert result == 3 def test_self_join_memory_table(con): - t = ibis.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) + t = letsql.memtable({"x": [1, 2], "y": [2, 1], "z": ["a", "b"]}) t_view = t.view() expr = t.join(t_view, t.x == t_view.y).select("x", "y", "z", "z_right") result = con.execute(expr).sort_values("x").reset_index(drop=True) @@ -131,7 +132,7 @@ def test_dunder_array_column(alltypes, dtype): @pytest.mark.parametrize("interactive", [True, False]) def test_repr(alltypes, interactive, monkeypatch): - monkeypatch.setattr(ibis.options, "interactive", interactive) + monkeypatch.setattr(letsql.options, "interactive", interactive) expr = alltypes.select("date_string_col") @@ -146,8 +147,8 @@ def test_repr(alltypes, interactive, monkeypatch): @pytest.mark.parametrize("show_types", [True, False]) def test_interactive_repr_show_types(alltypes, show_types, monkeypatch): - monkeypatch.setattr(ibis.options, "interactive", True) - monkeypatch.setattr(ibis.options.repr.interactive, "show_types", show_types) + monkeypatch.setattr(letsql.options, "interactive", True) + monkeypatch.setattr(letsql.options.repr.interactive, "show_types", show_types) expr = alltypes.select("id") s = repr(expr) @@ -159,7 +160,7 @@ def test_interactive_repr_show_types(alltypes, show_types, 
monkeypatch): @pytest.mark.parametrize("is_jupyter", [True, False]) def test_interactive_repr_max_columns(alltypes, is_jupyter, monkeypatch): - monkeypatch.setattr(ibis.options, "interactive", True) + monkeypatch.setattr(letsql.options, "interactive", True) cols = {f"c_{i}": ibis._.id + i for i in range(50)} expr = alltypes.mutate(**cols).select(*cols) @@ -178,13 +179,13 @@ def test_interactive_repr_max_columns(alltypes, is_jupyter, monkeypatch): assert " c_19 " not in text # max_columns = 3 - monkeypatch.setattr(ibis.options.repr.interactive, "max_columns", 3) + monkeypatch.setattr(letsql.options.repr.interactive, "max_columns", 3) text = "".join(s.text for s in console.render(expr, options)) assert " c_2 " in text assert " c_3 " not in text # max_columns = None - monkeypatch.setattr(ibis.options.repr.interactive, "max_columns", None) + monkeypatch.setattr(letsql.options.repr.interactive, "max_columns", None) text = "".join(s.text for s in console.render(expr, options)) assert " c_0 " in text if is_jupyter: @@ -198,7 +199,7 @@ def test_interactive_repr_max_columns(alltypes, is_jupyter, monkeypatch): @pytest.mark.parametrize("expr_type", ["table", "column"]) @pytest.mark.parametrize("interactive", [True, False]) def test_repr_mimebundle(alltypes, interactive, expr_type, monkeypatch): - monkeypatch.setattr(ibis.options, "interactive", interactive) + monkeypatch.setattr(letsql.options, "interactive", interactive) if expr_type == "column": expr = alltypes.date_string_col @@ -206,8 +207,23 @@ def test_repr_mimebundle(alltypes, interactive, expr_type, monkeypatch): expr = alltypes.select("date_string_col") reprs = expr._repr_mimebundle_(include=["text/plain", "text/html"], exclude=[]) - for format in ["text/plain", "text/html"]: + for fmt in ["text/plain", "text/html"]: if interactive: - assert "r0.date_string_col" not in reprs[format] + assert "r0.date_string_col" not in reprs[fmt] else: - assert "r0.date_string_col" in reprs[format] + assert "r0.date_string_col" in reprs[fmt] + + +@pytest.mark.parametrize( + "option", ["max_rows", "max_length", "max_string", "max_depth"] +) +def test_ibis_config_wrapper(option, monkeypatch): + import ibis + + letsql_option_value = getattr(letsql.options.repr.interactive, option) + assert letsql_option_value == getattr(ibis.options.repr.interactive, option) + + monkeypatch.setattr( + letsql.options.repr.interactive, option, letsql_option_value + 1 + ) + assert getattr(ibis.options.repr.interactive, option) == letsql_option_value + 1 diff --git a/python/letsql/tests/test_generic.py b/python/letsql/tests/test_generic.py index ba303828..215f88c8 100644 --- a/python/letsql/tests/test_generic.py +++ b/python/letsql/tests/test_generic.py @@ -3,7 +3,6 @@ import decimal from operator import invert, neg -import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt import numpy as np @@ -14,11 +13,12 @@ from ibis.common.annotations import ValidationError from pytest import param +import letsql from letsql.tests.util import assert_frame_equal, assert_series_equal def test_null_literal(con): - expr = ibis.null() + expr = letsql.null() assert pd.isna(con.execute(expr)) assert con.execute(expr.typeof()) == "Null" @@ -27,7 +27,7 @@ def test_null_literal(con): def test_boolean_literal(con): - expr = ibis.literal(False, type=dt.boolean) + expr = letsql.literal(False, type=dt.boolean) result = con.execute(expr) assert not result assert type(result) in (np.bool_, bool) @@ -37,10 +37,10 @@ def test_boolean_literal(con): @pytest.mark.parametrize( ("expr", 
"expected"), [ - param(ibis.NA.fillna(5), 5, id="na_fillna"), - param(ibis.literal(5).fillna(10), 5, id="non_na_fillna"), - param(ibis.literal(5).nullif(5), None, id="nullif_null"), - param(ibis.literal(10).nullif(5), 10, id="nullif_not_null"), + param(letsql.null().fillna(5), 5, id="na_fillna"), + param(letsql.literal(5).fillna(10), 5, id="non_na_fillna"), + param(letsql.literal(5).nullif(5), None, id="nullif_null"), + param(letsql.literal(10).nullif(5), 10, id="nullif_not_null"), ], ) def test_scalar_fillna_nullif(con, expr, expected): @@ -67,7 +67,7 @@ def test_scalar_fillna_nullif(con, expr, expected): ) def test_isna(alltypes, col, filt): table = alltypes.select( - nan_col=ibis.literal(np.nan), none_col=ibis.NA.cast("float64") + nan_col=letsql.literal(np.nan), none_col=letsql.null().cast("float64") ) df = table.execute() @@ -84,7 +84,7 @@ def test_isna(alltypes, col, filt): ], ) def test_column_fillna(alltypes, value): - table = alltypes.mutate(missing=ibis.literal(value).cast("float64")) + table = alltypes.mutate(missing=letsql.literal(value).cast("float64")) pd_table = table.execute() res = table.mutate(missing=table.missing.fillna(0.0)).execute() @@ -95,9 +95,13 @@ def test_column_fillna(alltypes, value): @pytest.mark.parametrize( ("expr", "expected"), [ - param(ibis.coalesce(5, None, 4), 5, id="generic"), - param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="null_start_end"), - param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="non_null_last"), + param(letsql.coalesce(5, None, 4), 5, id="generic"), + param(letsql.coalesce(letsql.null(), 4, letsql.null()), 4, id="null_start_end"), + param( + letsql.coalesce(letsql.null(), letsql.null(), 3.14), + 3.14, + id="non_null_last", + ), ], ) def test_coalesce(con, expr, expected): @@ -191,7 +195,7 @@ def test_case_where(alltypes, df): table = alltypes table = table.mutate( new_col=( - ibis.case() + letsql.case() .when(table["int_col"] == 1, 20) .when(table["int_col"] == 0, 10) .else_(0) @@ -305,9 +309,9 @@ def test_dropna_table(alltypes, how, subset): is_four = alltypes.int_col == 4 table = alltypes.mutate( - col_1=is_two.ifelse(ibis.NA, alltypes.float_col), - col_2=is_four.ifelse(ibis.NA, alltypes.float_col), - col_3=(is_two | is_four).ifelse(ibis.NA, alltypes.float_col), + col_1=is_two.ifelse(letsql.null(), alltypes.float_col), + col_2=is_four.ifelse(letsql.null(), alltypes.float_col), + col_3=(is_two | is_four).ifelse(letsql.null(), alltypes.float_col), ).select("col_1", "col_2", "col_3") table_pandas = table.execute() @@ -329,7 +333,7 @@ def test_select_sort_sort(alltypes): param(_.id, {"by": "id"}), param(lambda _: _.id, {"by": "id"}), param( - ibis.desc("id"), + letsql.desc("id"), {"by": "id", "ascending": False}, ), param( @@ -337,7 +341,7 @@ def test_select_sort_sort(alltypes): {"by": ["id", "int_col"]}, ), param( - ["id", ibis.desc("int_col")], + ["id", letsql.desc("int_col")], {"by": ["id", "int_col"], "ascending": [True, False]}, ), ], @@ -349,7 +353,7 @@ def test_order_by(alltypes, df, key, df_kwargs): def test_order_by_random(alltypes): - expr = alltypes.filter(_.id < 100).order_by(ibis.random()).limit(5) + expr = alltypes.filter(_.id < 100).order_by(letsql.random()).limit(5) r1 = expr.execute() r2 = expr.execute() assert len(r1) == 5 @@ -402,7 +406,7 @@ def test_isin_notin(alltypes, df, ibis_op, pandas_op): ], ) def test_logical_negation_literal(con, expr, expected, op): - assert con.execute(op(ibis.literal(expr)).name("tmp")) == expected + assert con.execute(op(letsql.literal(expr)).name("tmp")) == expected 
@pytest.mark.parametrize( @@ -425,7 +429,7 @@ def test_ifelse_select(alltypes, df): [ "int_col", ( - ibis.ifelse(table["int_col"] == 0, 42, -1) + letsql.ifelse(table["int_col"] == 0, 42, -1) .cast("int64") .name("where_col") ), @@ -443,7 +447,9 @@ def test_ifelse_select(alltypes, df): def test_ifelse_column(alltypes, df): - expr = ibis.ifelse(alltypes["int_col"] == 0, 42, -1).cast("int64").name("where_col") + expr = ( + letsql.ifelse(alltypes["int_col"] == 0, 42, -1).cast("int64").name("where_col") + ) result = expr.execute() expected = pd.Series( @@ -477,12 +483,12 @@ def test_select_filter_select(alltypes, df): def test_interactive(alltypes, monkeypatch): - monkeypatch.setattr(ibis.options, "interactive", True) + monkeypatch.setattr(letsql.options, "interactive", True) expr = alltypes.mutate( str_col=_.string_col.replace("1", "").nullif("2"), date_col=_.timestamp_col.date(), - delta_col=lambda t: ibis.now() - t.timestamp_col, + delta_col=lambda t: letsql.now() - t.timestamp_col, ) repr(expr) @@ -503,7 +509,7 @@ def test_uncorrelated_subquery(batting, batting_df): def test_int_column(alltypes): - expr = alltypes.mutate(x=ibis.literal(1)).x + expr = alltypes.mutate(x=letsql.literal(1)).x result = expr.execute() assert expr.type() == dt.int8 assert result.dtype == np.int8 @@ -536,19 +542,19 @@ def test_int_scalar(alltypes): ], ) def test_literal_na(con, dtype): - expr = ibis.literal(None, type=dtype) + expr = letsql.literal(None, type=dtype) result = con.execute(expr) assert pd.isna(result) def test_memtable_bool_column(con): - t = ibis.memtable({"a": [True, False, True]}) + t = letsql.memtable({"a": [True, False, True]}) assert_series_equal(con.execute(t.a), pd.Series([True, False, True], name="a")) def test_memtable_construct(con, monkeypatch): pa = pytest.importorskip("pyarrow") - monkeypatch.setattr(ibis.options, "default_backend", con) + monkeypatch.setattr(letsql.options, "backend", con) pa_t = pa.Table.from_pydict( { @@ -558,7 +564,7 @@ def test_memtable_construct(con, monkeypatch): "d": [None, "b", None], } ) - t = ibis.memtable(pa_t) + t = letsql.memtable(pa_t) assert_frame_equal(t.execute().fillna(pd.NA), pa_t.to_pandas().fillna(pd.NA)) @@ -644,7 +650,7 @@ def test_sample(functional_alltypes): def test_sample_memtable(con): df = pd.DataFrame({"x": [1, 2, 3, 4]}) - res = con.execute(ibis.memtable(df).sample(0.5)) + res = con.execute(letsql.memtable(df).sample(0.5)) assert len(res) <= 4 assert_frame_equal(res.iloc[:0], df.iloc[:0]) @@ -665,7 +671,7 @@ def hash_256(col): def test_typeof(con): # Other tests also use the typeof operation, but only this test has this operation required. 
- expr = ibis.literal(1).typeof() + expr = letsql.literal(1).typeof() result = con.execute(expr) assert result is not None diff --git a/python/letsql/tests/test_join.py b/python/letsql/tests/test_join.py index 8d34cdca..8e6f7be3 100644 --- a/python/letsql/tests/test_join.py +++ b/python/letsql/tests/test_join.py @@ -1,12 +1,12 @@ from __future__ import annotations -import ibis import ibis.expr.schema as sch import numpy as np import pandas as pd import pytest from pytest import param +import letsql from letsql.tests.util import assert_frame_equal @@ -188,30 +188,32 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players): [ # Trues param(True, True, id="true"), - param(ibis.literal(True), True, id="true-literal"), + param(letsql.literal(True), True, id="true-literal"), param([True], True, id="true-list"), - param([ibis.literal(True)], True, id="true-literal-list"), + param([letsql.literal(True)], True, id="true-literal-list"), # only trues param([True, True], True, id="true-true-list"), param( - [ibis.literal(True), ibis.literal(True)], True, id="true-true-literal-list" + [letsql.literal(True), letsql.literal(True)], + True, + id="true-true-literal-list", ), - param([True, ibis.literal(True)], True, id="true-true-const-expr-list"), - param([ibis.literal(True), True], True, id="true-true-expr-const-list"), + param([True, letsql.literal(True)], True, id="true-true-const-expr-list"), + param([letsql.literal(True), True], True, id="true-true-expr-const-list"), # Falses param(False, False, id="false"), - param(ibis.literal(False), False, id="false-literal"), + param(letsql.literal(False), False, id="false-literal"), param([False], False, id="false-list"), - param([ibis.literal(False)], False, id="false-literal-list"), + param([letsql.literal(False)], False, id="false-literal-list"), # only falses param([False, False], False, id="false-false-list"), param( - [ibis.literal(False), ibis.literal(False)], + [letsql.literal(False), letsql.literal(False)], False, id="false-false-literal-list", ), - param([False, ibis.literal(False)], False, id="false-false-const-expr-list"), - param([ibis.literal(False), False], False, id="false-false-expr-const-list"), + param([False, letsql.literal(False)], False, id="false-false-const-expr-list"), + param([letsql.literal(False), False], False, id="false-false-expr-const-list"), ], ) @pytest.mark.parametrize( diff --git a/python/letsql/tests/test_numeric.py b/python/letsql/tests/test_numeric.py index 5de1cc42..bf48292a 100644 --- a/python/letsql/tests/test_numeric.py +++ b/python/letsql/tests/test_numeric.py @@ -15,6 +15,7 @@ from ibis.expr import datatypes as dt from pytest import param +import letsql from letsql.tests.util import assert_series_equal, default_series_rename @@ -22,43 +23,43 @@ ("expr",), [ param( - ibis.literal(1, type=dt.int8), + letsql.literal(1, type=dt.int8), id="int8", ), param( - ibis.literal(1, type=dt.int16), + letsql.literal(1, type=dt.int16), id="int16", ), param( - ibis.literal(1, type=dt.int32), + letsql.literal(1, type=dt.int32), id="int32", ), param( - ibis.literal(1, type=dt.int64), + letsql.literal(1, type=dt.int64), id="int64", ), param( - ibis.literal(1, type=dt.uint8), + letsql.literal(1, type=dt.uint8), id="uint8", ), param( - ibis.literal(1, type=dt.uint16), + letsql.literal(1, type=dt.uint16), id="uint16", ), param( - ibis.literal(1, type=dt.uint32), + letsql.literal(1, type=dt.uint32), id="uint32", ), param( - ibis.literal(1, type=dt.uint64), + letsql.literal(1, type=dt.uint64), id="uint64", ), param( - 
ibis.literal(1, type=dt.float32), + letsql.literal(1, type=dt.float32), id="float32", ), param( - ibis.literal(1, type=dt.float64), + letsql.literal(1, type=dt.float64), id="float64", ), ], @@ -72,12 +73,12 @@ def test_numeric_literal(con, expr): ("expr", "expected_result"), [ param( - ibis.literal(decimal.Decimal("1.1"), type=dt.decimal), + letsql.literal(decimal.Decimal("1.1"), type=dt.decimal), decimal.Decimal("1.1"), id="default", ), param( - ibis.literal(decimal.Decimal("1.1"), type=dt.Decimal(38, 9)), + letsql.literal(decimal.Decimal("1.1"), type=dt.Decimal(38, 9)), decimal.Decimal("1.1"), id="decimal-small", ), @@ -107,22 +108,22 @@ def test_decimal_literal(con, expr, expected_result): id="double-column", ), param( - lambda t: ibis.literal(1.3), + lambda t: letsql.literal(1.3), lambda t: 1.3, id="float-literal", ), param( - lambda t: ibis.literal(np.nan), + lambda t: letsql.literal(np.nan), lambda t: np.nan, id="nan-literal", ), param( - lambda t: ibis.literal(np.inf), + lambda t: letsql.literal(np.inf), lambda t: np.inf, id="inf-literal", ), param( - lambda t: ibis.literal(-np.inf), + lambda t: letsql.literal(-np.inf), lambda t: -np.inf, id="-inf-literal", ), @@ -235,12 +236,12 @@ def test_isnan_isinf( id="mod", ), param( - ibis.greatest(L(10), L(1)), + letsql.greatest(L(10), L(1)), 10, id="greatest", ), param( - ibis.least(L(10), L(1)), + letsql.least(L(10), L(1)), 1, id="least", ), @@ -376,22 +377,22 @@ def test_complex_math_functions_columns(con, alltypes, df, expr_fn, expected_fn) ("expr_fn", "expected_fn"), [ param( - lambda t: ibis.least(t.bigint_col, t.int_col), + lambda t: letsql.least(t.bigint_col, t.int_col), lambda t: pd.Series(list(map(min, t.bigint_col, t.int_col))), id="least-all-columns", ), param( - lambda t: ibis.least(t.bigint_col, t.int_col, -2), + lambda t: letsql.least(t.bigint_col, t.int_col, -2), lambda t: pd.Series(list(map(min, t.bigint_col, t.int_col, [-2] * len(t)))), id="least-scalar", ), param( - lambda t: ibis.greatest(t.bigint_col, t.int_col), + lambda t: letsql.greatest(t.bigint_col, t.int_col), lambda t: pd.Series(list(map(max, t.bigint_col, t.int_col))), id="greatest-all-columns", ), param( - lambda t: ibis.greatest(t.bigint_col, t.int_col, -2), + lambda t: letsql.greatest(t.bigint_col, t.int_col, -2), lambda t: pd.Series(list(map(max, t.bigint_col, t.int_col, [-2] * len(t)))), id="greatest-scalar", ), @@ -515,7 +516,7 @@ def test_divide_by_zero(alltypes, df, column, denominator): def test_random(con): - expr = ibis.random() + expr = letsql.random() result = con.execute(expr) assert isinstance(result, float) assert 0 <= result <= 1 diff --git a/python/letsql/tests/test_param.py b/python/letsql/tests/test_param.py new file mode 100644 index 00000000..fcb82f1a --- /dev/null +++ b/python/letsql/tests/test_param.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import letsql +import ibis.expr.datatypes as dt +import pytest + +from letsql.tests.util import default_series_rename, assert_series_equal + + +@pytest.mark.parametrize( + ("column", "raw_value"), + [ + ("double_col", 0.0), + ("double_col", 10.1), + ("float_col", 1.1), + ("float_col", 2.2), + ], +) +def test_floating_scalar_parameter(alltypes, df, column, raw_value): + value = letsql.param(dt.double) + expr = (alltypes[column] + value).name("tmp") + expected = df[column] + raw_value + result = expr.execute(params={value: raw_value}) + expected = default_series_rename(expected) + assert_series_equal(result, expected, check_dtype=False) diff --git a/python/letsql/tests/test_set_ops.py 
b/python/letsql/tests/test_set_ops.py index 81d28e31..d7b0882f 100644 --- a/python/letsql/tests/test_set_ops.py +++ b/python/letsql/tests/test_set_ops.py @@ -2,13 +2,13 @@ import random -import ibis import ibis.expr.types as ir import pandas as pd import pytest from ibis import _ from pytest import param +import letsql from letsql.tests.util import assert_frame_equal @@ -37,7 +37,7 @@ def union_subsets(alltypes, df): def test_union(union_subsets, distinct): (a, b, c), (da, db, dc) = union_subsets - expr = ibis.union(a, b, distinct=distinct).order_by("id") + expr = letsql.union(a, b, distinct=distinct).order_by("id") result = expr.execute() expected = pd.concat([da, db], axis=0).sort_values("id").reset_index(drop=True) @@ -77,7 +77,7 @@ def test_intersect(alltypes, df, distinct): db = df[(df.id >= 5205) & (df.id <= 5215)] dc = df[(df.id >= 5195) & (df.id <= 5208)] - expr = ibis.intersect(a, b, c, distinct=distinct).order_by("id") + expr = letsql.intersect(a, b, c, distinct=distinct).order_by("id") result = expr.execute() index = da.index.intersection(db.index).intersection(dc.index) @@ -106,7 +106,7 @@ def test_difference(alltypes, df, distinct): db = df[(df.id >= 5205) & (df.id <= 5215)] dc = df[(df.id >= 5195) & (df.id <= 5202)] - expr = ibis.difference(a, b, c, distinct=distinct).order_by("id") + expr = letsql.difference(a, b, c, distinct=distinct).order_by("id") result = expr.execute() index = da.index.difference(db.index).difference(dc.index) @@ -120,7 +120,7 @@ def test_difference(alltypes, df, distinct): @pytest.mark.parametrize("method", ["intersect", "difference", "union"]) def test_table_set_operations_api(alltypes, method): # top level variadic - result = getattr(ibis, method)(alltypes) + result = getattr(letsql, method)(alltypes) assert result.equals(alltypes) # table level methods require at least one argument diff --git a/python/letsql/tests/test_string.py b/python/letsql/tests/test_string.py index 66719466..466b2ab0 100644 --- a/python/letsql/tests/test_string.py +++ b/python/letsql/tests/test_string.py @@ -1,11 +1,11 @@ from __future__ import annotations -import ibis import ibis.expr.datatypes as dt import pandas as pd import pytest from pytest import param +import letsql from letsql.tests.util import assert_frame_equal, assert_series_equal @@ -27,7 +27,7 @@ ], ) def test_string_literal(con, text_value): - expr = ibis.literal(text_value) + expr = letsql.literal(text_value) result = con.execute(expr) assert result == text_value @@ -268,7 +268,7 @@ def test_string_col_is_unicode(alltypes, df): id="expr_slice_begin_end", ), param( - lambda t: ibis.literal("-").join(["a", t.string_col, "c"]), + lambda t: letsql.literal("-").join(["a", t.string_col, "c"]), lambda t: "a-" + t.string_col + "-c", id="join", ), @@ -303,14 +303,14 @@ def test_string(alltypes, df, result_func, expected_func): def test_re_replace_global(con): - expr = ibis.literal("aba").re_replace("a", "c") + expr = letsql.literal("aba").re_replace("a", "c") result = con.execute(expr) assert result == "cbc" def test_substr_with_null_values(alltypes, df): table = alltypes.mutate( - substr_col_null=ibis.case() + substr_col_null=letsql.case() .when(alltypes["bool_col"], alltypes["string_col"]) .else_(None) .end() @@ -350,7 +350,7 @@ def test_substr_with_null_values(alltypes, df): param(lambda d: d.query(), "name=networking", id="query"), param(lambda d: d.query("name"), "networking", id="query-key"), param( - lambda d: d.query(ibis.literal("na") + ibis.literal("me")), + lambda d: d.query(letsql.literal("na") + 
letsql.literal("me")),
             "networking",
             id="query-dynamic-key",
         ),
@@ -359,27 +359,27 @@
     ],
 )
 def test_parse_url(con, result_func, expected):
     url = "http://user:pass@example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING"
-    expr = result_func(ibis.literal(url))
+    expr = result_func(letsql.literal(url))
     result = con.execute(expr)
     assert result == expected
 
 
 def test_capitalize(con):
-    s = ibis.literal("aBc")
+    s = letsql.literal("aBc")
     expected = "Abc"
     expr = s.capitalize()
     assert con.execute(expr) == expected
 
 
 def test_subs_with_re_replace(con):
-    expr = ibis.literal("hi").re_replace("i", "a").substitute({"d": "b"}, else_="k")
+    expr = letsql.literal("hi").re_replace("i", "a").substitute({"d": "b"}, else_="k")
     result = con.execute(expr)
 
     assert result == "k"
 
 
 def test_multiple_subs(con):
     m = {"foo": "FOO", "bar": "BAR"}
-    expr = ibis.literal("foo").substitute(m)
+    expr = letsql.literal("foo").substitute(m)
     result = con.execute(expr)
     assert result == "FOO"
@@ -387,8 +387,8 @@ def test_multiple_subs(con):
 @pytest.mark.parametrize(
     "expr",
     [
-        param(ibis.case().when(True, "%").end(), id="case"),
-        param(ibis.ifelse(True, "%", ibis.NA), id="ifelse"),
+        param(letsql.case().when(True, "%").end(), id="case"),
+        param(letsql.ifelse(True, "%", letsql.null()), id="ifelse"),
     ],
 )
 def test_no_conditional_percent_escape(con, expr):
@@ -396,5 +396,5 @@ def test_no_conditional_percent_escape(con, expr):
 
 
 def test_string_length(con):
-    t = ibis.memtable({"s": ["aaa", "a", "aa"]})
+    t = letsql.memtable({"s": ["aaa", "a", "aa"]})
     assert con.execute(t.s.length()).gt(0).all()
diff --git a/python/letsql/tests/test_struct.py b/python/letsql/tests/test_struct.py
index 5928d752..2e546b8e 100644
--- a/python/letsql/tests/test_struct.py
+++ b/python/letsql/tests/test_struct.py
@@ -2,13 +2,13 @@
 
 from collections.abc import Mapping
 
-import ibis
 import ibis.expr.datatypes as dt
 import numpy as np
 import pandas as pd
 import pytest
 from pytest import param
 
+import letsql
 from letsql.tests.util import assert_series_equal, assert_frame_equal
 
 
@@ -48,11 +48,11 @@ def test_all_fields(struct, struct_df):
 
 _SIMPLE_DICT = dict(a=1, b="2", c=3.0)
-_STRUCT_LITERAL = ibis.struct(
+_STRUCT_LITERAL = letsql.struct(
     _SIMPLE_DICT,
     type="struct<a: int64, b: string, c: float64>",
 )
-_NULL_STRUCT_LITERAL = ibis.null().cast("struct<a: int64, b: string, c: float64>")
+_NULL_STRUCT_LITERAL = letsql.null().cast("struct<a: int64, b: string, c: float64>")
 
 
 @pytest.mark.parametrize("field", ["a", "b", "c"])
@@ -67,7 +67,7 @@ def test_literal(con, field):
 
 def test_struct_column(alltypes, df):
     t = alltypes
-    expr = t.select(s=ibis.struct(dict(a=t.string_col, b=1, c=t.bigint_col)))
+    expr = t.select(s=letsql.struct(dict(a=t.string_col, b=1, c=t.bigint_col)))
     assert expr.s.type() == dt.Struct(dict(a=dt.string, b=dt.int8, c=dt.int64))
     result = expr.execute()
     expected = pd.DataFrame(
@@ -77,8 +77,8 @@ def test_struct_column(alltypes, df):
 
 
 def test_field_access_after_case(con):
-    s = ibis.struct({"a": 3})
-    x = ibis.case().when(True, s).else_(ibis.struct({"a": 4})).end()
+    s = letsql.struct({"a": 3})
+    x = letsql.case().when(True, s).else_(letsql.struct({"a": 4})).end()
     y = x.a
     assert con.to_pandas(y) == 3
 
@@ -91,7 +91,7 @@ def test_collect_into_struct(alltypes):
     t[_.string_col.isin(("0", "1"))]
     .group_by(group="string_col")
     .agg(
-        val=lambda t: ibis.struct(
+        val=lambda t: letsql.struct(
             dict(key=t.bigint_col.collect().cast("array<int64>"))
         )
     )
diff --git a/python/letsql/tests/test_temporal.py b/python/letsql/tests/test_temporal.py
index 835e90d0..1430e366 100644
--- 
a/python/letsql/tests/test_temporal.py +++ b/python/letsql/tests/test_temporal.py @@ -5,13 +5,13 @@ import warnings from operator import methodcaller -import ibis import ibis.expr.datatypes as dt import numpy as np import pandas as pd import pytest from pytest import param +import letsql as ls from letsql.tests.util import ( assert_frame_equal, assert_series_equal, @@ -113,7 +113,7 @@ def test_timestamp_extract(alltypes, df, attr): ], ) def test_timestamp_extract_literal(con, func, expected): - value = ibis.timestamp("2015-09-01 14:48:05.359") + value = ls.timestamp("2015-09-01 14:48:05.359") assert con.execute(func(value).name("tmp")) == expected @@ -263,7 +263,7 @@ def test_timestamp_comparison_filter_numpy(con, alltypes, df, func_name): def test_interval_add_cast_scalar(alltypes): timestamp_date = alltypes.timestamp_col.date() - delta = ibis.literal(10).cast("interval('D')") + delta = ls.literal(10).cast("interval('D')") expr = (timestamp_date + delta).name("result") result = expr.execute() expected = timestamp_date.name("result").execute() + pd.Timedelta(10, unit="D") @@ -302,7 +302,7 @@ def test_interval_add_cast_column(alltypes, df): ], ) def test_day_of_week_scalar(con, date, expected_index, expected_day): - expr = ibis.literal(date).cast(dt.date) + expr = ls.literal(date).cast(dt.date) result_index = con.execute(expr.day_of_week.index().name("tmp")) assert result_index == expected_index @@ -360,8 +360,8 @@ def test_day_of_week_column_group_by( def test_date_scalar_from_iso(con): - expr = ibis.literal("2022-02-24") - expr2 = ibis.date(expr) + expr = ls.literal("2022-02-24") + expr2 = ls.date(expr) result = con.execute(expr2) assert result.strftime("%Y-%m-%d") == "2022-02-24" @@ -374,7 +374,7 @@ def test_date_column_from_iso(con, alltypes, df): + alltypes.month.cast("string").lpad(2, "0") + "-13" ) - expr = ibis.date(expr) + expr = ls.date(expr) result = con.execute(expr.name("tmp")) golden = df.year.astype(str) + "-" + df.month.astype(str).str.rjust(2, "0") + "-13" @@ -383,14 +383,14 @@ def test_date_column_from_iso(con, alltypes, df): def test_timestamp_extract_milliseconds_with_big_value(con): - timestamp = ibis.timestamp("2021-01-01 01:30:59.333456") + timestamp = ls.timestamp("2021-01-01 01:30:59.333456") millis = timestamp.millisecond() result = con.execute(millis.name("tmp")) assert result == 333 def test_big_timestamp(con): - value = ibis.timestamp("2419-10-11 10:10:25") + value = ls.timestamp("2419-10-11 10:10:25") result = con.execute(value.name("tmp")) expected = datetime.datetime(2419, 10, 11, 10, 10, 25) assert result == expected @@ -441,7 +441,7 @@ def test_timestamp_date_comparison(alltypes, df, left_fn, right_fn): def test_large_timestamp(con): huge_timestamp = datetime.datetime(year=4567, month=1, day=1) - expr = ibis.timestamp("4567-01-01 00:00:00") + expr = ls.timestamp("4567-01-01 00:00:00") result = con.execute(expr) assert result.replace(tzinfo=None) == huge_timestamp @@ -471,7 +471,7 @@ def test_large_timestamp(con): ) def test_timestamp_precision_output(con, ts, scale, unit): dtype = dt.Timestamp(scale=scale) - expr = ibis.literal(ts).cast(dtype) + expr = ls.literal(ts).cast(dtype) result = con.execute(expr) expected = pd.Timestamp(ts).floor(unit) assert result == expected @@ -503,19 +503,19 @@ def test_timestamp_precision_output(con, ts, scale, unit): ], ) def test_time_extract_literal(con, func, expected): - value = ibis.time("14:48:05.359") + value = ls.time("14:48:05.359") assert con.execute(func(value).name("tmp")) == expected def test_now(con): - expr = 
ibis.now()
+    expr = ls.now()
     result = con.execute(expr.name("tmp"))
     assert isinstance(result, datetime.datetime)
 
 
 def test_now_from_projection(alltypes):
     n = 2
-    expr = alltypes.select(now=ibis.now()).limit(n)
+    expr = alltypes.select(now=ls.now()).limit(n)
     result = expr.execute()
     ts = result.now
     assert len(result) == n
@@ -536,7 +536,7 @@ def test_integer_to_interval_date(con, alltypes, df, unit):
     interval = alltypes.int_col.to_interval(unit=unit)
     array = alltypes.date_string_col.split("/")
     month, day, year = array[0], array[1], array[2]
-    date_col = ibis.literal("-").join(["20" + year, month, day]).cast("date")
+    date_col = ls.literal("-").join(["20" + year, month, day]).cast("date")
     expr = (date_col + interval).name("tmp")
 
     with warnings.catch_warnings():
@@ -662,7 +662,7 @@ def test_timestamp_bucket(alltypes, kws: dict, pd_freq):
 @pytest.mark.parametrize("offset_in_minutes", [2, -2], ids=["pos", "neg"])
 def test_timestamp_bucket_offset(alltypes, offset_in_minutes):
     ts = alltypes.timestamp_col
-    expr = ts.bucket(minutes=5, offset=ibis.interval(minutes=offset_in_minutes))
+    expr = ts.bucket(minutes=5, offset=ls.interval(minutes=offset_in_minutes))
     res = expr.execute().astype("datetime64[ns]").rename("ts")
     td = pd.Timedelta(minutes=offset_in_minutes)
     sol = ((ts.execute().rename("ts") - td).dt.floor("300s") + td).astype(
@@ -674,7 +674,7 @@ def test_timestamp_bucket_offset(alltypes, offset_in_minutes):
 @pytest.mark.parametrize("offset_in_hours", [2, -2], ids=["pos", "neg"])
 def test_timestamp_bucket_offset_in_hours(alltypes, offset_in_hours):
     ts = alltypes.timestamp_col
-    expr = ts.bucket(minutes=5, offset=ibis.interval(hours=offset_in_hours))
+    expr = ts.bucket(minutes=5, offset=ls.interval(hours=offset_in_hours))
     res = expr.execute().astype("datetime64[ns]").rename("ts")
     td = pd.Timedelta(hours=offset_in_hours)
     sol = ((ts.execute().rename("ts") - td).dt.floor("300s") + td).astype(
diff --git a/python/letsql/tests/test_wrapper.py b/python/letsql/tests/test_wrapper.py
index c527310a..bab91c9a 100644
--- a/python/letsql/tests/test_wrapper.py
+++ b/python/letsql/tests/test_wrapper.py
@@ -1,4 +1,3 @@
-import ibis
 import pandas as pd
 import pytest
 from pytest import param
@@ -36,7 +35,7 @@ def test_simple_agg_ops_read_csv(data_dir):
 
 
 def test_memtable_ops_dict():
-    t = ibis.memtable({"s": ["aaa", "a", "aa"]})
+    t = ls.memtable({"s": ["aaa", "a", "aa"]})
     assert t.s.length().execute().gt(0).all()
 
 
@@ -44,17 +43,17 @@ def test_memtable_ops_dict():
     ("expr", "expected"),
     [
         param(
-            lambda: ibis.memtable([(1, 2.0, "3")], columns=list("abc")),
+            lambda: ls.memtable([(1, 2.0, "3")], columns=list("abc")),
             pd.DataFrame([(1, 2.0, "3")], columns=list("abc")),
             id="simple",
         ),
         param(
-            lambda: ibis.memtable([(1, 2.0, "3")]),
+            lambda: ls.memtable([(1, 2.0, "3")]),
             pd.DataFrame([(1, 2.0, "3")], columns=["col0", "col1", "col2"]),
             id="simple_auto_named",
         ),
         param(
-            lambda: ibis.memtable(
+            lambda: ls.memtable(
                 pd.DataFrame({"a": [1], "b": [2.0], "c": ["3"]}).astype(
                     {"a": "int8", "b": "float32"}
                 )
             ),
@@ -65,7 +64,7 @@ def test_memtable_ops_dict():
             id="dataframe",
         ),
         param(
-            lambda: ibis.memtable([dict(a=1), dict(a=2)]),
+            lambda: ls.memtable([dict(a=1), dict(a=2)]),
             pd.DataFrame({"a": [1, 2]}),
             id="list_of_dicts",
         ),
diff --git a/src/context.rs b/src/context.rs
index abec623e..dbcf9ad3 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -310,6 +310,16 @@ impl PySessionContext {
         Ok(())
     }
 
+    pub fn register_dataframe(&mut self, name: &str, dataframe: PyDataFrame) -> PyResult<()> {
+        let table: Arc<dyn TableProvider> = 
dataframe.df.as_ref().clone().into_view();
+
+        self.ctx
+            .register_table(name, table)
+            .map_err(DataFusionError::from)?;
+
+        Ok(())
+    }
+
     pub fn register_table(&mut self, name: &str, table: &PyTable) -> PyResult<()> {
         self.ctx
             .register_table(name, table.table())
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 63ff2b72..5033b205 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -23,7 +23,7 @@ use crate::utils::{get_tokio_runtime, wait_for_completion, wait_for_future};
 #[pyclass(name = "DataFrame", module = "datafusion", subclass)]
 #[derive(Clone)]
 pub struct PyDataFrame {
-    df: Arc<DataFrame>,
+    pub df: Arc<DataFrame>,
 }
 
 impl PyDataFrame {
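
Note: the snippet below is an illustrative sketch and not part of the patch. It
assumes the patch is applied; the table names "t" and "t_doubled" are
hypothetical. It exercises the new path end to end: con.con is the underlying
DataFusion SessionContext, whose sql() method returns a DataFusion DataFrame,
and register() now accepts such a DataFrame through the register_dataframe
binding added in src/context.rs above, so arbitrary DataFusion results can be
re-registered and queried as ordinary tables.

    >>> import pandas as pd
    >>> import letsql as ls
    >>> con = ls.connect()
    >>> t = con.register(pd.DataFrame({"x": [1, 2, 3]}), table_name="t")
    >>> df = con.con.sql("SELECT x, x * 2 AS y FROM t")  # DataFusion DataFrame
    >>> con.register(df, table_name="t_doubled").execute()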