From bac745e9d320f9e98390d918144f37f2e4144da7 Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sat, 26 Nov 2022 20:49:12 +0100 Subject: [PATCH 01/11] fix ibis bokeh datetime axis --- holoviews/core/util.py | 1 - holoviews/plotting/bokeh/element.py | 2 +- .../tests/core/data/test_ibisinterface.py | 19 +++++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 8a1d5eae70..9074ad4465 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -890,7 +890,6 @@ def isfinite(val): return finite & (~pd.isna(val)) return finite - def isdatetime(value): """ Whether the array or scalar is recognized datetime type. diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index 96cc668dee..f65a45453e 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -420,7 +420,7 @@ def _axes_props(self, plots, subplots, element, ranges): categorical_x = True else: xtype = el.get_dimension_type(xdims[0]) - if ((xtype is np.object_ and issubclass(type(l), util.datetime_types)) or + if (((xtype is np.object_ or isinstance(xtype, np.object)) and issubclass(type(l), util.datetime_types)) or xtype in util.datetime_types): x_axis_type = 'datetime' diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index b5b192d464..7631eb9c53 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -12,9 +12,12 @@ import numpy as np import pandas as pd +from bokeh.models import axes as bokeh_axes +from holoviews import render from holoviews.core.data import Dataset from holoviews.core.spaces import HoloMap from holoviews.core.data.ibis import IbisInterface +from holoviews.element.chart import Curve from .base import HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests @@ -303,3 +306,19 @@ def test_dataset_iloc_ellipsis_list_cols(self): def test_dataset_boolean_index(self): raise SkipTest("Not supported") + + def test_datetime_xaxis(self): + """Test to make sure a DateTimeAxis can be identified for the bokeh backend""" + # Given + df = pd.DataFrame({ + "x": [pd.Timestamp("2022-01-01"), pd.Timestamp("2022-01-02")], "y": [1,2] + }) + con = ibis.pandas.connect({"df": df}) + table = con.table("df") + plot_ibis = Curve(table, kdims="x", vdims="y") + # When + plot_bokeh = render(plot_ibis, "bokeh") + # Then + xaxis, yaxis = plot_bokeh.axis + assert isinstance(xaxis, bokeh_axes.DatetimeAxis) + assert isinstance(yaxis, bokeh_axes.LinearAxis) From a053b00088739f94d6acfc5cbfed43c25b4c922e Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sat, 26 Nov 2022 20:54:36 +0100 Subject: [PATCH 02/11] refactor --- holoviews/tests/core/data/test_ibisinterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index 7631eb9c53..8ebfffe338 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -318,7 +318,7 @@ def test_datetime_xaxis(self): plot_ibis = Curve(table, kdims="x", vdims="y") # When plot_bokeh = render(plot_ibis, "bokeh") - # Then xaxis, yaxis = plot_bokeh.axis + # Then assert isinstance(xaxis, bokeh_axes.DatetimeAxis) assert isinstance(yaxis, bokeh_axes.LinearAxis) From 27b40b4be7e852b5b5d3ca55b2454c86d0a54e94 Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sun, 27 Nov 2022 08:30:09 +0100 Subject: [PATCH 03/11] fix --- holoviews/tests/core/data/test_ibisinterface.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index 8ebfffe338..548f276e07 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -322,3 +322,19 @@ def test_datetime_xaxis(self): # Then assert isinstance(xaxis, bokeh_axes.DatetimeAxis) assert isinstance(yaxis, bokeh_axes.LinearAxis) + + def test_categorical_xaxis(self): + """Test to make sure a Categorical axis can be identified for the bokeh backend""" + # Given + df = pd.DataFrame({ + "x": ["A", "B"], "y": [1,2] + }) + con = ibis.pandas.connect({"df": df}) + table = con.table("df") + plot_ibis = Curve(table, kdims="x", vdims="y") + # When + plot_bokeh = render(plot_ibis, "bokeh") + xaxis, yaxis = plot_bokeh.axis + # Then + assert isinstance(xaxis, bokeh_axes.CategoricalAxis) + assert isinstance(yaxis, bokeh_axes.LinearAxis) From 7976efc8705bdb747fa5c6a0bd6b41a1c5c4394a Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sun, 27 Nov 2022 10:34:42 +0100 Subject: [PATCH 04/11] fix --- holoviews/core/data/ibis.py | 4 +++- holoviews/plotting/bokeh/element.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 6213b81e6d..02bfa30055 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -172,7 +172,9 @@ def dtype(cls, dataset, dimension): dimension = dataset.get_dimension(dimension) return dataset.data.head(0).execute().dtypes[dimension.name] - dimension_type = dtype + @classmethod + def dimension_type(cls, dataset, dim): + return cls.dtype(dataset, dim) @classmethod def sort(cls, dataset, by=[], reverse=False): diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index f65a45453e..96cc668dee 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -420,7 +420,7 @@ def _axes_props(self, plots, subplots, element, ranges): categorical_x = True else: xtype = el.get_dimension_type(xdims[0]) - if (((xtype is np.object_ or isinstance(xtype, np.object)) and issubclass(type(l), util.datetime_types)) or + if ((xtype is np.object_ and issubclass(type(l), util.datetime_types)) or xtype in util.datetime_types): x_axis_type = 'datetime' From ad69245a27be3627dfaa38a9a3504d07bd51dda5 Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sun, 27 Nov 2022 11:23:41 +0100 Subject: [PATCH 05/11] found the fix --- holoviews/core/data/ibis.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 02bfa30055..95b04d6672 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -111,7 +111,12 @@ def nonzero(cls, dataset): @cached def range(cls, dataset, dimension): dimension = dataset.get_dimension(dimension, strict=True) - if cls.dtype(dataset, dimension).kind in 'SUO': + if cls.dtype(dataset, dimension).kind == 'O': + column = dataset.data[dimension.name] + first = column.first().execute() + last = column.last().execute() + return first, last + if cls.dtype(dataset, dimension).kind in 'SU': return None, None if dimension.nodata is not None: return Interface.range(dataset, dimension) @@ -174,7 +179,7 @@ def dtype(cls, dataset, dimension): @classmethod def dimension_type(cls, dataset, dim): - return cls.dtype(dataset, dim) + return cls.dtype(dataset, dim).type @classmethod def sort(cls, dataset, by=[], reverse=False): From 5e5c4699dc6b62955558a8c7eeb69fc62c74f4ac Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sun, 27 Nov 2022 12:12:36 +0100 Subject: [PATCH 06/11] mature ibis fix and tests --- holoviews/core/data/ibis.py | 7 +-- .../tests/core/data/test_ibisinterface.py | 50 +++++++++++++------ 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 95b04d6672..485dae64db 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -112,9 +112,10 @@ def nonzero(cls, dataset): def range(cls, dataset, dimension): dimension = dataset.get_dimension(dimension, strict=True) if cls.dtype(dataset, dimension).kind == 'O': - column = dataset.data[dimension.name] - first = column.first().execute() - last = column.last().execute() + # Can this be done more efficiently? + column = dataset.data[dimension.name].execute() + first = column.iloc[0] + last = column.iloc[-1] return first, last if cls.dtype(dataset, dimension).kind in 'SU': return None, None diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index 548f276e07..5bf1fec830 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -103,6 +103,20 @@ def init_column_data(self): hm_db.table("hm"), kdims=[("x", "X")], vdims=[("y", "Y")] ) + reference_df = pd.DataFrame( + { + "actual": [100, 150, 125, 140, 145, 135, 123], + "forecast": [90, 160, 125, 150, 141, 141, 120], + "numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5], + "date": pd.date_range("2022-01-03", "2022-01-09"), + "string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], + }, + ) + reference_db = create_temp_db(reference_df, "reference_df") + self.reference_table = Dataset( + reference_db.table("reference_df"), kdims=["numerical", "date", "string"], vdims=["actual", "forecast"] + ) + def test_dataset_array_init_hm(self): raise SkipTest("Not supported") @@ -307,15 +321,19 @@ def test_dataset_iloc_ellipsis_list_cols(self): def test_dataset_boolean_index(self): raise SkipTest("Not supported") + def test_range(self): + assert IbisInterface.range(self.reference_table, "date") == (np.datetime64('2022-01-03'), np.datetime64('2022-01-09')) + assert IbisInterface.range(self.reference_table, "string") == ('Mon', 'Sun') + assert IbisInterface.range(self.reference_table, "numerical") == (np.float64(1.1), np.float64(5.5)) + + def test_dimension_type(self): + assert IbisInterface.dimension_type(self.reference_table, "date") is np.datetime64 + assert IbisInterface.dimension_type(self.reference_table, "string") is np.object_ + assert IbisInterface.dimension_type(self.reference_table, "numerical") is np.float64 + def test_datetime_xaxis(self): """Test to make sure a DateTimeAxis can be identified for the bokeh backend""" - # Given - df = pd.DataFrame({ - "x": [pd.Timestamp("2022-01-01"), pd.Timestamp("2022-01-02")], "y": [1,2] - }) - con = ibis.pandas.connect({"df": df}) - table = con.table("df") - plot_ibis = Curve(table, kdims="x", vdims="y") + plot_ibis = Curve(self.reference_table, kdims="date", vdims="actual") # When plot_bokeh = render(plot_ibis, "bokeh") xaxis, yaxis = plot_bokeh.axis @@ -325,16 +343,20 @@ def test_datetime_xaxis(self): def test_categorical_xaxis(self): """Test to make sure a Categorical axis can be identified for the bokeh backend""" - # Given - df = pd.DataFrame({ - "x": ["A", "B"], "y": [1,2] - }) - con = ibis.pandas.connect({"df": df}) - table = con.table("df") - plot_ibis = Curve(table, kdims="x", vdims="y") + plot_ibis = Curve(self.reference_table, kdims="string", vdims="actual") # When plot_bokeh = render(plot_ibis, "bokeh") xaxis, yaxis = plot_bokeh.axis # Then assert isinstance(xaxis, bokeh_axes.CategoricalAxis) assert isinstance(yaxis, bokeh_axes.LinearAxis) + + def test_numerical_xaxis(self): + """Test to make sure a LinearAxis axis can be identified for the bokeh backend""" + plot_ibis = Curve(self.reference_table, kdims="numerical", vdims="actual") + # When + plot_bokeh = render(plot_ibis, "bokeh") + xaxis, yaxis = plot_bokeh.axis + # Then + assert isinstance(xaxis, bokeh_axes.LinearAxis) + assert isinstance(yaxis, bokeh_axes.LinearAxis) From deeea89d23469b90e14b1d05dbfca7345f4861fe Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Mon, 28 Nov 2022 08:29:26 +0100 Subject: [PATCH 07/11] iteration on ibis duckdb RowID error --- .../tests/core/data/test_ibisinterface.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index 5bf1fec830..8f10c1c7be 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -360,3 +360,50 @@ def test_numerical_xaxis(self): # Then assert isinstance(xaxis, bokeh_axes.LinearAxis) assert isinstance(yaxis, bokeh_axes.LinearAxis) + +import pytest +import ibis +import duckdb +from pathlib import Path + +def create_pandas_connection(df: pd.DataFrame, *args, **kwargs): + return ibis.pandas.connect({"df": df}) + +def create_duckdb_connection(df: pd.DataFrame, *args, **kwargs): + tmpdir = kwargs["tmpdir"] + filename = str(Path(tmpdir)/"db.db") + duckdb_con = duckdb.connect(filename) + duckdb_con.execute("CREATE TABLE df AS SELECT * FROM df") + + return ibis.duckdb.connect(filename) + +def create_sqlite_connection(df: pd.DataFrame): + return create_temp_db(df, "df") + +@pytest.fixture +def reference_df(): + return pd.DataFrame( + { + "actual": [100, 150, 125, 140, 145, 135, 123], + "forecast": [90, 160, 125, 150, 141, 141, 120], + "numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5], + "date": pd.date_range("2022-01-03", "2022-01-09"), + "string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], + }, + ) + +@pytest.fixture(params=[create_pandas_connection, create_duckdb_connection, create_sqlite_connection]) +def connection(request, reference_df, tmpdir): + return request.param(reference_df, tmpdir=tmpdir) + +@pytest.fixture +def data(connection): + return connection.table("df") + +@pytest.fixture +def dataset(data): + return Dataset(data, kdims=["numerical", "date", "string"], vdims=["actual", "forecast"]) + +def test_index_ibis_table(data): + table = IbisInterface._index_ibis_table(data) + table.execute() \ No newline at end of file From 1419105d5861efece70af25224837db320047dc6 Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Tue, 29 Nov 2022 07:38:30 +0100 Subject: [PATCH 08/11] refactor and test ibis bokeh yaxis --- .../tests/core/data/test_ibisinterface.py | 141 +++++++++++------- 1 file changed, 83 insertions(+), 58 deletions(-) diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index 5bf1fec830..72c901968d 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -1,7 +1,6 @@ import sqlite3 -from unittest import SkipTest - from tempfile import NamedTemporaryFile +from unittest import SkipTest try: import ibis @@ -9,17 +8,24 @@ except: raise SkipTest("Could not import ibis, skipping IbisInterface tests.") +try: + import duckdb +except: + raise SkipTest("Could not import duckdb, skipping IbisInterface tests.") + +from pathlib import Path + import numpy as np import pandas as pd - +import pytest from bokeh.models import axes as bokeh_axes from holoviews import render from holoviews.core.data import Dataset -from holoviews.core.spaces import HoloMap from holoviews.core.data.ibis import IbisInterface +from holoviews.core.spaces import HoloMap from holoviews.element.chart import Curve -from .base import HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests +from .base import HeterogeneousColumnTests, InterfaceTests, ScalarColumnTests def create_temp_db(df, name, index=False): @@ -103,20 +109,6 @@ def init_column_data(self): hm_db.table("hm"), kdims=[("x", "X")], vdims=[("y", "Y")] ) - reference_df = pd.DataFrame( - { - "actual": [100, 150, 125, 140, 145, 135, 123], - "forecast": [90, 160, 125, 150, 141, 141, 120], - "numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5], - "date": pd.date_range("2022-01-03", "2022-01-09"), - "string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], - }, - ) - reference_db = create_temp_db(reference_df, "reference_df") - self.reference_table = Dataset( - reference_db.table("reference_df"), kdims=["numerical", "date", "string"], vdims=["actual", "forecast"] - ) - def test_dataset_array_init_hm(self): raise SkipTest("Not supported") @@ -321,42 +313,75 @@ def test_dataset_iloc_ellipsis_list_cols(self): def test_dataset_boolean_index(self): raise SkipTest("Not supported") - def test_range(self): - assert IbisInterface.range(self.reference_table, "date") == (np.datetime64('2022-01-03'), np.datetime64('2022-01-09')) - assert IbisInterface.range(self.reference_table, "string") == ('Mon', 'Sun') - assert IbisInterface.range(self.reference_table, "numerical") == (np.float64(1.1), np.float64(5.5)) - - def test_dimension_type(self): - assert IbisInterface.dimension_type(self.reference_table, "date") is np.datetime64 - assert IbisInterface.dimension_type(self.reference_table, "string") is np.object_ - assert IbisInterface.dimension_type(self.reference_table, "numerical") is np.float64 - - def test_datetime_xaxis(self): - """Test to make sure a DateTimeAxis can be identified for the bokeh backend""" - plot_ibis = Curve(self.reference_table, kdims="date", vdims="actual") - # When - plot_bokeh = render(plot_ibis, "bokeh") - xaxis, yaxis = plot_bokeh.axis - # Then - assert isinstance(xaxis, bokeh_axes.DatetimeAxis) - assert isinstance(yaxis, bokeh_axes.LinearAxis) - - def test_categorical_xaxis(self): - """Test to make sure a Categorical axis can be identified for the bokeh backend""" - plot_ibis = Curve(self.reference_table, kdims="string", vdims="actual") - # When - plot_bokeh = render(plot_ibis, "bokeh") - xaxis, yaxis = plot_bokeh.axis - # Then - assert isinstance(xaxis, bokeh_axes.CategoricalAxis) - assert isinstance(yaxis, bokeh_axes.LinearAxis) - - def test_numerical_xaxis(self): - """Test to make sure a LinearAxis axis can be identified for the bokeh backend""" - plot_ibis = Curve(self.reference_table, kdims="numerical", vdims="actual") - # When - plot_bokeh = render(plot_ibis, "bokeh") - xaxis, yaxis = plot_bokeh.axis - # Then - assert isinstance(xaxis, bokeh_axes.LinearAxis) - assert isinstance(yaxis, bokeh_axes.LinearAxis) +def create_pandas_connection(df: pd.DataFrame, *args, **kwargs): + return ibis.pandas.connect({"df": df}) + +def create_duckdb_connection(df: pd.DataFrame, *args, **kwargs): + tmpdir = kwargs["tmpdir"] + filename = str(Path(tmpdir)/"db.db") + duckdb_con = duckdb.connect(filename) + duckdb_con.execute("CREATE TABLE df AS SELECT * FROM df") + + return ibis.duckdb.connect(filename) + +def create_sqlite_connection(df: pd.DataFrame, *args, **kwargs): + return create_temp_db(df, "df") + +@pytest.fixture +def reference_df(): + return pd.DataFrame( + { + "actual": [100, 150, 125, 140, 145, 135, 123], + "forecast": [90, 160, 125, 150, 141, 141, 120], + "numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5], + "date": pd.date_range("2022-01-03", "2022-01-09"), + "string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], + }, + ) + +@pytest.fixture(params=[create_pandas_connection, create_duckdb_connection, create_sqlite_connection]) +def connection(request, reference_df, tmpdir): + return request.param(reference_df, tmpdir=tmpdir) + +@pytest.fixture +def data(connection): + return connection.table("df") + +@pytest.fixture +def dataset(data): + return Dataset( + data, kdims=["numerical", "date", "string"], vdims=["actual", "forecast"] + ) + +@pytest.mark.parametrize(["dimension", "expected"], [ + ("date", (np.datetime64('2022-01-03'), np.datetime64('2022-01-09'))), + ("string", ('Mon', 'Sun')), + ("numerical",(np.float64(1.1), np.float64(5.5))), +]) +def test_range(dimension, expected, dataset): + assert IbisInterface.range(dataset, dimension) == expected + +@pytest.mark.parametrize(["dimension", "expected"], [ + ("date", np.datetime64), + ("string", np.object_), + ("numerical", np.float64), +]) +def test_dimension_type(dimension, expected, dataset): + assert IbisInterface.dimension_type(dataset, dimension) is expected + +@pytest.mark.parametrize(["kdims", "vdims", "xaxis_type", "yaxis_type"], [ + ("date", "actual", bokeh_axes.DatetimeAxis, bokeh_axes.LinearAxis), + ("string", "actual", bokeh_axes.CategoricalAxis, bokeh_axes.LinearAxis), + ("numerical", "actual", bokeh_axes.LinearAxis, bokeh_axes.LinearAxis), + ("numerical", "date", bokeh_axes.LinearAxis, bokeh_axes.DatetimeAxis), + ("numerical", "string", bokeh_axes.LinearAxis, bokeh_axes.CategoricalAxis), + ]) +def test_bokeh_axis(data, kdims, vdims, xaxis_type, yaxis_type): + """Test to make sure the right axis can be identified for the bokeh backend""" + plot_ibis = Curve(data, kdims=kdims, vdims=vdims) + # When + plot_bokeh = render(plot_ibis, "bokeh") + xaxis, yaxis = plot_bokeh.axis + # Then + assert isinstance(xaxis, xaxis_type) + assert isinstance(yaxis, yaxis_type) From ddaeb3daa6b78dc164e928687d93c509b41255de Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Tue, 29 Nov 2022 07:45:23 +0100 Subject: [PATCH 09/11] refactor --- holoviews/core/data/ibis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 485dae64db..597e3c6d56 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -111,13 +111,14 @@ def nonzero(cls, dataset): @cached def range(cls, dataset, dimension): dimension = dataset.get_dimension(dimension, strict=True) - if cls.dtype(dataset, dimension).kind == 'O': + dtype_kind = cls.dtype(dataset, dimension).kind + if dtype_kind == 'O': # Can this be done more efficiently? column = dataset.data[dimension.name].execute() first = column.iloc[0] last = column.iloc[-1] return first, last - if cls.dtype(dataset, dimension).kind in 'SU': + if dtype_kind in 'SU': return None, None if dimension.nodata is not None: return Interface.range(dataset, dimension) From a344388e95a583cd27ad33045b1996ee9d8e119f Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Thu, 1 Dec 2022 20:51:10 +0100 Subject: [PATCH 10/11] fixes the ibis histogram --- holoviews/core/data/ibis.py | 18 ++++++++++++++++-- .../tests/core/data/test_ibisinterface.py | 7 +++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 597e3c6d56..3a04ce63cf 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -154,9 +154,23 @@ def values( def histogram(cls, expr, bins, density=True, weights=None): bins = numpy.asarray(bins) bins = [int(v) if bins.dtype.kind in 'iu' else float(v) for v in bins] - binned = expr.bucket(bins).name('bucket') + + # See https://github.com/ibis-project/ibis/issues/4940#issuecomment-1334181645 + df = expr.to_projection() + try: + hist_bins = ( + df + .mutate(bucket=expr.bucket(bins)) + .bucket + .value_counts() + .sort_by('bucket') + ).execute() + except NotImplementedError: + # See https://github.com/ibis-project/ibis/issues/4939 + array = expr.execute() + return numpy.histogram(array, bins=bins, density=density, weights=weights) + hist = numpy.zeros(len(bins)-1) - hist_bins = binned.value_counts().sort_by('bucket').execute() for b, v in zip(hist_bins['bucket'], hist_bins['count']): if numpy.isnan(b): continue diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index 6ec0ddcdf7..c3359d7311 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -371,6 +371,13 @@ def test_range(dimension, expected, dataset): def test_dimension_type(dimension, expected, dataset): assert IbisInterface.dimension_type(dataset, dimension) is expected +def test_histogram(data): + expr = data[data.actual.notnull()].actual + bins = [90.0, 113.33333333333333, 136.66666666666666, 160.0] + result = IbisInterface.histogram(expr, bins, density=False) + np.testing.assert_array_equal(result[0], np.array([1, 3, 3])) + np.testing.assert_array_equal(result[1], np.array(bins)) + @pytest.mark.parametrize(["kdims", "vdims", "xaxis_type", "yaxis_type"], [ ("date", "actual", bokeh_axes.DatetimeAxis, bokeh_axes.LinearAxis), ("string", "actual", bokeh_axes.CategoricalAxis, bokeh_axes.LinearAxis), From ab4898ced2b1e09f64c935d14d1381c9d9ad12c2 Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sat, 3 Dec 2022 05:47:52 +0100 Subject: [PATCH 11/11] wip --- holoviews/core/data/ibis.py | 14 +++++++++-- .../tests/core/data/test_ibisinterface.py | 23 +++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 3a04ce63cf..a06b987742 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -41,12 +41,22 @@ def has_rowid(cls): return hasattr(ibis.expr.operations, "RowID") @classmethod - def is_rowid_zero_indexed(cls, data): + def _get_backend(cls, data): try: from ibis.client import find_backends, validate_backends (backend,) = validate_backends(list(find_backends(data))) + return backend except Exception: - backend = data._find_backend() + pass + + try: + return data._find_backend() + except ibis.common.exceptions.IbisError: + return "ibis.backends.not_found" + + @classmethod + def is_rowid_zero_indexed(cls, data): + backend = cls._get_backend(data) return type(backend).__module__ in cls.zero_indexed_backend_modules @classmethod diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index c3359d7311..9a1e6df132 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd +import param import pytest from bokeh.models import axes as bokeh_axes from holoviews import render @@ -313,10 +314,10 @@ def test_dataset_iloc_ellipsis_list_cols(self): def test_dataset_boolean_index(self): raise SkipTest("Not supported") -def create_pandas_connection(df: pd.DataFrame, *args, **kwargs): +def pandas_data(df: pd.DataFrame, *args, **kwargs): return ibis.pandas.connect({"df": df}) -def create_duckdb_connection(df: pd.DataFrame, *args, **kwargs): +def ibis_duckdb_data(df: pd.DataFrame, *args, **kwargs): tmpdir = kwargs["tmpdir"] filename = str(Path(tmpdir)/"db.db") duckdb_con = duckdb.connect(filename) @@ -324,9 +325,20 @@ def create_duckdb_connection(df: pd.DataFrame, *args, **kwargs): return ibis.duckdb.connect(filename) -def create_sqlite_connection(df: pd.DataFrame, *args, **kwargs): +def ibis_sqlite_data(df: pd.DataFrame, *args, **kwargs): return create_temp_db(df, "df") +class IbisMemConnection(param.Parameterized): + def __init__(self, df): + super().__init__() + self._table = ibis.memtable(df) + + def table(self, df): + return self._table + +def ibis_mem_table(df: pd.DataFrame, *args, **kwargs): + return IbisMemConnection(df=df) + @pytest.fixture def reference_df(): return pd.DataFrame( @@ -339,7 +351,7 @@ def reference_df(): }, ) -@pytest.fixture(params=[create_pandas_connection, create_duckdb_connection, create_sqlite_connection]) +@pytest.fixture(params=[pandas_data, ibis_duckdb_data, ibis_sqlite_data, ibis_mem_table]) def connection(request, reference_df, tmpdir): return request.param(reference_df, tmpdir=tmpdir) @@ -351,6 +363,9 @@ def data(connection): def dataset(data): return Dataset(data, kdims=["numerical", "date", "string"], vdims=["actual", "forecast"]) +def test_get_backend(data): + assert IbisInterface._get_backend(data) + def test_index_ibis_table(data): table = IbisInterface._index_ibis_table(data) table.execute()