From a83ab7ca0ffd4fba87dcca6a8c812d6a68f526ce Mon Sep 17 00:00:00 2001
From: wudidapaopao <xiaozhe.yu@clickhouse.com>
Date: Wed, 24 Sep 2025 15:57:19 +0800
Subject: [PATCH] feat: support executing queries without pyarrow when it's
 missing

---
 .../workflows/build_macos_arm64_wheels.yml    |  2 +-
 .github/workflows/build_macos_x86_wheels.yml  |  2 +-
 chdb/state/sqlitelike.py                      | 63 ++++++++++++-------
 3 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml
index 96ef0b988a6..5cd4cdc13ec 100644
--- a/.github/workflows/build_macos_arm64_wheels.yml
+++ b/.github/workflows/build_macos_arm64_wheels.yml
@@ -79,7 +79,7 @@ jobs:
           brew install ca-certificates lz4 mpdecimal openssl@3 readline sqlite xz z3 zstd
           brew install --ignore-dependencies llvm@19
           brew install git ninja libtool gettext gcc binutils grep findutils nasm
-          brew install --build-from-source ccache
+          brew install --build-from-source ccache || echo "ccache installation failed, continuing without it"
           brew install go
           cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm
           export PATH=$(brew --prefix llvm@19)/bin:$PATH
diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml
index 85ebe048c87..c3df844ae20 100644
--- a/.github/workflows/build_macos_x86_wheels.yml
+++ b/.github/workflows/build_macos_x86_wheels.yml
@@ -79,7 +79,7 @@ jobs:
           brew install ca-certificates lz4 mpdecimal openssl@3 readline sqlite xz z3 zstd
           brew install --ignore-dependencies llvm@19
           brew install git ninja libtool gettext gcc binutils grep findutils nasm
-          brew install --build-from-source ccache
+          brew install --build-from-source ccache || echo "ccache installation failed, continuing without it"
           brew install go
           cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm
           export PATH=$(brew --prefix llvm@19)/bin:$PATH
diff --git a/chdb/state/sqlitelike.py b/chdb/state/sqlitelike.py
index 7694cb42ece..e0f6ad57dc9 100644
--- a/chdb/state/sqlitelike.py
+++ b/chdb/state/sqlitelike.py
@@ -1,13 +1,32 @@
-from typing import Optional, Any
+from typing import Optional, Any, TYPE_CHECKING, List, Tuple
 from chdb import _chdb
 
-# try import pyarrow if failed, raise ImportError with suggestion
-try:
-    import pyarrow as pa  # noqa
-except ImportError as e:
-    print(f"ImportError: {e}")
-    print('Please install pyarrow via "pip install pyarrow"')
-    raise ImportError("Failed to import pyarrow") from None
+if TYPE_CHECKING:
+    import pyarrow as pa
+
+
+def _import_pyarrow():
+    """Lazy import pyarrow when needed."""
+    try:
+        import pyarrow as pa
+        return pa
+    except ImportError:
+        raise ImportError(
+            "PyArrow is required for this feature. "
+            "Install with: pip install pyarrow"
+        ) from None
+
+
+def _import_pandas():
+    """Lazy import pandas when needed."""
+    try:
+        import pandas as pd
+        return pd
+    except ImportError:
+        raise ImportError(
+            "Pandas is required for DataFrame conversion. "
+            "Install with: pip install pandas"
+        ) from None
 
 
 _arrow_format = set({"dataframe", "arrowtable"})
@@ -32,11 +51,11 @@ def to_arrowTable(res):
         pyarrow.Table: PyArrow Table containing the query results
 
     Raises:
-        ImportError: If pyarrow or pandas packages are not installed
+        ImportError: If pyarrow package is not installed
 
     .. note::
-        This function requires both pyarrow and pandas to be installed.
-        Install them with: ``pip install pyarrow pandas``
+        This function requires pyarrow to be installed.
+        Install with: ``pip install pyarrow``
 
     .. warning::
         Empty results return an empty PyArrow Table with no schema.
@@ -52,14 +71,7 @@ def to_arrowTable(res):
            num   text
         0    1  hello
     """
-    # try import pyarrow and pandas, if failed, raise ImportError with suggestion
-    try:
-        import pyarrow as pa  # noqa
-        import pandas as pd  # noqa
-    except ImportError as e:
-        print(f"ImportError: {e}")
-        print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
-        raise ImportError("Failed to import pyarrow or pandas") from None
+    pa = _import_pyarrow()
     if len(res) == 0:
         return pa.Table.from_batches([], schema=pa.schema([]))
     return pa.RecordBatchFileReader(res.bytes()).read_all()
@@ -102,6 +114,7 @@ def to_df(r):
         text    object
         dtype: object
     """
+    _import_pandas()
     t = to_arrowTable(r)
     return t.to_pandas(use_threads=True)
 
@@ -230,7 +243,7 @@ def cancel(self):
             except Exception as e:
                 raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
 
-    def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
+    def record_batch(self, rows_per_batch: int = 1000000) -> "pa.RecordBatchReader":
         """
         Create a PyArrow RecordBatchReader from this StreamingResult.
 
@@ -242,10 +255,11 @@ def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
             rows_per_batch (int): Number of rows per batch. Defaults to 1000000.
 
         Returns:
-            pa.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
+            pyarrow.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
 
         Raises:
             ValueError: If the StreamingResult was not created with arrow format
+            ImportError: If PyArrow is not installed
         """
         if not self._supports_record_batch:
             raise ValueError(
@@ -253,6 +267,7 @@ def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
                 "Please use format='Arrow' when calling send_query."
             )
 
+        pa = _import_pyarrow()
         chdb_reader = ChdbRecordBatchReader(self, rows_per_batch)
         return pa.RecordBatchReader.from_batches(chdb_reader.schema(), chdb_reader)
 
@@ -275,10 +290,12 @@ def __init__(self, chdb_stream_result, batch_size_rows):
         self._current_rows = 0
         self._first_batch = None
         self._first_batch_consumed = True
+        self._pa = _import_pyarrow()
         self._schema = self.schema()
 
     def schema(self):
         if self._schema is None:
+            pa = self._pa
             # Get the first chunk to determine schema
             chunk = self._stream_result.fetch()
             if chunk is not None:
@@ -304,6 +321,8 @@ def schema(self):
         return self._schema
 
     def read_next_batch(self):
+        pa = self._pa
+
         if self._accumulator:
             result = self._accumulator.pop(0)
             return result
@@ -600,7 +619,7 @@ class Cursor:
     def __init__(self, connection):
         self._conn = connection
         self._cursor = self._conn.cursor()
-        self._current_table: Optional[pa.Table] = None
+        self._current_table: Optional[List[Tuple]] = None
         self._current_row: int = 0
 
     def execute(self, query: str) -> None: