ibis-project · jitingxu1 · Aug 1, 2024 · Aug 2, 2024 · Aug 2, 2024 · Aug 5, 2024
diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py
@@ -1212,6 +1212,37 @@ def has_operation(cls, operation: type[ops.Value]) -> bool:
             f"{cls.name} backend has not implemented `has_operation` API"
         )
 
+    def read_parquet(
+        self, path: str | Path, table_name: str | None = None, **kwargs: Any
+    ) -> ir.Table:
+        """Load parquet data with pyarrow and register it as a backend table.
+
+        Parameters
+        ----------
+        path
+            Parquet source (e.g. a file or a directory of parquet files);
+            passed unchanged to `pyarrow.parquet.read_table`.
+        table_name
+            Name for the created table. When omitted (or empty), a name is
+            produced by `util.gen_name("read_parquet")`.
+        **kwargs
+            Extra keyword arguments forwarded to `pyarrow.parquet.read_table`.
+            See https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
+            for the accepted options.
+
+        Returns
+        -------
+        ir.Table
+            Table expression for the newly created table.
+
+        """
+        import pyarrow.parquet as pq
+
+        arrow_data = pq.read_table(path, **kwargs)
+        name = table_name or util.gen_name("read_parquet")
+        self.create_table(name, arrow_data)
+        return self.table(name)
+
     def _cached(self, expr: ir.Table):
         """Cache the provided expression.
 

diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py
@@ -421,12 +421,13 @@ def test_register_garbage(con, monkeypatch):
         ("functional_alltypes.parquet", "funk_all"),
     ],
 )
-@pytest.mark.notyet(
-    ["flink", "impala", "mssql", "mysql", "postgres", "risingwave", "sqlite", "trino"]
-)
+@pytest.mark.notyet(["flink"])
 def test_read_parquet(con, tmp_path, data_dir, fname, in_table_name):
     pq = pytest.importorskip("pyarrow.parquet")
 
+    if con.name in ["oracle", "exasol"]:
+        pytest.skip("Skip Exasol and Oracle because of the global pytestmark")
+
     fname = Path(fname)
     fname = Path(data_dir) / "parquet" / fname.name
     table = pq.read_table(fname)
@@ -452,19 +453,7 @@ def ft_data(data_dir):
     return table.slice(0, nrows)
 
 
-@pytest.mark.notyet(
-    [
-        "flink",
-        "impala",
-        "mssql",
-        "mysql",
-        "pandas",
-        "postgres",
-        "risingwave",
-        "sqlite",
-        "trino",
-    ]
-)
+@pytest.mark.notyet(["flink"])
 def test_read_parquet_glob(con, tmp_path, ft_data):
     pq = pytest.importorskip("pyarrow.parquet")
 
@@ -476,7 +465,11 @@ def test_read_parquet_glob(con, tmp_path, ft_data):
     for fname in fnames:
         pq.write_table(ft_data, tmp_path / fname)
 
-    table = con.read_parquet(tmp_path / f"*.{ext}")
+    if con.name == "clickhouse":
+        # clickhouse cannot read from a directory path, so pass a glob instead
+        table = con.read_parquet(tmp_path / f"*.{ext}")
+    else:
+        table = con.read_parquet(tmp_path)
 
     assert table.count().execute() == nrows * ntables