Improve error when read_xxx functions fail (#277)
Previously the error would be shown as a WARNING, and then the query
would be run as a fallback using the Postgres executor, which would
fail because the DuckDB `read_csv`/`read_parquet` functions cannot be
executed by the Postgres executor. This is quite confusing, since many
people only look at the final ERROR and not the warnings before it.

This is also relevant for MotherDuck support, since MotherDuck-backed
tables can only be read using the DuckDB executor.
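
As an illustration, the regression test added in this commit now fails with a
single, direct ERROR (the file path below is the one used by the test suite):

    SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT);
    -- ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error:
    --        'IO Error: No files found that match the pattern "../../non-existing-file.csv"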

Fixes #106
Related to #209 (the error there looks terribly confusing)
JelteF authored Oct 14, 2024
1 parent 4a7596c commit d07a5de
Showing 5 changed files with 23 additions and 23 deletions.
2 changes: 1 addition & 1 deletion include/pgduckdb/pgduckdb_planner.hpp
@@ -11,6 +11,6 @@ extern "C" {

extern bool duckdb_explain_analyze;

PlannedStmt *DuckdbPlanNode(Query *parse, int cursor_options);
PlannedStmt *DuckdbPlanNode(Query *parse, int cursor_options, bool throw_error);
std::tuple<duckdb::unique_ptr<duckdb::PreparedStatement>, duckdb::unique_ptr<duckdb::Connection>>
DuckdbPrepare(const Query *query);
12 changes: 4 additions & 8 deletions src/pgduckdb_hooks.cpp
@@ -166,20 +166,16 @@ IsAllowedStatement(Query *query, bool throw_error = false) {
static PlannedStmt *
DuckdbPlannerHook(Query *parse, const char *query_string, int cursor_options, ParamListInfo bound_params) {
if (pgduckdb::IsExtensionRegistered()) {
if (duckdb_execution && IsAllowedStatement(parse)) {
PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options);
if (duckdbPlan) {
return duckdbPlan;
}
}

if (NeedsDuckdbExecution(parse)) {
IsAllowedStatement(parse, true);

PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options);
return DuckdbPlanNode(parse, cursor_options, true);
} else if (duckdb_execution && IsAllowedStatement(parse)) {
PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options, false);
if (duckdbPlan) {
return duckdbPlan;
}
/* If we can't create a plan, we'll fall back to Postgres */
}
}

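For readability, here is a condensed sketch (reconstructed from the hunk above,
indentation approximate, surrounding code omitted) of the planner-hook logic
after this change: queries that need DuckDB now raise an ERROR when planning
fails, while opportunistic DuckDB execution still falls back to Postgres.

	if (pgduckdb::IsExtensionRegistered()) {
		if (NeedsDuckdbExecution(parse)) {
			IsAllowedStatement(parse, true);
			/* throw_error = true: planning failures become ERRORs */
			return DuckdbPlanNode(parse, cursor_options, true);
		} else if (duckdb_execution && IsAllowedStatement(parse)) {
			PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options, false);
			if (duckdbPlan) {
				return duckdbPlan;
			}
			/* If we can't create a plan, we'll fall back to Postgres */
		}
	}
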
14 changes: 7 additions & 7 deletions src/pgduckdb_planner.cpp
@@ -52,16 +52,16 @@ DuckdbPrepare(const Query *query) {
}

static Plan *
CreatePlan(Query *query) {
CreatePlan(Query *query, bool throw_error) {
int elevel = throw_error ? ERROR : WARNING;
/*
* Prepare the query, so we can get the returned types and column names.
*/
auto prepare_result = DuckdbPrepare(query);
auto prepared_query = std::move(std::get<0>(prepare_result));

if (prepared_query->HasError()) {
elog(WARNING, "(PGDuckDB/CreatePlan) Prepared query returned an error: '%s",
prepared_query->GetError().c_str());
elog(elevel, "(PGDuckDB/CreatePlan) Prepared query returned an error: '%s", prepared_query->GetError().c_str());
return nullptr;
}

@@ -74,7 +74,7 @@ CreatePlan(Query *query) {
Oid postgresColumnOid = pgduckdb::GetPostgresDuckDBType(column);

if (!OidIsValid(postgresColumnOid)) {
elog(WARNING, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
elog(elevel, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
return nullptr;
}

@@ -83,7 +83,7 @@

tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(postgresColumnOid));
if (!HeapTupleIsValid(tp)) {
elog(WARNING, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
elog(elevel, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
return nullptr;
}

@@ -105,9 +105,9 @@ CreatePlan(Query *query) {
}

PlannedStmt *
DuckdbPlanNode(Query *parse, int cursor_options) {
DuckdbPlanNode(Query *parse, int cursor_options, bool throw_error) {
/* We need to check whether DuckDB can create a plan */
Plan *plan = pgduckdb::DuckDBFunctionGuard<Plan *>(CreatePlan, "CreatePlan", parse);
Plan *plan = pgduckdb::DuckDBFunctionGuard<Plan *>(CreatePlan, "CreatePlan", parse, throw_error);
Plan *duckdb_plan = (Plan *)castNode(CustomScan, plan);

if (!duckdb_plan) {
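
As background (not shown in this diff): DuckdbPlanNode calls CreatePlan through
DuckDBFunctionGuard, which now also has to forward the new throw_error argument.
Below is a hypothetical, simplified sketch of what such a variadic guard could
look like; the includes and the exception-to-elog translation are assumptions
for illustration, not the actual pg_duckdb implementation.

	#include <exception>

	extern "C" {
	#include "postgres.h"
	}

	/* Hypothetical sketch only: call func(args...) and turn any C++ exception
	 * into a Postgres error tagged with func_name. The point is that trailing
	 * arguments, such as the new throw_error flag, are forwarded unchanged to
	 * the wrapped function. */
	template <typename ReturnType, typename FuncType, typename... Args>
	ReturnType
	DuckDBFunctionGuardSketch(FuncType func, const char *func_name, Args... args) {
		try {
			return func(args...);
		} catch (std::exception &ex) {
			elog(ERROR, "(PGDuckDB/%s) %s", func_name, ex.what());
		}
		return {}; /* not reached: elog(ERROR) does not return */
	}
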
8 changes: 5 additions & 3 deletions test/regression/expected/read_functions.out
@@ -17,8 +17,8 @@ SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.le
4.5
(5 rows)

SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
AS ("sepal.length" FLOAT, file_row_number INT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;
sepal.length | file_row_number | filename
--------------+-----------------+-------------------------
@@ -47,7 +47,7 @@ SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FL
(5 rows)

SELECT "sepal.length", filename
FROM read_csv('../../data/iris.csv', filename => true, header => true)
FROM read_csv('../../data/iris.csv', filename => true, header => true)
AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;
sepal.length | filename
--------------+---------------------
@@ -58,3 +58,5 @@ SELECT "sepal.length", filename
4.5 | ../../data/iris.csv
(5 rows)

SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT);
ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'IO Error: No files found that match the pattern "../../non-existing-file.csv"
10 changes: 6 additions & 4 deletions test/regression/sql/read_functions.sql
@@ -7,8 +7,8 @@ SELECT count("sepal.length") FROM read_parquet('../../data/iris.parquet') AS ("s

SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5;

SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
AS ("sepal.length" FLOAT, file_row_number INT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;

-- read_csv
@@ -18,5 +18,7 @@ SELECT count("sepal.length") FROM read_csv('../../data/iris.csv') AS ("sepal.len
SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5;

SELECT "sepal.length", filename
FROM read_csv('../../data/iris.csv', filename => true, header => true)
AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;
FROM read_csv('../../data/iris.csv', filename => true, header => true)
AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;

SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT);
