Improve error when read_xxx functions fail (#277)
Previously the error would be shown as a WARNING, and then the query
would be run as a fallback using the Postgres executor, which would
fail because the DuckDB `read_csv`/`read_parquet` functions cannot be
executed by the Postgres executor. This is quite confusing, since many
people only look at the final ERROR and not the warnings before it.

This is also relevant for MotherDuck support, since MotherDuck-backed
tables can only be read using the DuckDB executor.
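
As an illustration, the regression test added in this commit now fails with a
single, direct ERROR (the file path below is the one used by the test suite):

    SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT);
    -- ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error:
    --        'IO Error: No files found that match the pattern "../../non-existing-file.csv"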

Fixes #106
Related to #209 (the error there looks terribly confusing)
JelteF authored Oct 14, 2024
1 parent 4a7596c commit d07a5de
Showing 5 changed files with 23 additions and 23 deletions.
2 changes: 1 addition & 1 deletion include/pgduckdb/pgduckdb_planner.hpp
@@ -11,6 +11,6 @@ extern "C" {

extern bool duckdb_explain_analyze;

PlannedStmt *DuckdbPlanNode(Query *parse, int cursor_options);
PlannedStmt *DuckdbPlanNode(Query *parse, int cursor_options, bool throw_error);
std::tuple<duckdb::unique_ptr<duckdb::PreparedStatement>, duckdb::unique_ptr<duckdb::Connection>>
DuckdbPrepare(const Query *query);
12 changes: 4 additions & 8 deletions src/pgduckdb_hooks.cpp
@@ -166,20 +166,16 @@ IsAllowedStatement(Query *query, bool throw_error = false) {
static PlannedStmt *
DuckdbPlannerHook(Query *parse, const char *query_string, int cursor_options, ParamListInfo bound_params) {
if (pgduckdb::IsExtensionRegistered()) {
if (duckdb_execution && IsAllowedStatement(parse)) {
PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options);
if (duckdbPlan) {
return duckdbPlan;
}
}

if (NeedsDuckdbExecution(parse)) {
IsAllowedStatement(parse, true);

PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options);
return DuckdbPlanNode(parse, cursor_options, true);
} else if (duckdb_execution && IsAllowedStatement(parse)) {
PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options, false);
if (duckdbPlan) {
return duckdbPlan;
}
/* If we can't create a plan, we'll fall back to Postgres */
}
}

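For readability, here is a condensed sketch (reconstructed from the hunk above,
indentation approximate, surrounding code omitted) of the planner-hook logic
after this change: queries that need DuckDB now raise an ERROR when planning
fails, while opportunistic DuckDB execution still falls back to Postgres.

	if (pgduckdb::IsExtensionRegistered()) {
		if (NeedsDuckdbExecution(parse)) {
			IsAllowedStatement(parse, true);
			/* throw_error = true: planning failures become ERRORs */
			return DuckdbPlanNode(parse, cursor_options, true);
		} else if (duckdb_execution && IsAllowedStatement(parse)) {
			PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, cursor_options, false);
			if (duckdbPlan) {
				return duckdbPlan;
			}
			/* If we can't create a plan, we'll fall back to Postgres */
		}
	}
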
14 changes: 7 additions & 7 deletions src/pgduckdb_planner.cpp
@@ -52,16 +52,16 @@ DuckdbPrepare(const Query *query) {
}

static Plan *
CreatePlan(Query *query) {
CreatePlan(Query *query, bool throw_error) {
int elevel = throw_error ? ERROR : WARNING;
/*
* Prepare the query, so we can get the returned types and column names.
*/
auto prepare_result = DuckdbPrepare(query);
auto prepared_query = std::move(std::get<0>(prepare_result));

if (prepared_query->HasError()) {
elog(WARNING, "(PGDuckDB/CreatePlan) Prepared query returned an error: '%s",
prepared_query->GetError().c_str());
elog(elevel, "(PGDuckDB/CreatePlan) Prepared query returned an error: '%s", prepared_query->GetError().c_str());
return nullptr;
}

@@ -74,7 +74,7 @@ CreatePlan(Query *query) {
Oid postgresColumnOid = pgduckdb::GetPostgresDuckDBType(column);

if (!OidIsValid(postgresColumnOid)) {
elog(WARNING, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
elog(elevel, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
return nullptr;
}

@@ -83,7 +83,7 @@

tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(postgresColumnOid));
if (!HeapTupleIsValid(tp)) {
elog(WARNING, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
elog(elevel, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid);
return nullptr;
}

@@ -105,9 +105,9 @@ CreatePlan(Query *query) {
}

PlannedStmt *
DuckdbPlanNode(Query *parse, int cursor_options) {
DuckdbPlanNode(Query *parse, int cursor_options, bool throw_error) {
/* We need to check whether DuckDB can create a plan */
Plan *plan = pgduckdb::DuckDBFunctionGuard<Plan *>(CreatePlan, "CreatePlan", parse);
Plan *plan = pgduckdb::DuckDBFunctionGuard<Plan *>(CreatePlan, "CreatePlan", parse, throw_error);
Plan *duckdb_plan = (Plan *)castNode(CustomScan, plan);

if (!duckdb_plan) {
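
As background (not shown in this diff): DuckdbPlanNode calls CreatePlan through
DuckDBFunctionGuard, which now also has to forward the new throw_error argument.
Below is a hypothetical, simplified sketch of what such a variadic guard could
look like; the includes and the exception-to-elog translation are assumptions
for illustration, not the actual pg_duckdb implementation.

	#include <exception>

	extern "C" {
	#include "postgres.h"
	}

	/* Hypothetical sketch only: call func(args...) and turn any C++ exception
	 * into a Postgres error tagged with func_name. The point is that trailing
	 * arguments, such as the new throw_error flag, are forwarded unchanged to
	 * the wrapped function. */
	template <typename ReturnType, typename FuncType, typename... Args>
	ReturnType
	DuckDBFunctionGuardSketch(FuncType func, const char *func_name, Args... args) {
		try {
			return func(args...);
		} catch (std::exception &ex) {
			elog(ERROR, "(PGDuckDB/%s) %s", func_name, ex.what());
		}
		return {}; /* not reached: elog(ERROR) does not return */
	}
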
8 changes: 5 additions & 3 deletions test/regression/expected/read_functions.out
@@ -17,8 +17,8 @@ SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.le
4.5
(5 rows)

SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
AS ("sepal.length" FLOAT, file_row_number INT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;
sepal.length | file_row_number | filename
--------------+-----------------+-------------------------
@@ -47,7 +47,7 @@ SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FL
(5 rows)

SELECT "sepal.length", filename
FROM read_csv('../../data/iris.csv', filename => true, header => true)
FROM read_csv('../../data/iris.csv', filename => true, header => true)
AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;
sepal.length | filename
--------------+---------------------
@@ -58,3 +58,5 @@ SELECT "sepal.length", filename
4.5 | ../../data/iris.csv
(5 rows)

SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT);
ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'IO Error: No files found that match the pattern "../../non-existing-file.csv"
10 changes: 6 additions & 4 deletions test/regression/sql/read_functions.sql
@@ -7,8 +7,8 @@ SELECT count("sepal.length") FROM read_parquet('../../data/iris.parquet') AS ("s

SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5;

SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
SELECT "sepal.length", file_row_number, filename
FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true)
AS ("sepal.length" FLOAT, file_row_number INT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;

-- read_csv
@@ -18,5 +18,7 @@ SELECT count("sepal.length") FROM read_csv('../../data/iris.csv') AS ("sepal.len
SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5;

SELECT "sepal.length", filename
FROM read_csv('../../data/iris.csv', filename => true, header => true)
AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;
FROM read_csv('../../data/iris.csv', filename => true, header => true)
AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5;

SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT);
