diff --git a/extension_config_wasm.cmake b/extension_config_wasm.cmake index 5553281cf..8856de1e8 100644 --- a/extension_config_wasm.cmake +++ b/extension_config_wasm.cmake @@ -2,11 +2,11 @@ # DuckDB-Wasm extension base config ################################################################################ # + duckdb_extension_load(json DONT_LINK) duckdb_extension_load(parquet DONT_LINK) duckdb_extension_load(autocomplete DONT_LINK) -duckdb_extension_load(fts DONT_LINK) duckdb_extension_load(icu DONT_LINK) duckdb_extension_load(tpcds DONT_LINK) duckdb_extension_load(tpch DONT_LINK) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8cfc7a59e..f7df08312 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -243,10 +243,6 @@ if (DUCKDB_WASM_LOADABLE_EXTENSIONS) target_link_libraries(duckdb_web duckdb arrow rapidjson ${THREAD_LIBS}) else() - add_library( - duckdb_web_fts - ${CMAKE_SOURCE_DIR}/src/extensions/fts_extension.cc) - add_library( duckdb_web_parquet ${CMAKE_SOURCE_DIR}/src/extensions/parquet_extension.cc) @@ -264,8 +260,7 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDUCKDB_JSON_EXTENSION") endif() - target_link_libraries(duckdb_web duckdb duckdb_web_fts duckdb_web_parquet ${DUCKDB_WEB_JSON} arrow rapidjson ${THREAD_LIBS}) - target_link_libraries(duckdb_web_fts duckdb duckdb_fts) + target_link_libraries(duckdb_web duckdb duckdb_web_parquet ${DUCKDB_WEB_JSON} arrow rapidjson ${THREAD_LIBS}) target_link_libraries(duckdb_web_parquet duckdb duckdb_parquet) target_link_libraries(duckdb_web_json duckdb duckdb_json) endif() 
@@ -371,7 +366,7 @@ if(NOT EMSCRIPTEN) ${CMAKE_SOURCE_DIR}/test/webdb_test.cc ${CMAKE_SOURCE_DIR}/test/tester.cc) - set(TEST_LIBS duckdb_web duckdb_web_fts duckdb_web_parquet ${DUCKDB_WEB_JSON} gtest gmock gflags ${THREAD_LIBS}) + set(TEST_LIBS duckdb_web duckdb_web_parquet ${DUCKDB_WEB_JSON} gtest gmock gflags ${THREAD_LIBS}) add_executable(tester ${TEST_CC}) target_link_libraries(tester ${TEST_LIBS}) diff --git a/lib/cmake/duckdb.cmake b/lib/cmake/duckdb.cmake index fcea7d7da..823dfd996 100644 --- a/lib/cmake/duckdb.cmake +++ b/lib/cmake/duckdb.cmake @@ -15,7 +15,7 @@ endif() set(DUCKDB_CXX_FLAGS "${DUCKDB_CXX_FLAGS} -Wno-unqualified-std-cast-call -DDUCKDB_DEBUG_NO_SAFETY -DDUCKDB_FROM_DUCKDB_WASM") message("DUCKDB_CXX_FLAGS=${DUCKDB_CXX_FLAGS}") -set(DUCKDB_EXTENSIONS "fts;json") +set(DUCKDB_EXTENSIONS "json;core_functions") # Escape semicolons in DUCKDB_EXTENSIONS before passing to ExternalProject_Add string(REPLACE ";" "$" DUCKDB_EXTENSIONS_PACKED "${DUCKDB_EXTENSIONS}") @@ -46,6 +46,7 @@ ExternalProject_Add( -DDISABLE_BUILTIN_EXTENSIONS=TRUE -DUSE_WASM_THREADS=${USE_WASM_THREADS} -DDUCKDB_EXPLICIT_PLATFORM=${DUCKDB_EXPLICIT_PLATFORM} + -DSMALLER_BINARY=1 BUILD_BYPRODUCTS /lib/libduckdb_re2.a /lib/libduckdb_static.a @@ -59,7 +60,7 @@ ExternalProject_Add( /lib/libduckdb_utf8proc.a /lib/libduckdb_fastpforlib.a /lib/libparquet_extension.a - /lib/libfts_extension.a + /lib/libcore_functions_extension.a /lib/libjson_extension.a) ExternalProject_Get_Property(duckdb_ep install_dir) @@ -90,6 +91,7 @@ target_link_libraries( INTERFACE ${install_dir}/lib/libduckdb_pg_query.a INTERFACE ${install_dir}/lib/libduckdb_utf8proc.a INTERFACE ${install_dir}/lib/libduckdb_fastpforlib.a + INTERFACE ${install_dir}/lib/libcore_functions_extension.a INTERFACE dl) 
target_include_directories( @@ -105,10 +107,6 @@ target_include_directories( INTERFACE ${DUCKDB_SOURCE_DIR}/third_party/thrift INTERFACE ${DUCKDB_SOURCE_DIR}/third_party/zstd) -add_library(duckdb_fts STATIC IMPORTED) -set_property(TARGET duckdb_fts PROPERTY IMPORTED_LOCATION ${install_dir}/lib/libfts_extension.a) -target_include_directories(duckdb_fts INTERFACE ${DUCKDB_SOURCE_DIR}/extension/fts/include) - add_library(duckdb_parquet STATIC IMPORTED) set_property(TARGET duckdb_parquet PROPERTY IMPORTED_LOCATION ${install_dir}/lib/libparquet_extension.a) target_include_directories(duckdb_parquet INTERFACE ${DUCKDB_SOURCE_DIR}/extension/parquet/include) @@ -117,7 +115,11 @@ add_library(duckdb_json STATIC IMPORTED) set_property(TARGET duckdb_json PROPERTY IMPORTED_LOCATION ${install_dir}/lib/libjson_extension.a) target_include_directories(duckdb_json INTERFACE ${DUCKDB_SOURCE_DIR}/extension/json/include) +add_library(duckdb_core_functions STATIC IMPORTED) +set_property(TARGET duckdb_core_functions PROPERTY IMPORTED_LOCATION ${install_dir}/lib/libcore_functions_extension.a) +target_include_directories(duckdb_core_functions INTERFACE ${DUCKDB_SOURCE_DIR}/extension/core_functions/include) + add_dependencies(duckdb duckdb_ep) -add_dependencies(duckdb_fts duckdb_ep) add_dependencies(duckdb_parquet duckdb_ep) add_dependencies(duckdb_json duckdb_ep) +add_dependencies(duckdb_core_functions duckdb_ep) diff --git a/lib/include/duckdb/web/extensions/fts_extension.h b/lib/include/duckdb/web/extensions/fts_extension.h deleted file mode 100644 index cd7409661..000000000 --- a/lib/include/duckdb/web/extensions/fts_extension.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INCLUDE_DUCKDB_WEB_EXTENSIONS_FTS_EXTENSION_H_ -#define INCLUDE_DUCKDB_WEB_EXTENSIONS_FTS_EXTENSION_H_ - -#include "duckdb/main/database.hpp" - -extern "C" void duckdb_web_fts_init(duckdb::DuckDB* db); - -#endif diff --git a/lib/include/duckdb/web/io/web_filesystem.h b/lib/include/duckdb/web/io/web_filesystem.h index 71a1ef780..b08527758 100644 --- 
a/lib/include/duckdb/web/io/web_filesystem.h +++ b/lib/include/duckdb/web/io/web_filesystem.h @@ -132,7 +132,7 @@ class WebFileSystem : public duckdb::FileSystem { public: /// Constructor WebFileHandle(std::shared_ptr file) - : duckdb::FileHandle(file->GetFileSystem(), file->GetFileName()), + : duckdb::FileHandle(file->GetFileSystem(), file->GetFileName(), FileOpenFlags::FILE_FLAGS_READ), file_(file), readahead_(nullptr), position_(0) { diff --git a/lib/src/extensions/fts_extension.cc b/lib/src/extensions/fts_extension.cc deleted file mode 100644 index d5fff2114..000000000 --- a/lib/src/extensions/fts_extension.cc +++ /dev/null @@ -1,5 +0,0 @@ -#include "duckdb/web/extensions/fts_extension.h" - -#include "fts_extension.hpp" - -extern "C" void duckdb_web_fts_init(duckdb::DuckDB* db) { db->LoadExtension(); } diff --git a/lib/src/io/buffered_filesystem.cc b/lib/src/io/buffered_filesystem.cc index 3b0b0fa48..64ea5e395 100644 --- a/lib/src/io/buffered_filesystem.cc +++ b/lib/src/io/buffered_filesystem.cc @@ -42,7 +42,7 @@ void BufferedFileHandle::Close() { file_ref_->Release(); } /// Constructor BufferedFileHandle::BufferedFileHandle(duckdb::FileSystem &file_system, std::shared_ptr file_buffers) - : duckdb::FileHandle(file_system, std::string{file_buffers->GetPath()}), + : duckdb::FileHandle(file_system, std::string{file_buffers->GetPath()}, FileOpenFlags::FILE_FLAGS_READ), file_ref_(std::move(file_buffers)), file_position_(0) {} diff --git a/lib/src/io/memory_filesystem.cc b/lib/src/io/memory_filesystem.cc index 1c6f14332..da8c1ea64 100644 --- a/lib/src/io/memory_filesystem.cc +++ b/lib/src/io/memory_filesystem.cc @@ -17,7 +17,8 @@ MemoryFileSystem::FileBuffer::FileBuffer(size_t id, std::string path, std::vecto : file_id(id), file_path(std::move(path)), buffer(std::move(buffer)), handles() {} /// Constructor MemoryFileSystem::FileHandle::FileHandle(MemoryFileSystem &file_system, FileBuffer &buffer, size_t position) - : duckdb::FileHandle(file_system, buffer.file_path), 
+ : duckdb::FileHandle(file_system, buffer.file_path, + FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE), file_system_(file_system), buffer_(buffer), position_(position) {} diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc index 447e10ffe..c93e633d3 100644 --- a/lib/src/webdb.cc +++ b/lib/src/webdb.cc @@ -45,7 +45,6 @@ #include "duckdb/web/config.h" #include "duckdb/web/csv_insert_options.h" #include "duckdb/web/environment.h" -#include "duckdb/web/extensions/fts_extension.h" #include "duckdb/web/extensions/json_extension.h" #include "duckdb/web/extensions/parquet_extension.h" #include "duckdb/web/functions/table_function_relation.h" @@ -831,7 +830,6 @@ arrow::Status WebDB::Open(std::string_view args_json) { auto db = make_shared_ptr(config_->path, &db_config); #ifndef WASM_LOADABLE_EXTENSIONS duckdb_web_parquet_init(db.get()); - duckdb_web_fts_init(db.get()); #if defined(DUCKDB_JSON_EXTENSION) duckdb_web_json_init(db.get()); #endif diff --git a/patches/duckdb/duckdb_smaller_binary_no_select.patch b/patches/duckdb/duckdb_smaller_binary_no_select.patch new file mode 100644 index 000000000..106718fcf --- /dev/null +++ b/patches/duckdb/duckdb_smaller_binary_no_select.patch @@ -0,0 +1,37 @@ +diff --git a/src/execution/expression_executor.cpp b/src/execution/expression_executor.cpp +index 63a24f479e..eadf1dea0d 100644 +--- a/src/execution/expression_executor.cpp ++++ b/src/execution/expression_executor.cpp +@@ -236,7 +236,7 @@ idx_t ExpressionExecutor::Select(const Expression &expr, ExpressionState *state, + D_ASSERT(true_sel || false_sel); + D_ASSERT(expr.return_type.id() == LogicalTypeId::BOOLEAN); + switch (expr.expression_class) { +-#ifndef DUCKDB_SMALLER_BINARY ++#ifndef DUCKDB_SMALLER_BINARY_NO_SELECT + case ExpressionClass::BOUND_BETWEEN: + return Select(expr.Cast(), state, sel, count, true_sel, false_sel); + #endif +diff --git a/src/execution/expression_executor/execute_between.cpp b/src/execution/expression_executor/execute_between.cpp +index 
3418351362..52cf454530 100644 +--- a/src/execution/expression_executor/execute_between.cpp ++++ b/src/execution/expression_executor/execute_between.cpp +@@ -7,7 +7,7 @@ + + namespace duckdb { + +-#ifndef DUCKDB_SMALLER_BINARY ++#ifndef DUCKDB_SMALLER_BINARY_NO_SELECT + struct BothInclusiveBetweenOperator { + template + static inline bool Operation(T input, T lower, T upper) { +@@ -133,8 +133,8 @@ void ExpressionExecutor::Execute(const BoundBetweenExpression &expr, ExpressionS + + idx_t ExpressionExecutor::Select(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { +-#ifdef DUCKDB_SMALLER_BINARY +- throw InternalException("ExpressionExecutor::Select not available with DUCKDB_SMALLER_BINARY"); ++#ifdef DUCKDB_SMALLER_BINARY_NO_SELECT ++ throw InternalException("ExpressionExecutor::Select not available with DUCKDB_SMALLER_BINARY_NO_SELECT"); + #else + // resolve the children + Vector input(state->intermediate_chunk.data[0]); diff --git a/patches/duckdb/extension_install_rework.patch b/patches/duckdb/extension_install_rework.patch index 0d5a7e9a5..4b538c33f 100644 --- a/patches/duckdb/extension_install_rework.patch +++ b/patches/duckdb/extension_install_rework.patch @@ -5,7 +5,7 @@ index 2a6fffa994..f4ef687ddd 100644 @@ -96,6 +96,10 @@ private: shared_ptr db_cache_entry; - duckdb_ext_api_v0 (*create_api_v0)(); + duckdb_ext_api_v1 (*create_api_v1)(); +public: + static void SetPreferredRepository(const string& extension, const string &repository); + static string GetPreferredRepository(const string& extension); diff --git a/patches/duckdb/hardcode_abi_type.patch b/patches/duckdb/hardcode_abi_type.patch new file mode 100644 index 000000000..be3ad273e --- /dev/null +++ b/patches/duckdb/hardcode_abi_type.patch @@ -0,0 +1,13 @@ +diff --git a/src/main/extension/extension_load.cpp b/src/main/extension/extension_load.cpp +index a7a7c62fef..d560a3b3b3 100644 +--- 
a/src/main/extension/extension_load.cpp ++++ b/src/main/extension/extension_load.cpp +@@ -451,7 +451,7 @@ bool ExtensionHelper::TryInitialLoad(DatabaseInstance &db, FileSystem &fs, const + result.filebase = lowercase_extension_name; + result.filename = filename; + result.lib_hdl = lib_hdl; +- result.abi_type = parsed_metadata.abi_type; ++ result.abi_type = ExtensionABIType::CPP; + + if (!direct_load) { + auto info_file_name = filename + ".info"; diff --git a/patches/duckdb/is_distict_from.patch b/patches/duckdb/is_distict_from.patch deleted file mode 100644 index 112331c14..000000000 --- a/patches/duckdb/is_distict_from.patch +++ /dev/null @@ -1,86 +0,0 @@ -diff --git a/src/common/vector_operations/is_distinct_from.cpp b/src/common/vector_operations/is_distinct_from.cpp -index e9a31ee0e6..7694001647 100644 ---- a/src/common/vector_operations/is_distinct_from.cpp -+++ b/src/common/vector_operations/is_distinct_from.cpp -@@ -65,17 +65,28 @@ static void DistinctExecute(Vector &left, Vector &right, Vector &result, idx_t c - DistinctExecuteSwitch(left, right, result, count); - } - -+#define DUCKDB_SMALLER_BINARY -+ -+#ifndef DUCKDB_SMALLER_BINARY - template -+#else -+template -+#endif - static inline idx_t - DistinctSelectGenericLoop(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, - const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, - const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { -+#ifdef DUCKDB_SMALLER_BINARY -+ bool HAS_TRUE_SEL = true_sel; -+ bool HAS_FALSE_SEL = false_sel; -+#endif - idx_t true_count = 0, false_count = 0; - for (idx_t i = 0; i < count; i++) { - auto result_idx = result_sel->get_index(i); - auto lindex = lsel->get_index(i); - auto rindex = rsel->get_index(i); -+#ifndef DUCKDB_SMALLER_BINARY - if (NO_NULL) { - if (OP::Operation(ldata[lindex], rdata[rindex], false, false)) { - if 
(HAS_TRUE_SEL) { -@@ -86,7 +97,9 @@ DistinctSelectGenericLoop(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *_ - false_sel->set_index(false_count++, result_idx); - } - } -- } else { -+ } else -+#endif -+ { - if (OP::Operation(ldata[lindex], rdata[rindex], !lmask.RowIsValid(lindex), !rmask.RowIsValid(rindex))) { - if (HAS_TRUE_SEL) { - true_sel->set_index(true_count++, result_idx); -@@ -129,6 +142,7 @@ DistinctSelectGenericLoopSwitch(const LEFT_TYPE *__restrict ldata, const RIGHT_T - const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, - const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { -+#ifndef DUCKDB_SMALLER_BINARY - if (!lmask.AllValid() || !rmask.AllValid()) { - return DistinctSelectGenericLoopSelSwitch( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); -@@ -136,6 +150,10 @@ DistinctSelectGenericLoopSwitch(const LEFT_TYPE *__restrict ldata, const RIGHT_T - return DistinctSelectGenericLoopSelSwitch( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); - } -+#else -+ return DistinctSelectGenericLoop(ldata, rdata, lsel, rsel, result_sel, count, lmask, -+ rmask, true_sel, false_sel); -+#endif - } - - template -@@ -287,6 +305,7 @@ static idx_t DistinctSelect(Vector &left, Vector &right, const SelectionVector * - - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { - return DistinctSelectConstant(left, right, sel, count, true_sel, false_sel); -+#ifndef DUCKDB_SMALLER_BINARY - } else if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && - right.GetVectorType() == VectorType::FLAT_VECTOR) { - return DistinctSelectFlat(left, right, sel, count, true_sel, false_sel); -@@ -296,11 +315,14 @@ static idx_t DistinctSelect(Vector &left, Vector &right, const SelectionVector * - } else if (left.GetVectorType() 
== VectorType::FLAT_VECTOR && right.GetVectorType() == VectorType::FLAT_VECTOR) { - return DistinctSelectFlat(left, right, sel, count, true_sel, - false_sel); -+#endif - } else { - return DistinctSelectGeneric(left, right, sel, count, true_sel, false_sel); - } - } - -+#undef DUCKDB_SMALLER_BINARY -+ - template - static idx_t DistinctSelectNotNull(Vector &left, Vector &right, const idx_t count, idx_t &true_count, - const SelectionVector &sel, SelectionVector &maybe_vec, OptionalSelection &true_opt, diff --git a/patches/duckdb/signature_capi.patch b/patches/duckdb/signature_capi.patch deleted file mode 100644 index 995ce4599..000000000 --- a/patches/duckdb/signature_capi.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/main/extension/extension_load.cpp b/src/main/extension/extension_load.cpp -index 59fc4e8cd9..b0282a7103 100644 ---- a/src/main/extension/extension_load.cpp -+++ b/src/main/extension/extension_load.cpp -@@ -119,7 +119,7 @@ struct ExtensionAccess { - // The C++ init function - typedef void (*ext_init_fun_t)(DatabaseInstance &); - // The C init function --typedef void (*ext_init_c_api_fun_t)(duckdb_extension_info info, duckdb_extension_access *access); -+typedef bool (*ext_init_c_api_fun_t)(duckdb_extension_info info, duckdb_extension_access *access); - typedef const char *(*ext_version_fun_t)(void); - typedef bool (*ext_is_storage_t)(void); - diff --git a/patches/duckdb/unary_executor.patch b/patches/duckdb/unary_executor.patch deleted file mode 100644 index 5bf31a964..000000000 --- a/patches/duckdb/unary_executor.patch +++ /dev/null @@ -1,37 +0,0 @@ -diff --git a/src/include/duckdb/common/vector_operations/unary_executor.hpp b/src/include/duckdb/common/vector_operations/unary_executor.hpp -index 9f29d7410f..bf77c61e7a 100644 ---- a/src/include/duckdb/common/vector_operations/unary_executor.hpp -+++ b/src/include/duckdb/common/vector_operations/unary_executor.hpp -@@ -136,6 +136,8 @@ private: - } - } - -+#define DUCKDB_SMALLER_BINARY -+ - template 
- static inline void ExecuteStandard(Vector &input, Vector &result, idx_t count, void *dataptr, bool adds_nulls) { - switch (input.GetVectorType()) { -@@ -153,6 +155,7 @@ private: - } - break; - } -+#ifndef DUCKDB_SMALLER_BINARY - case VectorType::FLAT_VECTOR: { - result.SetVectorType(VectorType::FLAT_VECTOR); - auto result_data = FlatVector::GetData(result); -@@ -162,6 +165,7 @@ private: - FlatVector::Validity(result), dataptr, adds_nulls); - break; - } -+#endif - default: { - UnifiedVectorFormat vdata; - input.ToUnifiedFormat(count, vdata); -@@ -176,6 +180,7 @@ private: - } - } - } -+#undef DUCKDB_SMALLER_BINARY - - public: - template diff --git a/submodules/duckdb b/submodules/duckdb index 19864453f..a91feadf8 160000 --- a/submodules/duckdb +++ b/submodules/duckdb @@ -1 +1 @@ -Subproject commit 19864453f7d0ed095256d848b46e7b8630989bac +Subproject commit a91feadf8cd9231f2591f54265341d004e746c6c