From 8c28161450b156dff64dee69ff392c1278a21206 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Fri, 12 Jul 2024 16:51:42 +0200 Subject: [PATCH] Move from monolithical patch to multiple patches --- ...b.patch => extension_install_rework.patch} | 91 ------------------- duckdb_patches/file_open_flags.patch | 15 +++ duckdb_patches/is_remote_file.absorbed.patch | 52 +++++++++++ duckdb_patches/loadable_json.patch | 12 +++ duckdb_patches/remote_attach.partial.patch | 30 ++++++ 5 files changed, 109 insertions(+), 91 deletions(-) rename duckdb_patches/{duckdb.patch => extension_install_rework.patch} (80%) create mode 100644 duckdb_patches/file_open_flags.patch create mode 100644 duckdb_patches/is_remote_file.absorbed.patch create mode 100644 duckdb_patches/loadable_json.patch create mode 100644 duckdb_patches/remote_attach.partial.patch diff --git a/duckdb_patches/duckdb.patch b/duckdb_patches/extension_install_rework.patch similarity index 80% rename from duckdb_patches/duckdb.patch rename to duckdb_patches/extension_install_rework.patch index 6fcf64b45..3bf4d30df 100644 --- a/duckdb_patches/duckdb.patch +++ b/duckdb_patches/extension_install_rework.patch @@ -1,94 +1,3 @@ -diff --git a/extension/json/CMakeLists.txt b/extension/json/CMakeLists.txt -index 4101111df8..3e035e80d3 100644 ---- a/extension/json/CMakeLists.txt -+++ b/extension/json/CMakeLists.txt -@@ -34,6 +34,7 @@ set(JSON_EXTENSION_FILES - build_static_extension(json ${JSON_EXTENSION_FILES}) - set(PARAMETERS "-warnings") - build_loadable_extension(json ${PARAMETERS} ${JSON_EXTENSION_FILES}) -+target_link_libraries(json_loadable_extension duckdb_yyjson) - - install( - TARGETS json_extension -diff --git a/src/common/file_system.cpp b/src/common/file_system.cpp -index 27160adc3f..f3dfc7441b 100644 ---- a/src/common/file_system.cpp -+++ b/src/common/file_system.cpp -@@ -623,9 +623,14 @@ FileType FileHandle::GetType() { - } - - bool FileSystem::IsRemoteFile(const string &path) { -- const string prefixes[] = {"http://", "https://", "s3://", "s3a://", "s3n://", "gcs://", "gs://", "r2://", "hf://"}; -- for (auto &prefix : prefixes) { -- if (StringUtil::StartsWith(path, prefix)) { -+ string extension = ""; -+ return IsRemoteFile(path, extension); -+} -+ -+bool FileSystem::IsRemoteFile(const string &path, string &extension) { -+ for (const auto &entry : EXTENSION_FILE_PREFIXES) { -+ if (StringUtil::StartsWith(path, entry.name)) { -+ extension = entry.extension; - return true; - } - } -diff --git a/src/execution/operator/schema/physical_attach.cpp b/src/execution/operator/schema/physical_attach.cpp -index 2c2b76a0fc..2d835441c2 100644 ---- a/src/execution/operator/schema/physical_attach.cpp -+++ b/src/execution/operator/schema/physical_attach.cpp -@@ -96,6 +96,25 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c - } - } - -+ string extension = ""; -+ if (FileSystem::IsRemoteFile(path, extension)) { -+ if (!ExtensionHelper::TryAutoLoadExtension(context.client, extension)) { -+ throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", path, -+ extension); -+ } -+ if (access_mode == AccessMode::AUTOMATIC) { -+ // Attaching of remote files gets bumped to READ_ONLY -+ // This is due to the fact that on most (all?) remote files writes to DB are not available -+ // and having this raised later is not super helpful -+ access_mode = AccessMode::READ_ONLY; -+ } -+ if (access_mode == AccessMode::READ_WRITE) { -+ auto attached_mode = EnumUtil::ToString(access_mode); -+ throw BinderException("Remote database \"%s\" can't be attached in %s mode", -+ name, attached_mode); -+ } -+ } -+ - // get the database type and attach the database - db_manager.GetDatabaseType(context.client, db_type, *info, config, unrecognized_option); - auto attached_db = db_manager.AttachDatabase(context.client, *info, db_type, access_mode); -diff --git a/src/include/duckdb/common/file_open_flags.hpp b/src/include/duckdb/common/file_open_flags.hpp -index d0509a214b..1b5107e849 100644 ---- a/src/include/duckdb/common/file_open_flags.hpp -+++ b/src/include/duckdb/common/file_open_flags.hpp -@@ -100,8 +100,9 @@ public: - return flags & FILE_FLAGS_PARALLEL_ACCESS; - } - --private: - idx_t flags = 0; -+ -+private: - FileLockType lock = FileLockType::NO_LOCK; - FileCompressionType compression = FileCompressionType::UNCOMPRESSED; - }; -diff --git a/src/include/duckdb/common/file_system.hpp b/src/include/duckdb/common/file_system.hpp -index e0df2f70c2..fa2b938199 100644 ---- a/src/include/duckdb/common/file_system.hpp -+++ b/src/include/duckdb/common/file_system.hpp -@@ -238,6 +238,7 @@ public: - - //! Whether or not a file is remote or local, based only on file path - DUCKDB_API static bool IsRemoteFile(const string &path); -+ DUCKDB_API static bool IsRemoteFile(const string &path, string &extension); - - DUCKDB_API virtual void SetDisabledFileSystems(const vector &names); - diff --git a/src/include/duckdb/main/extension_install_info.hpp b/src/include/duckdb/main/extension_install_info.hpp index 64b7b520a7..b03f64b3aa 100644 --- a/src/include/duckdb/main/extension_install_info.hpp diff --git a/duckdb_patches/file_open_flags.patch b/duckdb_patches/file_open_flags.patch new file mode 100644 index 000000000..52a64d0e9 --- /dev/null +++ b/duckdb_patches/file_open_flags.patch @@ -0,0 +1,15 @@ +diff --git a/src/include/duckdb/common/file_open_flags.hpp b/src/include/duckdb/common/file_open_flags.hpp +index d0509a214b..1b5107e849 100644 +--- a/src/include/duckdb/common/file_open_flags.hpp ++++ b/src/include/duckdb/common/file_open_flags.hpp +@@ -100,8 +100,9 @@ public: + return flags & FILE_FLAGS_PARALLEL_ACCESS; + } + +-private: + idx_t flags = 0; ++ ++private: + FileLockType lock = FileLockType::NO_LOCK; + FileCompressionType compression = FileCompressionType::UNCOMPRESSED; + }; diff --git a/duckdb_patches/is_remote_file.absorbed.patch b/duckdb_patches/is_remote_file.absorbed.patch new file mode 100644 index 000000000..ad31782a9 --- /dev/null +++ b/duckdb_patches/is_remote_file.absorbed.patch @@ -0,0 +1,52 @@ +diff --git a/src/common/file_system.cpp b/src/common/file_system.cpp +index 27160adc3f..f3dfc7441b 100644 +--- a/src/common/file_system.cpp ++++ b/src/common/file_system.cpp +@@ -623,9 +623,14 @@ FileType FileHandle::GetType() { + } + + bool FileSystem::IsRemoteFile(const string &path) { +- const string prefixes[] = {"http://", "https://", "s3://", "s3a://", "s3n://", "gcs://", "gs://", "r2://", "hf://"}; +- for (auto &prefix : prefixes) { +- if (StringUtil::StartsWith(path, prefix)) { ++ string extension = ""; ++ return IsRemoteFile(path, extension); ++} ++ ++bool FileSystem::IsRemoteFile(const string &path, string &extension) { ++ for (const auto &entry : EXTENSION_FILE_PREFIXES) { ++ if (StringUtil::StartsWith(path, entry.name)) { ++ extension = entry.extension; + return true; + } + } +diff --git a/src/execution/operator/schema/physical_attach.cpp b/src/execution/operator/schema/physical_attach.cpp +index 2c2b76a0fc..2d835441c2 100644 +--- a/src/execution/operator/schema/physical_attach.cpp ++++ b/src/execution/operator/schema/physical_attach.cpp +@@ -96,6 +96,25 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c + } + } + ++ string extension = ""; ++ if (FileSystem::IsRemoteFile(path, extension)) { ++ if (!ExtensionHelper::TryAutoLoadExtension(context.client, extension)) { ++ throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", path, ++ extension); ++ } ++ if (access_mode == AccessMode::AUTOMATIC) { ++ // Attaching of remote files gets bumped to READ_ONLY ++ // This is due to the fact that on most (all?) remote files writes to DB are not available ++ // and having this raised later is not super helpful ++ access_mode = AccessMode::READ_ONLY; ++ } ++ if (access_mode == AccessMode::READ_WRITE) { ++ auto attached_mode = EnumUtil::ToString(access_mode); ++ throw BinderException("Remote database \"%s\" can't be attached in %s mode", ++ name, attached_mode); ++ } ++ } ++ + // get the database type and attach the database + db_manager.GetDatabaseType(context.client, db_type, *info, config, unrecognized_option); + auto attached_db = db_manager.AttachDatabase(context.client, *info, db_type, access_mode); diff --git a/duckdb_patches/loadable_json.patch b/duckdb_patches/loadable_json.patch new file mode 100644 index 000000000..82a125340 --- /dev/null +++ b/duckdb_patches/loadable_json.patch @@ -0,0 +1,12 @@ +diff --git a/extension/json/CMakeLists.txt b/extension/json/CMakeLists.txt +index 4101111df8..3e035e80d3 100644 +--- a/extension/json/CMakeLists.txt ++++ b/extension/json/CMakeLists.txt +@@ -34,6 +34,7 @@ set(JSON_EXTENSION_FILES + build_static_extension(json ${JSON_EXTENSION_FILES}) + set(PARAMETERS "-warnings") + build_loadable_extension(json ${PARAMETERS} ${JSON_EXTENSION_FILES}) ++target_link_libraries(json_loadable_extension duckdb_yyjson) + + install( + TARGETS json_extension diff --git a/duckdb_patches/remote_attach.partial.patch b/duckdb_patches/remote_attach.partial.patch new file mode 100644 index 000000000..e5955cc08 --- /dev/null +++ b/duckdb_patches/remote_attach.partial.patch @@ -0,0 +1,30 @@ +diff --git a/src/execution/operator/schema/physical_attach.cpp b/src/execution/operator/schema/physical_attach.cpp +index 2c2b76a0fc..2d835441c2 100644 +--- a/src/execution/operator/schema/physical_attach.cpp ++++ b/src/execution/operator/schema/physical_attach.cpp +@@ -96,6 +96,25 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c + } + } + ++ string extension = ""; ++ if (FileSystem::IsRemoteFile(path, extension)) { ++ if (!ExtensionHelper::TryAutoLoadExtension(context.client, extension)) { ++ throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", path, ++ extension); ++ } ++ if (access_mode == AccessMode::AUTOMATIC) { ++ // Attaching of remote files gets bumped to READ_ONLY ++ // This is due to the fact that on most (all?) remote files writes to DB are not available ++ // and having this raised later is not super helpful ++ access_mode = AccessMode::READ_ONLY; ++ } ++ if (access_mode == AccessMode::READ_WRITE) { ++ auto attached_mode = EnumUtil::ToString(access_mode); ++ throw BinderException("Remote database \"%s\" can't be attached in %s mode", ++ name, attached_mode); ++ } ++ } ++ + // get the database type and attach the database + db_manager.GetDatabaseType(context.client, db_type, *info, config, unrecognized_option); + auto attached_db = db_manager.AttachDatabase(context.client, *info, db_type, access_mode);