From e517a903128531a36b4ab26cdec9496431957262 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 15 May 2023 12:58:30 -0500 Subject: [PATCH] Unify ca_path and ca_file configuration parameters --- scripts/ci/posix/build-services-stop.sh | 2 + scripts/run-azurite.sh | 11 + scripts/run-gcs-emu.sh | 18 +- scripts/run-ssl-proxy.py | 151 +++++++++++ test/CMakeLists.txt | 1 + test/src/unit-capi-config.cc | 8 + test/src/unit-cppapi-config.cc | 2 +- test/src/unit-ssl-config.cc | 320 ++++++++++++++++++++++++ tiledb/CMakeLists.txt | 1 + tiledb/sm/config/config.cc | 10 + tiledb/sm/config/config.h | 12 + tiledb/sm/filesystem/azure.cc | 53 ++++ tiledb/sm/filesystem/gcs.cc | 24 +- tiledb/sm/filesystem/gcs.h | 7 + tiledb/sm/filesystem/s3.cc | 110 +++++--- tiledb/sm/filesystem/ssl_config.cc | 128 ++++++++++ tiledb/sm/filesystem/ssl_config.h | 83 ++++++ tiledb/sm/rest/curl.cc | 26 +- 18 files changed, 909 insertions(+), 58 deletions(-) create mode 100755 scripts/run-ssl-proxy.py create mode 100644 test/src/unit-ssl-config.cc create mode 100644 tiledb/sm/filesystem/ssl_config.cc create mode 100644 tiledb/sm/filesystem/ssl_config.h diff --git a/scripts/ci/posix/build-services-stop.sh b/scripts/ci/posix/build-services-stop.sh index c4a767917f2..71d944f63df 100755 --- a/scripts/ci/posix/build-services-stop.sh +++ b/scripts/ci/posix/build-services-stop.sh @@ -3,12 +3,14 @@ # GCS emulator if [[ "$TILEDB_CI_BACKEND" == "GCS" ]] && [[ "$GCS_PID" ]]; then kill -9 "$GCS_PID" || true # failure to stop should not fail job + kill -9 "$GCS_SSL_PID" || true fi # Azure emulator if [[ "$TILEDB_CI_BACKEND" = "AZURE" ]] && [[ "$AZURITE_PID" ]]; then # Kill the running Azurite server kill -n 9 "$AZURITE_PID" || true + kill -n 9 "$AZURITE_SSL_PID" || true fi if [[ "$TILEDB_CI_BACKEND" == "S3" ]] && [[ "$TILEDB_CI_OS" == "macOS" ]]; then diff --git a/scripts/run-azurite.sh b/scripts/run-azurite.sh index 086bbe815dc..738a109e708 100755 --- a/scripts/run-azurite.sh +++ b/scripts/run-azurite.sh @@ 
-23,6 +23,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # +set -xe # Starts an Azurite server @@ -38,11 +39,21 @@ run_azurite() { export AZURITE_PID=$! } +run_azurite_ssl_proxy() { + $DIR/run-ssl-proxy.py \ + --source-port 10001 \ + --target-port 10000 \ + --public-certificate /tmp/azurite-data/test_certs/public.crt \ + --private-key /tmp/azurite-data/test_certs/private.key & + export AZURITE_SSL_PID=$! +} + run() { mkdir -p /tmp/azurite-data cp -f -r $DIR/../test/inputs/test_certs /tmp/azurite-data run_azurite + run_azurite_ssl_proxy } run diff --git a/scripts/run-gcs-emu.sh b/scripts/run-gcs-emu.sh index 1eb7792f12b..b26b2a4dd08 100755 --- a/scripts/run-gcs-emu.sh +++ b/scripts/run-gcs-emu.sh @@ -29,11 +29,13 @@ # This script should be sourced from tiledb/build folder set -xe -export_gcs_env(){ - export CLOUD_STORAGE_EMULATOR_ENDPOINT=http://localhost:9000 # For JSON and XML API +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +export_gcs_env() { + export TILEDB_TEST_GCS_ENDPOINT=http://localhost:9000 # For JSON and XML API } -run_gcs(){ +run_gcs() { pushd . source /tmp/storage-testbench-venv/bin/activate cd /tmp/storage-testbench @@ -43,10 +45,20 @@ run_gcs(){ popd } +run_gcs_ssl_proxy() { + $DIR/run-ssl-proxy.py \ + --source-port 9001 \ + --target-port 9000 \ + --public-certificate $DIR/../test/inputs/test_certs/public.crt \ + --private-key $DIR/../test/inputs/test_certs/private.key & + export GCS_SSL_PID=$! 
+} + run() { export_gcs_env run_gcs + run_gcs_ssl_proxy } run diff --git a/scripts/run-ssl-proxy.py b/scripts/run-ssl-proxy.py new file mode 100755 index 00000000000..ffdfe82aa76 --- /dev/null +++ b/scripts/run-ssl-proxy.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 + +import argparse as ap +import os +import queue +import select +import socket +import ssl +import sys +import threading + +NUM_THREADS = os.cpu_count() + +def log(*args): + print(*args, flush=True) + +class Session(object): + def __init__(self, sock, addr, dst_port): + self.src_sock = sock + self.addr = addr + self.dst_port = dst_port + self.dst_sock = None + + def close(self): + try: + self.src_sock.close() + except: + pass + try: + self.dst_sock.close() + except: + pass + + def forward(self, from_sock, to_sock): + data = from_sock.recv(1024) + if not data: + self.close() + return False + to_sock.sendall(data) + return True + + def run(self): + self.dst_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.dst_sock.connect(('127.0.0.1', self.dst_port)) + sockets = [self.src_sock, self.dst_sock] + while True: + (rlist, wlist, xlist) = select.select(sockets, [], sockets) + if xlist: + self.close() + return + for r in rlist: + if r == self.src_sock: + if not self.forward(self.src_sock, self.dst_sock): + return + elif r == self.dst_sock: + if not self.forward(self.dst_sock, self.src_sock): + return + else: + self.close() + raise RuntimeError("Unknown socket: {}".format(r)) + +def handle_clients(conn_queue): + while True: + session = conn_queue.get() + if session is None: + return + try: + session.run() + except Exception as e: + log("Error handling client: {}".format(e)) + finally: + log("Client disconnected: {}".format(session.addr)) + +def run_proxy(cfg): + ssl_cert = cfg.public_certificate + ssl_key = cfg.private_key + if not os.path.isfile(ssl_cert): + log("Missing public certificate: {}".format(ssl_cert)) + exit(3) + if not os.path.isfile(ssl_key): + log("Missing private key: 
{}".format(ssl_key)) + exit(3) + ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ssl_ctx.load_cert_chain(ssl_cert, ssl_key) + + conn_queue = queue.Queue() + + threads = [] + for _ in range(NUM_THREADS): + t = threading.Thread(target=handle_clients, args=(conn_queue,)) + t.daemon = True + t.start() + threads.append(t) + + addr = ("None", 0) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) as listener: + listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + listener.bind(('127.0.0.1', cfg.source_port)) + listener.listen(1024) + msg = "SSL Proxy Server listening at https://127.0.0.1:{}" + log(msg.format(cfg.source_port)) + with ssl_ctx.wrap_socket(listener, server_side=True) as ssock: + while True: + try: + conn, addr = ssock.accept() + log("Client connected: {}".format(addr)) + conn_queue.put(Session(conn, addr, cfg.target_port)) + except Exception as e: + log("Error creationg session for {} : {}".format(addr, e)) + +def parse_args(): + parser = ap.ArgumentParser( + prog = "run-ssl-proxy.py", + description = "A simple SSL Proxy - Not for Production Use" + ) + parser.add_argument("-s", "--source-port", type=int, + help = "Source port on which to accept connections") + parser.add_argument("-d", "--target-port", type=int, + help = "Target port to proxy connections to") + parser.add_argument("-c", "--public-certificate", + help = "The server public certificate to use") + parser.add_argument("-k", "--private-key", + help = "The server private key to use") + args = parser.parse_args() + + if args.source_port is None: + log("Missing source port") + exit(1) + if args.target_port is None: + log("Missing target port") + exit(1) + if args.public_certificate is None: + log("Missing public certificate") + exit(1) + if args.private_key is None: + log("Missing private key") + exit(1) + + return args + +def main(): + log("SSL Proxy Initializing...") + try: + run_proxy(parse_args()) + except KeyboardInterrupt: + pass + finally: + log("SSL Proxy Shutting 
Down") + +if __name__ == "__main__": + main() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c53882a4efb..cf46f8f96c1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -194,6 +194,7 @@ set(TILEDB_UNIT_TEST_SOURCES src/unit-s3.cc src/unit-sparse-global-order-reader.cc src/unit-sparse-unordered-with-dups-reader.cc + src/unit-ssl-config.cc src/unit-Subarray.cc src/unit-SubarrayPartitioner-dense.cc src/unit-SubarrayPartitioner-error.cc diff --git a/test/src/unit-capi-config.cc b/test/src/unit-capi-config.cc index d5ea4038f04..2a7d4908777 100644 --- a/test/src/unit-capi-config.cc +++ b/test/src/unit-capi-config.cc @@ -291,6 +291,7 @@ void check_save_to_file() { ss << "sm.var_offsets.bitsize 64\n"; ss << "sm.var_offsets.extra_element false\n"; ss << "sm.var_offsets.mode bytes\n"; + ss << "ssl.verify true\n"; ss << "vfs.azure.block_list_block_size 5242880\n"; ss << "vfs.azure.max_parallel_ops " << std::thread::hardware_concurrency() << "\n"; @@ -659,12 +660,17 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { all_param_values["sm.fragment_info.preload_mbrs"] = "true"; all_param_values["sm.partial_tile_offsets_loading"] = "false"; + all_param_values["ssl.ca_file"] = ""; + all_param_values["ssl.ca_path"] = ""; + all_param_values["ssl.verify"] = "true"; + all_param_values["vfs.max_batch_size"] = "104857600"; all_param_values["vfs.min_batch_gap"] = "512000"; all_param_values["vfs.min_batch_size"] = "20971520"; all_param_values["vfs.min_parallel_size"] = "10485760"; all_param_values["vfs.read_ahead_size"] = "102400"; all_param_values["vfs.read_ahead_cache_size"] = "10485760"; + all_param_values["vfs.gcs.endpoint"] = ""; all_param_values["vfs.gcs.project_id"] = ""; all_param_values["vfs.gcs.max_parallel_ops"] = std::to_string(std::thread::hardware_concurrency()); @@ -732,6 +738,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { vfs_param_values["min_parallel_size"] = "10485760"; vfs_param_values["read_ahead_size"] = "102400"; 
vfs_param_values["read_ahead_cache_size"] = "10485760"; + vfs_param_values["gcs.endpoint"] = ""; vfs_param_values["gcs.project_id"] = ""; vfs_param_values["gcs.max_parallel_ops"] = std::to_string(std::thread::hardware_concurrency()); @@ -793,6 +800,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { vfs_param_values["hdfs.name_node_uri"] = ""; std::map gcs_param_values; + gcs_param_values["endpoint"] = ""; gcs_param_values["project_id"] = ""; gcs_param_values["max_parallel_ops"] = std::to_string(std::thread::hardware_concurrency()); diff --git a/test/src/unit-cppapi-config.cc b/test/src/unit-cppapi-config.cc index 426e9e3936c..941431ac9bc 100644 --- a/test/src/unit-cppapi-config.cc +++ b/test/src/unit-cppapi-config.cc @@ -67,7 +67,7 @@ TEST_CASE("C++ API: Config iterator", "[cppapi][config]") { names.push_back(it->first); } // Check number of VFS params in default config object. - CHECK(names.size() == 62); + CHECK(names.size() == 63); } TEST_CASE("C++ API: Config Environment Variables", "[cppapi][config]") { diff --git a/test/src/unit-ssl-config.cc b/test/src/unit-ssl-config.cc new file mode 100644 index 00000000000..d0cad3333c1 --- /dev/null +++ b/test/src/unit-ssl-config.cc @@ -0,0 +1,320 @@ +/** + * @file unit-ssl-config.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests for S3 SSL Configuration + */ + +#include +#include "tiledb/platform/platform.h" +#include "tiledb/sm/config/config.h" +#include "tiledb/sm/enums/filesystem.h" +#include "tiledb/sm/filesystem/uri.h" +#include "tiledb/sm/filesystem/vfs.h" +#include "tiledb/sm/storage_manager/context.h" + +using namespace tiledb; +using namespace tiledb::sm; + +const static std::string bucket_name = "ssl-config-test"; + +// We're telling libcurl to use a non-standard root certificate when +// evaluating whether the SSL connection is secure. On macOS where libcurl +// links against SecureTransport by default, a non-standard root certificate +// requires storing it in the system keychain which we absolutely do not +// want ever. I assume there's similar issues on Windows but I have not +// tested it myself. Regardless, the only real use case for this feature +// is in Docker containers that don't include a ca-certificates package +// so restricting to Linux should be fine. 
+#define REQUIRES_LINUX() \ + do { \ + if (!platform::is_os_linux) { \ + return; \ + } \ + } while (0) + +std::string get_test_ca_path(); +std::string get_test_ca_file(); +void check_failure(Filesystem fs, Config& cfg); +void check_success(Filesystem fs, Config& cfg); + +Config azure_base_config() { + Config cfg; + + std::string azure_user = "devstoreaccount1"; + std::string azure_key = + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDX" + "J1OUzFT50uSRZ6IFsuFq2UVErCz4I6" + "tq/K1SZFPTOtr/KBHBeksoGMGw=="; + std::string azure_endpoint = "https://localhost:10001/devstoreaccount1"; + + REQUIRE(cfg.set("vfs.azure.storage_account_name", azure_user).ok()); + REQUIRE(cfg.set("vfs.azure.storage_account_key", azure_key).ok()); + REQUIRE(cfg.set("vfs.azure.blob_endpoint", azure_endpoint).ok()); + + return cfg; +} + +TEST_CASE("Azure - Connection Error", "[ssl_config][azure]") { + // Show that SSL connections without configuration are broken + // so that the other tests show that setting the config values + // actually works rather than me not realizing I accidentally + // set an http endpoint instead of https. + auto cfg = azure_base_config(); + check_failure(Filesystem::AZURE, cfg); +} + +TEST_CASE("Azure - Verify False - ssl.verify", "[ssl_config][azure]") { + // For some reason, Windows fails to disable SSL validation. Given + // that this is only a test to ensure that we've got SSL turned on in + // for testing we just disable it since we really don't want uses + // running with verify=false in the general case. 
+ REQUIRES_LINUX(); + + auto cfg = azure_base_config(); + REQUIRE(cfg.set("ssl.verify", "false").ok()); + check_success(Filesystem::AZURE, cfg); +} + +TEST_CASE("Azure - CAINFO - ssl.ca_file", "[ssl_config][azure]") { + REQUIRES_LINUX(); + + auto cfg = azure_base_config(); + REQUIRE(cfg.set("ssl.verify", "true").ok()); + REQUIRE(cfg.set("ssl.ca_file", get_test_ca_file()).ok()); + check_success(Filesystem::AZURE, cfg); +} + +TEST_CASE("Azure - CAPATH - ssl.ca_path", "[ssl_config][azure]") { + REQUIRES_LINUX(); + + auto cfg = azure_base_config(); + REQUIRE(cfg.set("ssl.verify_ssl", "true").ok()); + REQUIRE(cfg.set("ssl.ca_path", get_test_ca_path()).ok()); + + // The Azure client does not support setting the CAPATH in libcurl so + // this is an expected failure. + check_failure(Filesystem::AZURE, cfg); +} + +Config gcs_base_config() { + Config cfg; + + REQUIRE(cfg.set("vfs.gcs.endpoint", "https://localhost:9001").ok()); + + return cfg; +} + +TEST_CASE("GCS - Connection Error", "[ssl_config][gcs]") { + // Show that SSL connections without configuration are broken + // so that the other tests show that setting the config values + // actually works rather than me not realizing I accidentally + // set an http endpoint instead of https. + auto cfg = gcs_base_config(); + check_failure(Filesystem::GCS, cfg); +} + +TEST_CASE("GCS - Verify False - ssl.verify", "[ssl_config][gcs]") { + // GCS does not allow disabling SSL verification through + // its API so we require this to be a failure as well. 
+ auto cfg = gcs_base_config(); + REQUIRE(cfg.set("ssl.verify", "false").ok()); + check_failure(Filesystem::GCS, cfg); +} + +TEST_CASE("GCS - CAINFO - ssl.ca_file", "[ssl_config][gcs]") { + REQUIRES_LINUX(); + + auto cfg = gcs_base_config(); + REQUIRE(cfg.set("ssl.verify", "true").ok()); + REQUIRE(cfg.set("ssl.ca_file", get_test_ca_file()).ok()); + check_success(Filesystem::GCS, cfg); +} + +TEST_CASE("GCS - CAPATH - ssl.ca_path", "[ssl_config][gcs]") { + auto cfg = gcs_base_config(); + REQUIRE(cfg.set("ssl.verify_ssl", "true").ok()); + REQUIRE(cfg.set("ssl.ca_path", get_test_ca_path()).ok()); + + // The GCS client does not support setting the CAPATH in libcurl so + // this is an expected failure. + check_failure(Filesystem::GCS, cfg); +} + +Config s3_base_config() { + Config cfg; + + REQUIRE(cfg.set("vfs.s3.endpoint_override", "localhost:9999").ok()); + REQUIRE(cfg.set("vfs.s3.scheme", "https").ok()); + REQUIRE(cfg.set("vfs.s3.use_virtual_addressing", "false").ok()); + REQUIRE(cfg.set("vfs.s3.verify_ssl", "true").ok()); + + return cfg; +} + +TEST_CASE("S3 - Connection Error", "[ssl_config][s3][yarps]") { + // Show that SSL connections without configuration are broken + // so that the other tests show that setting the config values + // actually works rather than me not realizing I accidentally + // set an http endpoint instead of https. 
+ auto cfg = s3_base_config(); + REQUIRE(cfg.set("vfs.s3.logging_level", "trace").ok()); + std::cerr << "S3 - Connection Error: " << std::this_thread::get_id() + << std::endl; + check_failure(Filesystem::S3, cfg); +} + +TEST_CASE("S3 - Verify False - vfs.s3.verify_ssl", "[ssl_config][s3]") { + std::cerr << "S3 - Verify False - vfs.s3.verify_ssl: " + << std::this_thread::get_id() << std::endl; + auto cfg = s3_base_config(); + REQUIRE(cfg.set("vfs.s3.verify_ssl", "false").ok()); + check_success(Filesystem::S3, cfg); +} + +TEST_CASE("S3 - Verify False - ssl.verify", "[ssl_config][s3]") { + auto cfg = s3_base_config(); + REQUIRE(cfg.set("ssl.verify", "false").ok()); + check_success(Filesystem::S3, cfg); +} + +TEST_CASE("S3 - CAINFO - vfs.s3.ca_file", "[ssl_config][s3]") { + REQUIRES_LINUX(); + + auto cfg = s3_base_config(); + REQUIRE(cfg.set("vfs.s3.verify_ssl", "true").ok()); + REQUIRE(cfg.set("vfs.s3.ca_file", get_test_ca_file()).ok()); + check_success(Filesystem::S3, cfg); +} + +TEST_CASE("S3 - CAINFO - ssl.ca_file", "[ssl_config][s3]") { + REQUIRES_LINUX(); + + auto cfg = s3_base_config(); + REQUIRE(cfg.set("ssl.verify", "true").ok()); + REQUIRE(cfg.set("ssl.ca_file", get_test_ca_file()).ok()); + check_success(Filesystem::S3, cfg); +} + +TEST_CASE("S3 - CAPATH - vfs.s3.ca_path", "[ssl_config][s3]") { + REQUIRES_LINUX(); + + auto cfg = s3_base_config(); + REQUIRE(cfg.set("vfs.s3.verify_ssl", "true").ok()); + REQUIRE(cfg.set("vfs.s3.ca_path", get_test_ca_path()).ok()); + check_success(Filesystem::S3, cfg); +} + +TEST_CASE("S3 - CAPATH - ssl.ca_path", "[ssl_config][s3]") { + REQUIRES_LINUX(); + + auto cfg = s3_base_config(); + REQUIRE(cfg.set("ssl.verify", "true").ok()); + REQUIRE(cfg.set("ssl.ca_path", get_test_ca_path()).ok()); + check_success(Filesystem::S3, cfg); +} + +std::string get_test_ca_path() { + return std::string(TILEDB_TEST_INPUTS_DIR) + "/test_certs/"; +} + +std::string get_test_ca_file() { + return get_test_ca_path() + "public.crt"; +} + +void 
check_failure(Filesystem fs, Config& cfg) { + std::cerr << "CHECK FAILURE: " << std::this_thread::get_id() << std::endl; + Context ctx(cfg); + auto& vfs = ctx.resources().vfs(); + + if (!vfs.supports_fs(fs)) { + return; + } + + std::string scheme; + + if (fs == Filesystem::AZURE) { + scheme = "azure"; + } else if (fs == Filesystem::GCS) { + scheme = "gcs"; + } else if (fs == Filesystem::S3) { + scheme = "s3"; + } else { + throw std::invalid_argument("Invalid fs value: " + filesystem_str(fs)); + } + + URI bucket_uri = URI(scheme + "://" + bucket_name); + + Status st; + bool is_bucket; + + try { + st = vfs.is_bucket(bucket_uri, &is_bucket); + } catch (...) { + // Some backends throw exceptions to signal SSL error conditions + // so we pass the test by returning early here. + return; + } + + // Otherwise, make sure we get a failure status + REQUIRE(!st.ok()); +} + +void check_success(Filesystem fs, Config& cfg) { + std::cerr << "CHECK SUCCESS: " << std::this_thread::get_id() << std::endl; + Context ctx(cfg); + auto& vfs = ctx.resources().vfs(); + + if (!vfs.supports_fs(fs)) { + return; + } + + std::string scheme; + + if (fs == Filesystem::AZURE) { + scheme = "azure"; + } else if (fs == Filesystem::GCS) { + scheme = "gcs"; + } else if (fs == Filesystem::S3) { + scheme = "s3"; + } else { + throw std::invalid_argument("Invalid fs value: " + filesystem_str(fs)); + } + + URI bucket_uri = URI(scheme + "://" + bucket_name); + + bool is_bucket; + throw_if_not_ok(vfs.is_bucket(bucket_uri, &is_bucket)); + if (is_bucket) { + throw_if_not_ok(vfs.remove_bucket(bucket_uri)); + } + throw_if_not_ok(vfs.create_bucket(bucket_uri)); + + throw_if_not_ok(vfs.is_bucket(bucket_uri, &is_bucket)); + REQUIRE(is_bucket); +} diff --git a/tiledb/CMakeLists.txt b/tiledb/CMakeLists.txt index 932c37fbc1a..c14e29a0d69 100644 --- a/tiledb/CMakeLists.txt +++ b/tiledb/CMakeLists.txt @@ -192,6 +192,7 @@ set(TILEDB_CORE_SOURCES ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/s3.cc 
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/s3_thread_pool_executor.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/s3/STSProfileWithWebIdentityCredentialsProvider.cc + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/ssl_config.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/uri.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/vfs.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/vfs_file_handle.cc diff --git a/tiledb/sm/config/config.cc b/tiledb/sm/config/config.cc index b042dab8262..bc93439fae3 100644 --- a/tiledb/sm/config/config.cc +++ b/tiledb/sm/config/config.cc @@ -154,6 +154,9 @@ const std::string Config::SM_GROUP_TIMESTAMP_START = "0"; const std::string Config::SM_GROUP_TIMESTAMP_END = std::to_string(UINT64_MAX); const std::string Config::SM_FRAGMENT_INFO_PRELOAD_MBRS = "false"; const std::string Config::SM_PARTIAL_TILE_OFFSETS_LOADING = "false"; +const std::string Config::SSL_CA_FILE = ""; +const std::string Config::SSL_CA_PATH = ""; +const std::string Config::SSL_VERIFY = "true"; const std::string Config::VFS_MIN_PARALLEL_SIZE = "10485760"; const std::string Config::VFS_MAX_BATCH_SIZE = "104857600"; const std::string Config::VFS_MIN_BATCH_GAP = "512000"; @@ -174,6 +177,7 @@ const std::string Config::VFS_AZURE_USE_BLOCK_LIST_UPLOAD = "true"; const std::string Config::VFS_AZURE_MAX_RETRIES = "5"; const std::string Config::VFS_AZURE_RETRY_DELAY_MS = "800"; const std::string Config::VFS_AZURE_MAX_RETRY_DELAY_MS = "60000"; +const std::string Config::VFS_GCS_ENDPOINT = ""; const std::string Config::VFS_GCS_PROJECT_ID = ""; const std::string Config::VFS_GCS_MAX_PARALLEL_OPS = Config::SM_IO_CONCURRENCY_LEVEL; @@ -352,6 +356,9 @@ const std::map default_config_values = { std::make_pair( "sm.partial_tile_offsets_loading", Config::SM_PARTIAL_TILE_OFFSETS_LOADING), + std::make_pair("ssl.ca_file", Config::SSL_CA_FILE), + std::make_pair("ssl.ca_path", Config::SSL_CA_PATH), + std::make_pair("ssl.verify", Config::SSL_VERIFY), 
std::make_pair("vfs.min_parallel_size", Config::VFS_MIN_PARALLEL_SIZE), std::make_pair("vfs.max_batch_size", Config::VFS_MAX_BATCH_SIZE), std::make_pair("vfs.min_batch_gap", Config::VFS_MIN_BATCH_GAP), @@ -388,6 +395,7 @@ const std::map default_config_values = { "vfs.azure.retry_delay_ms", Config::VFS_AZURE_RETRY_DELAY_MS), std::make_pair( "vfs.azure.max_retry_delay_ms", Config::VFS_AZURE_MAX_RETRY_DELAY_MS), + std::make_pair("vfs.gcs.endpoint", Config::VFS_GCS_ENDPOINT), std::make_pair("vfs.gcs.project_id", Config::VFS_GCS_PROJECT_ID), std::make_pair( "vfs.gcs.max_parallel_ops", Config::VFS_GCS_MAX_PARALLEL_OPS), @@ -732,6 +740,8 @@ Status Config::sanity_check( Status_ConfigError("Invalid offsets format parameter value")); } else if (param == "sm.fragment_info.preload_mbrs") { RETURN_NOT_OK(utils::parse::convert(value, &v)); + } else if (param == "ssl.verify") { + RETURN_NOT_OK(utils::parse::convert(value, &v)); } else if (param == "vfs.min_parallel_size") { RETURN_NOT_OK(utils::parse::convert(value, &vuint64)); } else if (param == "vfs.max_batch_size") { diff --git a/tiledb/sm/config/config.h b/tiledb/sm/config/config.h index 5e9610320cc..82a8cf4f8db 100644 --- a/tiledb/sm/config/config.h +++ b/tiledb/sm/config/config.h @@ -362,6 +362,15 @@ class Config { /** If `true` the readers might partially load/unload tile offsets. */ static const std::string SM_PARTIAL_TILE_OFFSETS_LOADING; + /** Certificate file path. */ + static const std::string SSL_CA_FILE; + + /** Certificate directory path. */ + static const std::string SSL_CA_PATH; + + /** Whether to verify SSL connections. */ + static const std::string SSL_VERIFY; + /** The default minimum number of bytes in a parallel VFS operation. */ static const std::string VFS_MIN_PARALLEL_SIZE; @@ -421,6 +430,9 @@ class Config { /** Azure max retry delay. */ static const std::string VFS_AZURE_MAX_RETRY_DELAY_MS; + /** GCS Endpoint. */ + static const std::string VFS_GCS_ENDPOINT; + /** GCS project id. 
*/ static const std::string VFS_GCS_PROJECT_ID; diff --git a/tiledb/sm/filesystem/azure.cc b/tiledb/sm/filesystem/azure.cc index 5f84f729e29..42b8d02c071 100644 --- a/tiledb/sm/filesystem/azure.cc +++ b/tiledb/sm/filesystem/azure.cc @@ -43,10 +43,14 @@ #include "tiledb/common/stdx_string.h" #include "tiledb/platform/cert_file.h" #include "tiledb/sm/filesystem/azure.h" +#include "tiledb/sm/filesystem/ssl_config.h" #include "tiledb/sm/misc/parallel_functions.h" #include "tiledb/sm/misc/tdb_math.h" #include "tiledb/sm/misc/utils.h" +static std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( + tiledb::sm::SSLConfig& ssl_cfg); + using namespace tiledb::common; using tiledb::common::filesystem::directory_entry; @@ -173,6 +177,9 @@ Status Azure::init(const Config& config, ThreadPool* const thread_pool) { options.Retry.RetryDelay = retry_delay_; options.Retry.MaxRetryDelay = max_retry_delay; + SSLConfig ssl_cfg = SSLConfig(config); + options.Transport.Transport = create_transport(ssl_cfg); + // Construct the Azure SDK blob service client. // We pass a shared key if it was specified. 
if (!account_key.empty()) { @@ -1108,4 +1115,50 @@ std::string Azure::BlockListUploadState::next_block_id() { } // namespace sm } // namespace tiledb +#if defined(_WIN32) +#include +std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( + tiledb::sm::SSLConfig& ssl_cfg) { + ::Azure::Core::Http::WinHttpTransportOptions transport_opts; + + if (!ssl_cfg.ca_file().empty()) { + LOG_WARN("Azure ignores the `ssl.ca_file` configuration key on Windows."); + } + + if (!ssl_cfg.ca_path().empty()) { + LOG_WARN("Azure ignores the `ssl.ca_path` configuration key on Windows."); + } + + if (ssl_cfg.verify() == false) { + transport_opts.IgnoreUnknownCertificateAuthority = true; + } + + return make_shared<::Azure::Core::Http::WinHttpTransport>( + HERE(), transport_opts); +} +#else +#include +std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( + tiledb::sm::SSLConfig& ssl_cfg) { + ::Azure::Core::Http::CurlTransportOptions transport_opts; + + if (!ssl_cfg.ca_file().empty()) { + transport_opts.CAInfo = ssl_cfg.ca_file(); + } + + if (!ssl_cfg.ca_path().empty()) { + LOG_WARN( + "Azure ignores the `ssl.ca_path` configuration key, " + "use `ssl.ca_file` instead"); + } + + if (ssl_cfg.verify() == false) { + transport_opts.SslVerifyPeer = false; + } + + return make_shared<::Azure::Core::Http::CurlTransport>( + HERE(), transport_opts); +} +#endif + #endif diff --git a/tiledb/sm/filesystem/gcs.cc b/tiledb/sm/filesystem/gcs.cc index 4a1f0f523fd..48e039d319b 100644 --- a/tiledb/sm/filesystem/gcs.cc +++ b/tiledb/sm/filesystem/gcs.cc @@ -82,11 +82,18 @@ Status GCS::init(const Config& config, ThreadPool* const thread_pool) { Status_GCSError("Can't initialize with null thread pool.")); } + ssl_cfg_ = SSLConfig(config); + assert(state_ == State::UNINITIALIZED); thread_pool_ = thread_pool; bool found; + endpoint_ = config.get("vfs.gcs.endpoint", &found); + assert(found); + if (endpoint_.empty() && getenv("TILEDB_TEST_GCS_ENDPOINT")) { + endpoint_ = 
getenv("TILEDB_TEST_GCS_ENDPOINT"); + } project_id_ = config.get("vfs.gcs.project_id", &found); assert(found); RETURN_NOT_OK(config.get( @@ -120,12 +127,14 @@ Status GCS::init_client() const { } google::cloud::storage::ChannelOptions channel_options; + if (!ssl_cfg_.ca_file().empty()) { + channel_options.set_ssl_root_path(ssl_cfg_.ca_file()); + } - if constexpr (tiledb::platform::PlatformCertFile::enabled) { - const std::string cert_file = tiledb::platform::PlatformCertFile::get(); - if (!cert_file.empty()) { - channel_options.set_ssl_root_path(cert_file); - } + if (!ssl_cfg_.ca_path().empty()) { + LOG_WARN( + "GCS ignores the `ssl.ca_path` configuration key, " + "use `ssl.ca_file` instead"); } // Note that the order here is *extremely important* @@ -143,7 +152,7 @@ Status GCS::init_client() const { // env variable GOOGLE_APPLICATION_CREDENTIALS try { shared_ptr creds = nullptr; - if (getenv("CLOUD_STORAGE_EMULATOR_ENDPOINT")) { + if (!endpoint_.empty() || getenv("CLOUD_STORAGE_EMULATOR_ENDPOINT")) { creds = google::cloud::storage::oauth2::CreateAnonymousCredentials(); } else { auto status_or_creds = @@ -158,6 +167,9 @@ Status GCS::init_client() const { } google::cloud::storage::ClientOptions client_options( creds, channel_options); + if (!endpoint_.empty()) { + client_options.set_endpoint(endpoint_); + } auto client = google::cloud::storage::Client( client_options, google::cloud::storage::LimitedTimeRetryPolicy( diff --git a/tiledb/sm/filesystem/gcs.h b/tiledb/sm/filesystem/gcs.h index 036174b3495..a84d3adcd9f 100644 --- a/tiledb/sm/filesystem/gcs.h +++ b/tiledb/sm/filesystem/gcs.h @@ -43,6 +43,7 @@ #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/config/config.h" #include "tiledb/sm/curl/curl_init.h" +#include "tiledb/sm/filesystem/ssl_config.h" #include "tiledb/sm/misc/constants.h" #include "uri.h" @@ -401,6 +402,9 @@ class GCS { /** The current state. */ State state_; + /** SSLConfig options. 
*/ + SSLConfig ssl_cfg_; + /** * Mutex protecting client initialization. This is mutable so that nominally * const functions can call init_client(). @@ -410,6 +414,9 @@ class GCS { /** The VFS thread pool. */ ThreadPool* thread_pool_; + // The GCS endpoint. + std::string endpoint_; + // The GCS project id. std::string project_id_; diff --git a/tiledb/sm/filesystem/s3.cc b/tiledb/sm/filesystem/s3.cc index 9302ec86a60..4f740867370 100644 --- a/tiledb/sm/filesystem/s3.cc +++ b/tiledb/sm/filesystem/s3.cc @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,8 @@ #include "tiledb/common/logger.h" #include "tiledb/common/unique_rwlock.h" -#include "tiledb/platform/cert_file.h" +#include "tiledb/platform/platform.h" +#include "tiledb/sm/filesystem/ssl_config.h" #include "tiledb/sm/global_state/unit_test_config.h" #include "tiledb/sm/misc/tdb_math.h" #include "tiledb/sm/misc/utils.h" @@ -173,10 +175,44 @@ namespace { */ template std::string outcome_error_message(const Aws::Utils::Outcome& outcome) { - return std::string("\nException: ") + - outcome.GetError().GetExceptionName().c_str() + - std::string("\nError message: ") + - outcome.GetError().GetMessage().c_str(); + if (outcome.IsSuccess()) { + return "Success"; + } + + auto err = outcome.GetError(); + Aws::StringStream ss; + + ss << "[Error Type: " << static_cast(err.GetErrorType()) << "]" + << " [HTTP Response Code: " << static_cast(err.GetResponseCode()) + << "]"; + + if (!err.GetExceptionName().empty()) { + ss << " [Exception: " << err.GetExceptionName() << "]"; + } + + // For some reason, these symbols are not exposed when building with MINGW + // so for now we just disable adding the tags on Windows. 
+ if constexpr (!platform::is_os_windows) { + if (!err.GetRemoteHostIpAddress().empty()) { + ss << " [Remote IP: " << err.GetRemoteHostIpAddress() << "]"; + } + + if (!err.GetRequestId().empty()) { + ss << " [Request ID: " << err.GetRequestId() << "]"; + } + } + + if (err.GetResponseHeaders().size() > 0) { + ss << " [Headers:"; + for (auto&& h : err.GetResponseHeaders()) { + ss << " '" << h.first << "' = '" << h.second << "'"; + } + ss << "]"; + } + + ss << " : " << err.GetMessage(); + + return ss.str(); } } // namespace @@ -746,9 +782,23 @@ Status S3::is_bucket(const URI& uri, bool* const exists) const { Aws::S3::Model::HeadBucketRequest head_bucket_request; head_bucket_request.SetBucket(aws_uri.GetAuthority()); auto head_bucket_outcome = client_->HeadBucket(head_bucket_request); - *exists = head_bucket_outcome.IsSuccess(); - return Status::Ok(); + if (head_bucket_outcome.IsSuccess()) { + *exists = true; + return Status::Ok(); + } + + auto err = head_bucket_outcome.GetError(); + + if (err.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_BUCKET || + err.GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) { + *exists = false; + return Status::Ok(); + } + + return LOG_STATUS(Status_S3Error( + "Failed to check if S3 bucket '" + uri.to_string() + + "' exists: " + outcome_error_message(head_bucket_outcome))); } Status S3::is_object(const URI& uri, bool* const exists) const { @@ -776,9 +826,23 @@ Status S3::is_object( if (request_payer_ != Aws::S3::Model::RequestPayer::NOT_SET) head_object_request.SetRequestPayer(request_payer_); auto head_object_outcome = client_->HeadObject(head_object_request); - *exists = head_object_outcome.IsSuccess(); - return Status::Ok(); + if (head_object_outcome.IsSuccess()) { + *exists = true; + return Status::Ok(); + } + + auto err = head_object_outcome.GetError(); + + if (err.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY || + err.GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) { + *exists = false; + return Status::Ok(); + } + + 
return LOG_STATUS(Status_S3Error( + "Failed to check if S3 object 's3://" + bucket_name + "/" + object_key + + "' exists: " + outcome_error_message(head_object_outcome))); } Status S3::is_dir(const URI& uri, bool* exists) const { @@ -1444,15 +1508,6 @@ Status S3::init_client() const { auto request_timeout_ms = config_.get( "vfs.s3.request_timeout_ms", Config::MustFindMarker()); - auto ca_file = - config_.get("vfs.s3.ca_file", Config::MustFindMarker()); - - auto ca_path = - config_.get("vfs.s3.ca_path", Config::MustFindMarker()); - - auto verify_ssl = - config_.get("vfs.s3.verify_ssl", Config::MustFindMarker()); - auto aws_access_key_id = config_.get( "vfs.s3.aws_access_key_id", Config::MustFindMarker()); @@ -1480,13 +1535,15 @@ Status S3::init_client() const { auto connect_scale_factor = config_.get( "vfs.s3.connect_scale_factor", Config::MustFindMarker()); + SSLConfig ssl_cfg = S3SSLConfig(config_); + client_config.scheme = (s3_scheme == "http") ? Aws::Http::Scheme::HTTP : Aws::Http::Scheme::HTTPS; client_config.connectTimeoutMs = (long)connect_timeout_ms; client_config.requestTimeoutMs = (long)request_timeout_ms; - client_config.caFile = ca_file; - client_config.caPath = ca_path; - client_config.verifySSL = verify_ssl; + client_config.caFile = ssl_cfg.ca_file(); + client_config.caPath = ssl_cfg.ca_path(); + client_config.verifySSL = ssl_cfg.verify(); client_config.retryStrategy = Aws::MakeShared( constants::s3_allocation_tag.c_str(), @@ -1494,17 +1551,6 @@ Status S3::init_client() const { connect_max_tries, connect_scale_factor); - if constexpr (tiledb::platform::PlatformCertFile::enabled) { - // If the user has not set a s3 ca file or ca path then let's attempt to set - // the cert file if we've autodetected it - if (ca_file.empty() && ca_path.empty()) { - const std::string cert_file = tiledb::platform::PlatformCertFile::get(); - if (!cert_file.empty()) { - client_config.caFile = cert_file; - } - } - } - // If the user says not to sign a request, use the // 
AnonymousAWSCredentialsProvider This is equivalent to --no-sign-request on // the aws cli diff --git a/tiledb/sm/filesystem/ssl_config.cc b/tiledb/sm/filesystem/ssl_config.cc new file mode 100644 index 00000000000..7cedaefaf39 --- /dev/null +++ b/tiledb/sm/filesystem/ssl_config.cc @@ -0,0 +1,128 @@ +/** + * @file ssl_config.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file includes definitions of the SSLConfig class. 
+ */ + +#include "tiledb/sm/filesystem/ssl_config.h" +#include "tiledb/common/logger.h" +#include "tiledb/platform/cert_file.h" + +namespace tiledb::sm { + +SSLConfig::SSLConfig() + : ca_file_("") + , ca_path_("") + , verify_(true) { +} + +SSLConfig::SSLConfig(const Config& cfg) + : ca_file_("") + , ca_path_("") + , verify_(true) { + // Look up our ca_file and ca_path configuration options + auto ca_file = cfg.get("ssl.ca_file"); + if (ca_file.has_value()) { + ca_file_ = ca_file.value(); + } + + auto ca_path = cfg.get("ssl.ca_path"); + if (ca_path.has_value()) { + ca_path_ = ca_path.value(); + } + + if constexpr (tiledb::platform::PlatformCertFile::enabled) { + // If neither ca_file nor ca_path is set, we look for a system default + // CA file on Linux platforms. + if (ca_file_.empty() && ca_path_.empty()) { + ca_file_ = tiledb::platform::PlatformCertFile::get(); + } + } + + auto verify = cfg.get("ssl.verify"); + if (verify.has_value()) { + verify_ = verify.value(); + } +} + +S3SSLConfig::S3SSLConfig(const Config& cfg) + : SSLConfig(cfg) { + // Support the old s3 configuration values if they are + // configured by the user. + + // Only set ca_file_ if vfs.s3.ca_file is a non-empty string + auto ca_file = cfg.get("vfs.s3.ca_file"); + if (ca_file.has_value() && !ca_file.value().empty()) { + LOG_WARN( + "The 'vfs.s3.ca_file' configuration option has been replaced " + "with 'ssl.ca_file'. Make sure that you update your configuration " + "because 'vfs.s3.ca_file' will eventually be removed."); + ca_file_ = ca_file.value(); + } + + // Only set ca_path_ if vfs.s3.ca_path is a non-empty string + auto ca_path = cfg.get("vfs.s3.ca_path"); + if (ca_path.has_value() && !ca_path.value().empty()) { + LOG_WARN( + "The 'vfs.s3.ca_path' configuration option has been replaced " + "with 'ssl.ca_path'.
Make sure that you update your configuration " + "because 'vfs.s3.ca_path' will eventually be removed."); + ca_path_ = ca_path.value(); + } + + // Only override what was found in `ssl.verify` if `vfs.s3.verify_ssl` is + // set to false (i.e., non-default). Otherwise this will always ignore the + // ssl.verify value. + auto verify = cfg.get("vfs.s3.verify_ssl"); + if (verify.has_value() && !verify.value()) { + LOG_WARN( + "The 'vfs.s3.verify_ssl' configuration option has been replaced " + "with 'ssl.verify'. Make sure that you update your configuration " + "because 'vfs.s3.verify_ssl' will eventually be removed."); + verify_ = verify.value(); + } +} + +RestSSLConfig::RestSSLConfig(const Config& cfg) + : SSLConfig(cfg) { + // Only override what was found in `ssl.verify` if + // `rest.ignore_ssl_validation` is non-default (i.e., true, the naming here + // is backwards from all the other ssl verification key names) + auto skip_verify = cfg.get("rest.ignore_ssl_validation"); + if (skip_verify.has_value() && skip_verify.value()) { + LOG_WARN( + "The 'rest.ignore_ssl_validation = false' configuration option " + "has been replaced with 'ssl.verify = true'. Make sure that you update " + "your configuration because 'rest.ignore_ssl_validation' will " + "eventually be removed."); + verify_ = false; + } +} + +} // namespace tiledb::sm diff --git a/tiledb/sm/filesystem/ssl_config.h b/tiledb/sm/filesystem/ssl_config.h new file mode 100644 index 00000000000..3687fb07015 --- /dev/null +++ b/tiledb/sm/filesystem/ssl_config.h @@ -0,0 +1,83 @@ +/** + * @file ssl_config.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines the SSLConfig class. + */ + +#ifndef TILEDB_SSL_CONFIG_H +#define TILEDB_SSL_CONFIG_H + +#include "tiledb/sm/config/config.h" + +namespace tiledb::sm { + +class SSLConfig { + public: + SSLConfig(); + SSLConfig(const Config& cfg); + + /** Return the CAFile config value. */ + inline const std::string& ca_file() const { + return ca_file_; + } + + /** Return the CAPath config value. */ + inline const std::string& ca_path() const { + return ca_path_; + } + + /** Return whether or not SSL verification should be performed. */ + inline bool verify() const { + return verify_; + } + + protected: + /** Stores a (maybe empty) path to the configured CAFile path. */ + std::string ca_file_; + + /** Stores a (maybe empty) path to the configured CAPath directory. */ + std::string ca_path_; + + /** Stores whether we want to verify SSL connections or not. 
*/ + bool verify_; +}; + +class S3SSLConfig : public SSLConfig { + public: + S3SSLConfig(const Config& cfg); +}; + +class RestSSLConfig : public SSLConfig { + public: + RestSSLConfig(const Config& cfg); +}; + +} // namespace tiledb::sm + +#endif // TILEDB_SSL_CONFIG_H diff --git a/tiledb/sm/rest/curl.cc b/tiledb/sm/rest/curl.cc index b781d1b6f62..8481ce211d5 100644 --- a/tiledb/sm/rest/curl.cc +++ b/tiledb/sm/rest/curl.cc @@ -32,7 +32,7 @@ #include "tiledb/sm/rest/curl.h" #include "tiledb/common/logger.h" -#include "tiledb/platform/cert_file.h" +#include "tiledb/sm/filesystem/ssl_config.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/misc/tdb_time.h" #include "tiledb/sm/misc/utils.h" @@ -302,28 +302,22 @@ Status Curl::init( return LOG_STATUS(Status_RestError( "Error initializing libcurl; failed to set CURLOPT_HEADERDATA")); - // Ignore ssl validation if the user has set rest.ignore_ssl_validation = true - bool ignore_ssl_validation = false; - bool found; - RETURN_NOT_OK(config_->get( - "rest.ignore_ssl_validation", &ignore_ssl_validation, &found)); + SSLConfig ssl_cfg = RestSSLConfig(*config_); - if (ignore_ssl_validation) { + if (ssl_cfg.verify() == false) { curl_easy_setopt(curl_.get(), CURLOPT_SSL_VERIFYHOST, 0); curl_easy_setopt(curl_.get(), CURLOPT_SSL_VERIFYPEER, 0); } - if constexpr (tiledb::platform::PlatformCertFile::enabled) { - // Get CA Cert bundle file from global state. This is initialized and cached - // if detected. We have only had issues with finding the certificate path on - // Linux. 
- const std::string cert_file = tiledb::platform::PlatformCertFile::get(); - // If we have detected a ca cert bundle let's set the curl option for CAINFO - if (!cert_file.empty()) { - curl_easy_setopt(curl_.get(), CURLOPT_CAINFO, cert_file.c_str()); - } + if (!ssl_cfg.ca_file().empty()) { + curl_easy_setopt(curl_.get(), CURLOPT_CAINFO, ssl_cfg.ca_file().c_str()); } + if (!ssl_cfg.ca_path().empty()) { + curl_easy_setopt(curl_.get(), CURLOPT_CAPATH, ssl_cfg.ca_path().c_str()); + } + + bool found; RETURN_NOT_OK( config_->get("rest.retry_count", &retry_count_, &found)); assert(found);