From b9cbf80244a5c4c0de8cd5a996746a79da353146 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 5 Jan 2024 20:33:42 -0300 Subject: [PATCH] GH-39449: [C++] Use default Azure credentials implicitly and support anonymous credentials explicitly (#39450) ### Rationale for this change - Default credentials should be used by default. - There should be a way to connect to public containers without credentials (aka "anonymous credential"). ### What changes are included in this PR? - Sync ordering of declarations and definitions in the `AzureOptions` classs - Use default credentials even when `ConfigureDefaultCredential()` isn't explicitly called - Create clients when `credential_kind_` is "anonymous" instead of returning an error ### Are these changes tested? By new and existing tests. * Closes: #39449 Lead-authored-by: Felipe Oliveira Carvalho Co-authored-by: Sutou Kouhei Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 62 ++++++++++++++++-------- cpp/src/arrow/filesystem/azurefs.h | 37 +++++++++----- cpp/src/arrow/filesystem/azurefs_test.cc | 30 +++++++++--- 3 files changed, 92 insertions(+), 37 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 9569eff2e47ed..730adabd48bec 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -73,13 +73,17 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return false; } switch (credential_kind_) { + case CredentialKind::kDefault: case CredentialKind::kAnonymous: return true; - case CredentialKind::kTokenCredential: - return token_credential_ == other.token_credential_; - case CredentialKind::kStorageSharedKeyCredential: + case CredentialKind::kStorageSharedKey: return storage_shared_key_credential_->AccountName == other.storage_shared_key_credential_->AccountName; + case CredentialKind::kClientSecret: + case CredentialKind::kManagedIdentity: + case CredentialKind::kWorkloadIdentity: + return token_credential_->GetCredentialName() == + other.token_credential_->GetCredentialName(); } DCHECK(false); return false; @@ -113,8 +117,19 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const { return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name); } +Status AzureOptions::ConfigureDefaultCredential() { + credential_kind_ = CredentialKind::kDefault; + token_credential_ = std::make_shared(); + return Status::OK(); +} + +Status AzureOptions::ConfigureAnonymousCredential() { + credential_kind_ = CredentialKind::kAnonymous; + return Status::OK(); +} + Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) { - credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + credential_kind_ = CredentialKind::kStorageSharedKey; if (account_name.empty()) { return Status::Invalid("AzureOptions doesn't contain a valid account name"); } @@ -126,27 +141,21 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_ke Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret) { - credential_kind_ = CredentialKind::kTokenCredential; + credential_kind_ = CredentialKind::kClientSecret; token_credential_ = std::make_shared( tenant_id, client_id, client_secret); return Status::OK(); } -Status AzureOptions::ConfigureDefaultCredential() { - credential_kind_ = CredentialKind::kTokenCredential; - token_credential_ = std::make_shared(); - return Status::OK(); -} - Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) { - credential_kind_ = CredentialKind::kTokenCredential; + credential_kind_ = CredentialKind::kManagedIdentity; token_credential_ = std::make_shared(client_id); return Status::OK(); } Status AzureOptions::ConfigureWorkloadIdentityCredential() { - credential_kind_ = CredentialKind::kTokenCredential; + credential_kind_ = CredentialKind::kWorkloadIdentity; token_credential_ = std::make_shared(); return Status::OK(); } @@ -158,11 +167,18 @@ Result> AzureOptions::MakeBlobServiceC } switch (credential_kind_) { case CredentialKind::kAnonymous: - break; - case CredentialKind::kTokenCredential: + return std::make_unique(AccountBlobUrl(account_name)); + case CredentialKind::kDefault: + if (!token_credential_) { + token_credential_ = std::make_shared(); + } + [[fallthrough]]; + case CredentialKind::kClientSecret: + case CredentialKind::kManagedIdentity: + case CredentialKind::kWorkloadIdentity: return std::make_unique(AccountBlobUrl(account_name), token_credential_); - case CredentialKind::kStorageSharedKeyCredential: + case CredentialKind::kStorageSharedKey: return std::make_unique(AccountBlobUrl(account_name), storage_shared_key_credential_); } @@ -176,11 +192,19 @@ AzureOptions::MakeDataLakeServiceClient() const { } switch (credential_kind_) { case CredentialKind::kAnonymous: - break; - case CredentialKind::kTokenCredential: + return std::make_unique( + AccountDfsUrl(account_name)); + case CredentialKind::kDefault: + if (!token_credential_) { + token_credential_ = std::make_shared(); + } + [[fallthrough]]; + case CredentialKind::kClientSecret: + case CredentialKind::kManagedIdentity: + case CredentialKind::kWorkloadIdentity: return std::make_unique( AccountDfsUrl(account_name), token_credential_); - case CredentialKind::kStorageSharedKeyCredential: + case CredentialKind::kStorageSharedKey: return std::make_unique( AccountDfsUrl(account_name), storage_shared_key_credential_); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 78e0a8148c616..55f89ba4776e2 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -47,8 +47,23 @@ namespace arrow::fs { class TestAzureFileSystem; /// Options for the AzureFileSystem implementation. +/// +/// By default, authentication is handled by the Azure SDK's credential chain +/// which may read from multiple environment variables, such as: +/// - `AZURE_TENANT_ID` +/// - `AZURE_CLIENT_ID` +/// - `AZURE_CLIENT_SECRET` +/// - `AZURE_AUTHORITY_HOST` +/// - `AZURE_CLIENT_CERTIFICATE_PATH` +/// - `AZURE_FEDERATED_TOKEN_FILE` +/// +/// Functions are provided for explicit configuration of credentials if that is preferred. struct ARROW_EXPORT AzureOptions { - /// \brief account name of the Azure Storage account. + /// \brief The name of the Azure Storage Account being accessed. + /// + /// All service URLs will be constructed using this storage account name. + /// `ConfigureAccountKeyCredential` assumes the user wants to authenticate + /// this account. std::string account_name; /// \brief hostname[:port] of the Azure Blob Storage Service. @@ -92,30 +107,30 @@ struct ARROW_EXPORT AzureOptions { private: enum class CredentialKind { + kDefault, kAnonymous, - kTokenCredential, - kStorageSharedKeyCredential, - } credential_kind_ = CredentialKind::kAnonymous; + kStorageSharedKey, + kClientSecret, + kManagedIdentity, + kWorkloadIdentity, + } credential_kind_ = CredentialKind::kDefault; - std::shared_ptr token_credential_; std::shared_ptr storage_shared_key_credential_; + mutable std::shared_ptr token_credential_; public: AzureOptions(); ~AzureOptions(); Status ConfigureDefaultCredential(); - - Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); - - Status ConfigureWorkloadIdentityCredential(); - + Status ConfigureAnonymousCredential(); Status ConfigureAccountKeyCredential(const std::string& account_key); - Status ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret); + Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); + Status ConfigureWorkloadIdentityCredential(); bool Equals(const AzureOptions& other) const; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index ff94578b041dc..6104b04411b32 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -280,22 +280,38 @@ TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) { ASSERT_RAISES(Invalid, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) { +TEST(AzureFileSystem, InitializeWithDefaultCredential) { AzureOptions options; options.account_name = "dummy-account-name"; - ARROW_EXPECT_OK( - options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); + ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { +TEST(AzureFileSystem, InitializeWithDefaultCredentialImplicitly) { AzureOptions options; options.account_name = "dummy-account-name"; - ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); + AzureOptions explictly_default_options; + explictly_default_options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(explictly_default_options.ConfigureDefaultCredential()); + ASSERT_TRUE(options.Equals(explictly_default_options)); +} + +TEST(AzureFileSystem, InitializeWithAnonymousCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureAnonymousCredential()); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + +TEST(AzureFileSystem, InitializeWithClientSecretCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK( + options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) { +TEST(AzureFileSystem, InitializeWithManagedIdentityCredential) { AzureOptions options; options.account_name = "dummy-account-name"; ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential()); @@ -305,7 +321,7 @@ TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) { EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) { +TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) { AzureOptions options; options.account_name = "dummy-account-name"; ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());