diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 84733a824e7ba..bb8309a247402 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1179,6 +1179,15 @@ Status CreateContainerIfNotExists(const std::string& container_name, } } +FileInfo FileInfoFromPath(std::string_view container, + const DataLake::Models::PathItem& path) { + FileInfo info{internal::ConcatAbstractPath(container, path.Name), + path.IsDirectory ? FileType::Directory : FileType::File}; + info.set_size(path.FileSize); + info.set_mtime(std::chrono::system_clock::time_point{path.LastModified}); + return info; +} + FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -1576,7 +1585,7 @@ class AzureFileSystem::Impl { } private: - /// \pref location.container is not empty. + /// \pre location.container is not empty. template Status CheckDirExists(const ContainerClient& container_client, const AzureLocation& location) { @@ -1623,6 +1632,50 @@ class AzureFileSystem::Impl { return result; } + /// \brief List the paths at the root of a filesystem or some dir in a filesystem. + /// + /// \pre adlfs_client is the client for the filesystem named like the first + /// segment of select.base_dir. + Status GetFileInfoWithSelectorFromFileSystem( + const DataLake::DataLakeFileSystemClient& adlfs_client, + const Core::Context& context, Azure::Nullable page_size_hint, + const FileSelector& select, FileInfoVector* acc_results) { + ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); + + auto directory_client = adlfs_client.GetDirectoryClient(base_location.path); + bool found = false; + DataLake::ListPathsOptions options; + options.PageSizeHint = page_size_hint; + + try { + auto list_response = directory_client.ListPaths(select.recursive, options, context); + for (; list_response.HasPage(); list_response.MoveToNextPage(context)) { + if (list_response.Paths.empty()) { + continue; + } + found = true; + for (const auto& path : list_response.Paths) { + if (path.Name == base_location.path && !path.IsDirectory) { + return NotADir(base_location); + } + acc_results->push_back(FileInfoFromPath(base_location.container, path)); + } + } + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception) || exception.ErrorCode == "PathNotFound") { + found = false; + } else { + return ExceptionToStatus(exception, + "Failed to list paths in a directory: ", select.base_dir, + ": ", directory_client.GetUrl()); + } + } + + return found || select.allow_not_found + ? Status::OK() + : ::arrow::fs::internal::PathNotFound(select.base_dir); + } + /// \brief List the blobs at the root of a container or some dir in a container. /// /// \pre container_client is the client for the container named like the first @@ -1772,6 +1825,20 @@ class AzureFileSystem::Impl { return VisitContainers(context, std::move(on_container)); } + auto adlfs_client = GetFileSystemClient(base_location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + if (select.allow_not_found) { + return Status::OK(); + } else { + return ::arrow::fs::internal::PathNotFound(select.base_dir); + } + } + if (hns_support == HNSSupport::kEnabled) { + return GetFileInfoWithSelectorFromFileSystem(adlfs_client, context, page_size_hint, + select, acc_results); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); auto container_client = blob_service_client_->GetBlobContainerClient(base_location.container); return GetFileInfoWithSelectorFromContainer(container_client, context, page_size_hint, @@ -2157,6 +2224,17 @@ class AzureFileSystem::Impl { Azure::Nullable lease_id = {}) { DCHECK(!location.container.empty()); DCHECK(!location.path.empty()); + ARROW_ASSIGN_OR_RAISE(auto file_info, GetFileInfo(adlfs_client, location, lease_id)); + if (file_info.type() == FileType::NotFound) { + if (require_dir_to_exist) { + return PathNotFound(location); + } else { + return Status::OK(); + } + } + if (file_info.type() != FileType::Directory) { + return NotADir(location); + } auto directory_client = adlfs_client.GetDirectoryClient( std::string(internal::RemoveTrailingSlash(location.path))); DataLake::DeleteDirectoryOptions options; @@ -2200,6 +2278,9 @@ class AzureFileSystem::Impl { kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); } } else { + if (path.Name == location.path) { + return NotADir(location); + } auto sub_file_client = adlfs_client.GetFileClient(path.Name); try { sub_file_client.Delete(); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 24031e313f798..ed09bfc2fadd7 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -392,7 +392,7 @@ class TestGeneric : public ::testing::Test, public GenericFileSystemTest { bool allow_move_dir() const override { return false; } bool allow_move_file() const override { return true; } bool allow_append_to_file() const override { return true; } - bool have_directory_mtimes() const override { return false; } + bool have_directory_mtimes() const override { return true; } bool have_flaky_directory_tree_deletion() const override { return false; } bool have_file_metadata() const override { return true; } // calloc() used in libxml2's xmlNewGlobalState() is detected as a @@ -429,6 +429,8 @@ class TestAzuriteGeneric : public TestGeneric { protected: // Azurite doesn't support moving files over containers. bool allow_move_file() const override { return false; } + // Azurite doesn't support directory mtime. + bool have_directory_mtimes() const override { return false; } // DeleteDir() doesn't work with Azurite on macOS bool have_flaky_directory_tree_deletion() const override { return env_->HasSubmitBatchBug(); @@ -449,6 +451,8 @@ class TestAzureFlatNSGeneric : public TestGeneric { protected: // Flat namespace account doesn't support moving files over containers. bool allow_move_file() const override { return false; } + // Flat namespace account doesn't support directory mtime. + bool have_directory_mtimes() const override { return false; } }; class TestAzureHierarchicalNSGeneric : public TestGeneric {