-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-38699: [C++][FS][Azure] Implement CreateDir()
#38708
Changes from all commits
bfb8cdd
5dcac69
aabc7b2
6a16e9f
4d312ad
60890c6
9dbc1c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,7 @@ | |
#include <azure/storage/common/storage_credential.hpp> | ||
#include <azure/storage/files/datalake.hpp> | ||
|
||
#include "arrow/filesystem/path_util.h" | ||
#include "arrow/filesystem/test_util.h" | ||
#include "arrow/result.h" | ||
#include "arrow/testing/gtest_util.h" | ||
|
@@ -225,6 +226,10 @@ class AzureFileSystemTest : public ::testing::Test { | |
return s; | ||
} | ||
|
||
std::string RandomContainerName() { return RandomChars(32); } | ||
|
||
std::string RandomDirectoryName() { return RandomChars(32); } | ||
|
||
void UploadLines(const std::vector<std::string>& lines, const char* path_to_file, | ||
int total_size) { | ||
// TODO(GH-38333): Switch to using Azure filesystem to write once its implemented. | ||
|
@@ -267,6 +272,22 @@ class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest { | |
} | ||
}; | ||
|
||
// How to enable this test: | ||
// | ||
// You need an Azure account. You should be able to create a free | ||
// account at https://azure.microsoft.com/en-gb/free/ . You should be | ||
// able to create a storage account through the portal Web UI. | ||
// | ||
// See also the official document how to create a storage account: | ||
// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account | ||
// | ||
// A few suggestions on configuration: | ||
// | ||
// * Use Standard general-purpose v2 not premium | ||
// * Use LRS redundancy | ||
// * Obviously you need to enable hierarchical namespace. | ||
// * Set the default access tier to hot | ||
// * SFTP, NFS and file shares are not required. | ||
class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest { | ||
Result<AzureOptions> MakeOptions() override { | ||
AzureOptions options; | ||
|
@@ -396,6 +417,96 @@ TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) { | |
RunGetFileInfoObjectTest(); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { | ||
ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { | ||
auto container_name = RandomContainerName(); | ||
ASSERT_OK(fs_->CreateDir(container_name, false)); | ||
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) { | ||
const auto path = PreexistingContainerPath() + RandomDirectoryName(); | ||
ASSERT_OK(fs_->CreateDir(path, false)); | ||
// There is only virtual directory without hierarchical namespace | ||
// support. So the CreateDir() does nothing. | ||
arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); | ||
} | ||
|
||
TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirSuccessContainerAndDirectory) { | ||
const auto path = PreexistingContainerPath() + RandomDirectoryName(); | ||
ASSERT_OK(fs_->CreateDir(path, false)); | ||
arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { | ||
const auto path = std::string("not-a-container/new-directory"); | ||
ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { | ||
ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); | ||
} | ||
|
||
TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { | ||
auto container_name = RandomContainerName(); | ||
ASSERT_OK(fs_->CreateDir(container_name, true)); | ||
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { | ||
auto container_name = RandomContainerName(); | ||
ASSERT_OK(fs_->CreateDir(container_name, true)); | ||
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { | ||
const auto parent = PreexistingContainerPath() + RandomDirectoryName(); | ||
const auto path = internal::ConcatAbstractPath(parent, "new-sub"); | ||
ASSERT_OK(fs_->CreateDir(path, true)); | ||
arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); | ||
arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { | ||
const auto parent = PreexistingContainerPath() + RandomDirectoryName(); | ||
const auto path = internal::ConcatAbstractPath(parent, "new-sub"); | ||
ASSERT_OK(fs_->CreateDir(path, true)); | ||
// There is only virtual directory without hierarchical namespace | ||
// support. So the CreateDir() does nothing. | ||
arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); | ||
arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we can disambiguate "file exists?" queries in the filesystem API, we should probably always reply true when the caller is asking if a directory exists. If creating a directory is a no-op that succeeds, the post-condition of CreateDir -- the directory now exists -- should be true. There are might be bad consequences of this, so this is more of an idea than a suggestion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's an interring point. I don't feel strongly but personally I think the current behaviour is the best option.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with the opinion of Tom-Newton but let's discuss this further on #38772. |
||
} | ||
|
||
TEST_F(AzureHierarchicalNamespaceFileSystemTest, | ||
CreateDirRecursiveSuccessContainerAndDirectory) { | ||
auto container_name = RandomContainerName(); | ||
const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); | ||
const auto path = internal::ConcatAbstractPath(parent, "new-sub"); | ||
ASSERT_OK(fs_->CreateDir(path, true)); | ||
arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); | ||
arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); | ||
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) { | ||
auto container_name = RandomContainerName(); | ||
const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); | ||
const auto path = internal::ConcatAbstractPath(parent, "new-sub"); | ||
ASSERT_OK(fs_->CreateDir(path, true)); | ||
// There is only virtual directory without hierarchical namespace | ||
// support. So the CreateDir() does nothing. | ||
arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); | ||
arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); | ||
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, CreateDirUri) { | ||
ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true)); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { | ||
std::shared_ptr<io::InputStream> stream; | ||
ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); | ||
|
@@ -455,7 +566,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { | |
} | ||
|
||
TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) { | ||
ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfss://" + PreexistingObjectPath())); | ||
ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + PreexistingObjectPath())); | ||
} | ||
|
||
TEST_F(AzuriteFileSystemTest, OpenInputStreamTrailingSlash) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can see why
path_to_file
is actually the name of the directory in this context, but maybe a different name for this struct field would make things less confusing? This segment of filesystem paths is usually called "basename" [1].[1] https://en.wikipedia.org/wiki/Basename
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I spent quite some time trying to figure out what
if (path.path_to_file.empty()) {
meant here.path.basename.empty()
would be more clear IMO.cc @Tom-Newton
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that "basename" isn't suitable for this case.
I think that "d" is the basename of "a/b/c/d" but
path_to_file
is "b/c/d". ("a" iscontainer
.)I think that
path
is suitable for "b/c/d" butAzurePath::path
is strange... How about renamingAzurePath
toAzureLocation
and usingcontainer
fora
andpath
forb/c/d
?FYI:
abfs://...
URI yet. (FileSystemFromUri()
doesn't acceptabfs://...
yet.)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh. I misinterpreted the meaning of
path_to_file
. I thinkpath
would be OK. AndAzure{Path->Location}
is also a good rename.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 for @kou's the suggested re-name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've opened a new issue for this: #38758
I'll do it after we merge this.