-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add custom multi part upload fs #152
Merged
sre-ci-robot
merged 12 commits into
milvus-io:main
from
shaoting-huang:part_upload_size
Oct 10, 2024
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
e7696cd
add custom multi part upload fs
shaoting-huang 25f26ff
add fs producer
shaoting-huang a71c486
add fs producer
shaoting-huang b595af2
add multi part upload s3 fs into s3 fs producer
shaoting-huang b46bbc3
fix check format
shaoting-huang 9d8a53b
clean up
shaoting-huang 1d5a099
add storage config
shaoting-huang d74c011
fix check format
shaoting-huang f054b95
sync with arrow 17.0.0
shaoting-huang 4fada40
sync with arrow 17.0.0
shaoting-huang 105a10c
sync with arrow 17.0.0
shaoting-huang ce5a099
sync with arrow 17.0.0
shaoting-huang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright 2024 Zilliz | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <string> | ||
#include "arrow/status.h" | ||
|
||
namespace milvus_storage { | ||
|
||
/// Path separator character used by the path helpers in this header.
/// `inline` ensures the header-defined constant is a single entity across all
/// translation units that include this header.
inline constexpr char kSep = '/';
|
||
/// Builds the error status reported when `path` is not a regular file.
///
/// Declared `inline` because the function is defined in a header: without it,
/// every translation unit including this header emits its own external
/// definition and the program fails to link (or violates the ODR).
///
/// @param path Path that is embedded in the error message.
/// @return An arrow::Status::IOError whose message is "Not a regular file: " followed by `path`.
inline arrow::Status NotAFile(std::string_view path) {
  return arrow::Status::IOError("Not a regular file: " + std::string(path));
}
|
||
/// Tells whether `s` ends with the path separator `kSep`.
///
/// Declared `inline` because the function is defined in a header and would
/// otherwise be multiply defined when the header is included from several
/// translation units.
///
/// @param s Path to inspect; may be empty.
/// @return true if `s` is non-empty and its last character is `kSep`, false otherwise.
inline bool HasTrailingSlash(std::string_view s) { return !s.empty() && s.back() == kSep; }
|
||
std::string EnsureTrailingSlash(std::string_view v) { | ||
if (!v.empty() && !HasTrailingSlash(v)) { | ||
// XXX How about "C:" on Windows? We probably don't want to turn it into "C:/"... | ||
// Unless the local filesystem always uses absolute paths | ||
return std::string(v) + kSep; | ||
} else { | ||
return std::string(v); | ||
} | ||
} | ||
|
||
std::pair<std::string, std::string> GetAbstractPathParent(const std::string& s) { | ||
// XXX should strip trailing slash? | ||
|
||
auto pos = s.find_last_of(kSep); | ||
if (pos == std::string::npos) { | ||
// Empty parent | ||
return {{}, s}; | ||
} | ||
return {s.substr(0, pos), s.substr(pos + 1)}; | ||
} | ||
|
||
} // namespace milvus_storage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#include <arrow/io/interfaces.h> | ||
#include <arrow/status.h> | ||
#include <arrow/util/thread_pool.h> | ||
#include "common/log.h" | ||
|
||
namespace milvus_storage { | ||
|
||
template <typename... SubmitArgs> | ||
auto SubmitIO(arrow::io::IOContext io_context, SubmitArgs&&... submit_args) | ||
-> decltype(std::declval<::arrow::internal::Executor*>()->Submit(submit_args...)) { | ||
arrow::internal::TaskHints hints; | ||
hints.external_id = io_context.external_id(); | ||
return io_context.executor()->Submit(hints, io_context.stop_token(), std::forward<SubmitArgs>(submit_args)...); | ||
}; | ||
|
||
/// Closes `file` on behalf of a destructor, where a failure cannot be returned
/// to the caller.
///
/// When Close() fails, the failure is reported through LOG_STORAGE_FATAL_ with
/// the type name of the file prepended to the original status message.
/// NOTE(review): whether this terminates the process depends on how
/// LOG_STORAGE_FATAL_ is defined in common/log.h - confirm.
///
/// Declared `inline` because the function is defined in a header and would
/// otherwise be multiply defined when the header is included from several
/// translation units.
///
/// @param file File to close. It is dereferenced unconditionally, so it must not be null.
inline void CloseFromDestructor(arrow::io::FileInterface* file) {
  const arrow::Status st = file->Close();
  if (st.ok()) {
    return;
  }
  // The name returned by typeid is implementation-defined (typically mangled);
  // it is only used to make the log message more helpful.
  const char* file_type = typeid(*file).name();
  std::stringstream ss;
  ss << "When destroying file of type " << file_type << ": " << st.message();
  LOG_STORAGE_FATAL_ << st.WithMessage(ss.str());
}
|
||
} // namespace milvus_storage |
99 changes: 99 additions & 0 deletions
99
cpp/include/milvus-storage/filesystem/s3/multi_part_upload_s3_fs.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
// Copyright 2024 Zilliz | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
#include <cstdlib> | ||
#include "common/log.h" | ||
#include "common/macro.h" | ||
|
||
#include <arrow/util/key_value_metadata.h> | ||
#include <arrow/filesystem/s3fs.h> | ||
#include "arrow/filesystem/filesystem.h" | ||
#include "arrow/util/macros.h" | ||
#include "arrow/util/uri.h" | ||
#include "arrow/io/interfaces.h" | ||
|
||
using namespace arrow; | ||
using ::arrow::fs::FileInfo; | ||
using ::arrow::fs::FileInfoGenerator; | ||
|
||
namespace milvus_storage { | ||
|
||
class MultiPartUploadS3FS : public arrow::fs::S3FileSystem { | ||
public: | ||
~MultiPartUploadS3FS() override; | ||
|
||
std::string type_name() const override { return "multiPartUploadS3"; } | ||
|
||
bool Equals(const FileSystem& other) const override; | ||
|
||
arrow::Result<std::string> PathFromUri(const std::string& uri_string) const override; | ||
|
||
arrow::Result<FileInfo> GetFileInfo(const std::string& path) override; | ||
|
||
arrow::Result<std::vector<arrow::fs::FileInfo>> GetFileInfo(const arrow::fs::FileSelector& select) override; | ||
|
||
FileInfoGenerator GetFileInfoGenerator(const arrow::fs::FileSelector& select) override; | ||
|
||
arrow::Status CreateDir(const std::string& path, bool recursive) override; | ||
|
||
arrow::Status DeleteDir(const std::string& path) override; | ||
|
||
arrow::Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override; | ||
|
||
Future<> DeleteDirContentsAsync(const std::string& path, bool missing_dir_ok) override; | ||
|
||
arrow::Status DeleteRootDirContents() override; | ||
|
||
arrow::Status DeleteFile(const std::string& path) override; | ||
|
||
arrow::Status Move(const std::string& src, const std::string& dest) override; | ||
|
||
arrow::Status CopyFile(const std::string& src, const std::string& dest) override; | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenOutputStreamWithUploadSize(const std::string& s, | ||
int64_t part_size); | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenOutputStreamWithUploadSize( | ||
const std::string& s, const std::shared_ptr<const arrow::KeyValueMetadata>& metadata, int64_t part_size); | ||
|
||
static arrow::Result<std::shared_ptr<MultiPartUploadS3FS>> Make( | ||
const arrow::fs::S3Options& options, const arrow::io::IOContext& = arrow::io::default_io_context()); | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::InputStream>> OpenInputStream(const std::string& path) override; | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::InputStream>> OpenInputStream(const FileInfo& info) override; | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> OpenInputFile(const std::string& s) override; | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> OpenInputFile(const FileInfo& info) override; | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenOutputStream( | ||
const std::string& path, const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override; | ||
|
||
arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenAppendStream( | ||
const std::string& path, const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override; | ||
|
||
protected: | ||
explicit MultiPartUploadS3FS(const arrow::fs::S3Options& options, const arrow::io::IOContext& io_context); | ||
|
||
class Impl; | ||
std::shared_ptr<Impl> impl_; | ||
}; | ||
|
||
} // namespace milvus_storage |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
S3FileSystem
already has such a field; can we use that instead of redefining it?