diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 37f0f986..42192bdd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,7 +22,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v2 with: - go-version: 1.19 + go-version: 1.21 - name: Install dependencies run: cd go && go mod download @@ -35,6 +35,27 @@ jobs: -v /tmp/data:/data \ -v /tmp/config:/root/.minio \ minio/minio server /data + + - name: Install dependencies + uses: aminya/setup-cpp@v1 + with: + conan: 1.61.0 + cmake: true + + - name: setup conan + run: + conan remote add default-conan-local https://milvus01.jfrog.io/artifactory/api/conan/default-conan-local --insert + && conan remote list + + - name: conan package cache + uses: actions/cache@v3 + with: + path: ~/.conan + key: conan-${{ hashFiles('./cpp/conanfile.py') }} + restore-keys: conan- + + - name: Build c++ + run: cd cpp && make - name: Run tests - run: cd go && go test -v ./... + run: cd go && make && make test diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bbb92f2d..e23f5f1d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -9,6 +9,9 @@ option(WITH_BENCHMARK "Build with micro benchmark." ON) set(CMAKE_CXX_STANDARD 17) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}) +include(GNUInstallDirs) + if (WITH_OPENDAL) add_compile_definitions(MILVUS_OPENDAL) @@ -16,12 +19,12 @@ if (WITH_OPENDAL) include(libopendal) endif() -find_package(Azure REQUIRED) +#find_package(Azure REQUIRED) find_package(Boost REQUIRED) find_package(Arrow REQUIRED) find_package(protobuf REQUIRED) find_package(glog REQUIRED) -find_package(AWSSDK REQUIRED) +#find_package(AWSSDK REQUIRED) file(GLOB_RECURSE SRC_FILES src/*.cpp src/*.cc) add_library(milvus-storage ${SRC_FILES}) @@ -49,4 +52,8 @@ endif() if (WITH_BENCHMARK) add_subdirectory(benchmark) -endif() \ No newline at end of file +endif() + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/milvus-storage.pc.in "${CMAKE_CURRENT_BINARY_DIR}/milvus-storage.pc" @ONLY) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/milvus-storage.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libmilvus-storage.dylib" DESTINATION "${CMAKE_INSTALL_LIBDIR}") \ No newline at end of file diff --git a/cpp/Makefile b/cpp/Makefile index f72c7ccc..da6cc9f6 100644 --- a/cpp/Makefile +++ b/cpp/Makefile @@ -7,8 +7,9 @@ endif build: mkdir -p build && cd build && \ - conan install .. --build=missing --update && \ - conan build .. + conan install .. --build=missing -u && \ + conan build .. && \ + conan install .. --install-folder . -g make debug: mkdir -p build && cd build && \ diff --git a/cpp/conanfile.py b/cpp/conanfile.py index 26c7b7db..e6ca9491 100644 --- a/cpp/conanfile.py +++ b/cpp/conanfile.py @@ -12,12 +12,13 @@ class StorageConan(ConanFile): - name = "storage" + name = "milvus-storage" description = "empty" topics = ("vector", "cloud", "ann") url = "https://github.com/milvus-io/milvus-storage" homepage = "https://github.com/milvus-io/milvus-storage" license = "Apache-2.0" + version = "0.1.0" settings = "os", "arch", "compiler", "build_type" options = { @@ -48,11 +49,12 @@ class StorageConan(ConanFile): "arrow:with_jemalloc": True, "boost:without_test": True, } - exports_sources = ( "src/*", + "include/*", "thirdparty/*", "test/*", + "benchmark/*", "CMakeLists.txt", "*.cmake", "conanfile.py", @@ -156,8 +158,7 @@ def build(self): def package(self): cmake = CMake(self) cmake.install() - files.rmdir(self, os.path.join(self.package_folder, "lib", "cmake")) - files.rmdir(self, os.path.join(self.package_folder, "lib", "pkgconfig")) + self.copy("*_c.h") def package_info(self): self.cpp_info.set_property("cmake_file_name", "storage") diff --git a/cpp/include/milvus-storage/packed/reader.h b/cpp/include/milvus-storage/packed/reader.h index 06ed1996..c808af0c 100644 --- a/cpp/include/milvus-storage/packed/reader.h +++ b/cpp/include/milvus-storage/packed/reader.h @@ -40,6 +40,12 @@ using RowOffsetMinHeap = class PackedRecordBatchReader : public arrow::RecordBatchReader { public: + // Test only + PackedRecordBatchReader(arrow::fs::FileSystem& fs, + const std::string& path, + const std::shared_ptr schema, + const int64_t buffer_size = DEFAULT_READ_BUFFER_SIZE); + PackedRecordBatchReader(arrow::fs::FileSystem& fs, const std::vector& paths, const std::shared_ptr schema, diff --git a/cpp/include/milvus-storage/packed/reader_c.h b/cpp/include/milvus-storage/packed/reader_c.h new file mode 100644 index 00000000..c7576fab --- /dev/null +++ b/cpp/include/milvus-storage/packed/reader_c.h @@ -0,0 +1,32 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef void* CReader; +typedef void* CStatus; +typedef void* CRecordBatch; +typedef void* CFileSystem; + +int Open(const char* path, struct ArrowSchema* schema, const int64_t buffer_size, struct ArrowArrayStream* out); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/cpp/src/milvus-storage.pc.in b/cpp/src/milvus-storage.pc.in new file mode 100644 index 00000000..cd11676a --- /dev/null +++ b/cpp/src/milvus-storage.pc.in @@ -0,0 +1,9 @@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: Milvus Storage +Description: Milvus Storage +Version: @PROJECT_VERSION@ + +Libs: -L${libdir} -lstorage +Cflags: -I${includedir} \ No newline at end of file diff --git a/cpp/src/packed/reader.cpp b/cpp/src/packed/reader.cpp index 3e9b94f7..c6aa969e 100644 --- a/cpp/src/packed/reader.cpp +++ b/cpp/src/packed/reader.cpp @@ -27,6 +27,13 @@ namespace milvus_storage { +PackedRecordBatchReader::PackedRecordBatchReader(arrow::fs::FileSystem& fs, + const std::string& path, + const std::shared_ptr schema, + const int64_t buffer_size) + : PackedRecordBatchReader( + fs, std::vector{path}, schema, std::vector(), std::set(), buffer_size) {} + PackedRecordBatchReader::PackedRecordBatchReader(arrow::fs::FileSystem& fs, const std::vector& paths, const std::shared_ptr schema, @@ -39,14 +46,27 @@ PackedRecordBatchReader::PackedRecordBatchReader(arrow::fs::FileSystem& fs, row_limit_(0), absolute_row_position_(0), read_count_(0) { + auto cols = std::set(needed_columns); + if (cols.empty()) { + for (int i = 0; i < schema->num_fields(); i++) { + cols.emplace(i); + } + } + auto offsets = std::vector(column_offsets); + if (column_offsets.empty()) { + for (int i = 0; i < schema->num_fields(); i++) { + offsets.emplace_back(0, i); + } + } std::set needed_paths; - for (int i : needed_columns) { - needed_column_offsets_.push_back(column_offsets[i]); - needed_paths.emplace(column_offsets[i].path_index); + for (int i : cols) { + needed_column_offsets_.push_back(offsets[i]); + needed_paths.emplace(offsets[i].path_index); } for (auto i : needed_paths) { auto result = MakeArrowFileReader(fs, paths[i]); if (!result.ok()) { + LOG_STORAGE_ERROR_ << "Error making file reader " << i << ":" << result.status().ToString(); throw std::runtime_error(result.status().ToString()); } file_readers_.emplace_back(std::move(result.value())); @@ -54,6 +74,10 @@ PackedRecordBatchReader::PackedRecordBatchReader(arrow::fs::FileSystem& fs, for (int i = 0; i < file_readers_.size(); ++i) { auto metadata = file_readers_[i]->parquet_reader()->metadata()->key_value_metadata()->Get(ROW_GROUP_SIZE_META_KEY); + if (!metadata.ok()) { + LOG_STORAGE_ERROR_ << "metadata not found in file " << i; + throw std::runtime_error(metadata.status().ToString()); + } row_group_sizes_.push_back(PackedMetaSerde::deserialize(metadata.ValueOrDie())); LOG_STORAGE_DEBUG_ << " file " << i << " metadata size: " << file_readers_[i]->parquet_reader()->metadata()->size(); } @@ -149,7 +173,6 @@ arrow::Status PackedRecordBatchReader::advanceBuffer() { } buffer_available_ -= plan_buffer_size; row_limit_ = sorted_offsets.top().second; - return arrow::Status::OK(); } diff --git a/cpp/src/packed/reader_c.cpp b/cpp/src/packed/reader_c.cpp new file mode 100644 index 00000000..a94279e6 --- /dev/null +++ b/cpp/src/packed/reader_c.cpp @@ -0,0 +1,47 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "packed/reader_c.h" +#include "common/log.h" +#include "packed/reader.h" +#include "filesystem/fs.h" +#include "common/config.h" + +#include +#include +#include +#include +#include + +int Open(const char* path, struct ArrowSchema* schema, const int64_t buffer_size, struct ArrowArrayStream* out) { + auto truePath = std::string(path); + auto factory = std::make_shared(); + auto conf = milvus_storage::StorageConfig(); + conf.uri = "file:///tmp/"; + auto r = factory->BuildFileSystem(conf, &truePath); + if (!r.ok()) { + LOG_STORAGE_ERROR_ << "Error building filesystem: " << path; + return -2; + } + auto trueFs = r.value(); + auto trueSchema = arrow::ImportSchema(schema).ValueOrDie(); + auto reader = std::make_shared(*trueFs, path, trueSchema, buffer_size); + auto status = ExportRecordBatchReader(reader, out); + LOG_STORAGE_ERROR_ << "read export done"; + if (!status.ok()) { + LOG_STORAGE_ERROR_ << "Error exporting record batch reader" << status.ToString(); + return static_cast(status.code()); + } + return 0; +} \ No newline at end of file diff --git a/cpp/test/packed/packed_test_base.h b/cpp/test/packed/packed_test_base.h index cc92371c..506e1e49 100644 --- a/cpp/test/packed/packed_test_base.h +++ b/cpp/test/packed/packed_test_base.h @@ -39,8 +39,6 @@ #include #include -using namespace std; - namespace milvus_storage { class PackedTestBase : public ::testing::Test { @@ -76,6 +74,7 @@ class PackedTestBase : public ::testing::Test { void TearDown() override { if (fs_ != nullptr) { + // FIXME: delete is not working fs_->DeleteDir(file_path_); } } @@ -201,7 +200,7 @@ class PackedTestBase : public ::testing::Test { std::vector int32_values; std::vector int64_values; - std::vector> str_values; + std::vector> str_values; }; } // namespace milvus_storage \ No newline at end of file diff --git a/go/Makefile b/go/Makefile index ef7cf605..c00e3461 100644 --- a/go/Makefile +++ b/go/Makefile @@ -1,6 +1,23 @@ +include ../cpp/build/conanbuildinfo.mak + +MILVUS_STORAGE_ROOT = $(abspath $(CURDIR)/..) +MILVUS_STORAGE_INCLUDE_DIR = $(abspath $(MILVUS_STORAGE_ROOT)/cpp/include) +MILVUS_STORAGE_LD_DIR = $(abspath $(MILVUS_STORAGE_ROOT)/cpp/build/Release) + +CFLAGS += $(CONAN_CFLAGS) +CXXFLAGS += $(CONAN_CXXFLAGS) +INCLUDE_DIRS = $(CONAN_INCLUDE_DIRS_ARROW) $(MILVUS_STORAGE_INCLUDE_DIR) +CPPFLAGS = $(addprefix -I, $(INCLUDE_DIRS)) +LDFLAGS += $(addprefix -L, $(MILVUS_STORAGE_LD_DIR)) + .EXPORT_ALL_VARIABLES: -.PHONY: proto +.PHONY: build + +build: + CGO_CFLAGS="$(CPPFLAGS)" CGO_LDFLAGS="$(LDFLAGS) -lmilvus-storage" go build ./... +test: + CGO_CFLAGS="$(CPPFLAGS)" CGO_LDFLAGS="$(LDFLAGS) -Wl,-rpath,$(MILVUS_STORAGE_LD_DIR) -lmilvus-storage" go test -timeout 30s ./... proto: mkdir -p proto/manifest_proto mkdir -p proto/schema_proto diff --git a/go/go.mod b/go/go.mod index 7bdc7af3..90d76a9b 100644 --- a/go/go.mod +++ b/go/go.mod @@ -2,31 +2,32 @@ module github.com/milvus-io/milvus-storage/go go 1.19 +toolchain go1.23.1 + require ( github.com/apache/arrow/go/v12 v12.0.0-20230223012627-e0e740bd7a24 github.com/bits-and-blooms/bitset v1.5.0 - github.com/google/uuid v1.3.0 + github.com/golang/protobuf v1.5.4 + github.com/google/uuid v1.6.0 github.com/minio/minio-go/v7 v7.0.61 - github.com/stretchr/testify v1.8.4 + github.com/pkg/errors v0.9.1 + github.com/stretchr/testify v1.9.0 go.uber.org/zap v1.24.0 - google.golang.org/grpc v1.57.0 - google.golang.org/protobuf v1.31.0 ) require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect - github.com/andybalholm/brotli v1.0.4 // indirect - github.com/apache/thrift v0.18.1 // indirect + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/apache/thrift v0.19.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/goccy/go-json v0.10.2 // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/goccy/go-json v0.10.3 // indirect github.com/golang/snappy v0.0.4 // indirect - github.com/google/flatbuffers v2.0.8+incompatible // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/compress v1.16.7 // indirect - github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect @@ -34,8 +35,7 @@ require ( github.com/minio/sha256-simd v1.0.1 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/pierrec/lz4/v4 v4.1.18 // indirect - github.com/pkg/errors v0.9.1 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/rs/xid v1.5.0 // indirect @@ -44,16 +44,19 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/goleak v1.2.1 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.11.0 // indirect - golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691 // indirect - golang.org/x/mod v0.12.0 // indirect - golang.org/x/net v0.12.0 // indirect - golang.org/x/sync v0.3.0 // indirect - golang.org/x/sys v0.10.0 // indirect - golang.org/x/text v0.11.0 // indirect - golang.org/x/tools v0.11.0 // indirect - golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230726155614-23370e0ffb3e // indirect + golang.org/x/crypto v0.24.0 // indirect + golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect + golang.org/x/mod v0.18.0 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/tools v0.22.0 // indirect + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect + gonum.org/v1/gonum v0.14.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect + google.golang.org/grpc v1.63.2 // indirect + google.golang.org/protobuf v1.34.2 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/go.sum b/go/go.sum index a451209c..02f5243c 100644 --- a/go/go.sum +++ b/go/go.sum @@ -1,11 +1,11 @@ github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= -github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/apache/arrow/go/v12 v12.0.0-20230223012627-e0e740bd7a24 h1:3klg6Gtrm0jGkiXWLYricKhI1pYYFuBFXhGzOT5B1eo= github.com/apache/arrow/go/v12 v12.0.0-20230223012627-e0e740bd7a24/go.mod h1:3JcT3bSZFdc7wLPKSlQXhf3L0GjPz0TOmLlG1YXnBfU= -github.com/apache/thrift v0.18.1 h1:lNhK/1nqjbwbiOPDBPFJVKxgDEGSepKuTh6OLiXW8kg= -github.com/apache/thrift v0.18.1/go.mod h1:rdQn/dCcDKEWjjylUeueum4vQEjG2v8v2PqriUnbr+I= +github.com/apache/thrift v0.19.0 h1:sOqkWPzMj7w6XaYbJQG7m4sGqVolaW/0D28Ln7yPzMk= +github.com/apache/thrift v0.19.0/go.mod h1:SUALL216IiaOw2Oy+5Vs9lboJ/t9g40C+G07Dc0QC1I= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/bits-and-blooms/bitset v1.5.0 h1:NpE8frKRLGHIcEzkR+gZhiioW1+WbYV6fKwD6ZIpQT8= github.com/bits-and-blooms/bitset v1.5.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= @@ -16,29 +16,27 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= -github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= -github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -58,8 +56,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -74,11 +72,11 @@ github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= @@ -90,36 +88,34 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= -golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA= -golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio= -golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691 h1:/yRP+0AN7mf5DkD3BAI6TOFnd51gEoDEb8o35jIFtgw= -golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= -golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= +golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= -golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= -golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/tools v0.11.0 h1:EMCa6U9S2LtZXLAMoWiR/R8dAQFRqbAitmbJ2UKhoi8= -golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230726155614-23370e0ffb3e h1:S83+ibolgyZ0bqz7KEsUOPErxcv4VzlszxY+31OfB/E= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230726155614-23370e0ffb3e/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= -google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw= -google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0= +gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= +google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= +google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= diff --git a/go/packed/packed.go b/go/packed/packed.go new file mode 100644 index 00000000..4c718050 --- /dev/null +++ b/go/packed/packed.go @@ -0,0 +1,50 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packed + +/* +#include +#include "milvus-storage/packed/reader_c.h" +#include "arrow/c/abi.h" +#include "arrow/c/helpers.h" +*/ +import "C" +import ( + "errors" + "fmt" + "unsafe" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/arrio" + "github.com/apache/arrow/go/v12/arrow/cdata" +) + +func Open(path string, schema *arrow.Schema, bufferSize int) (arrio.Reader, error) { + // var cSchemaPtr uintptr + // cSchema := cdata.SchemaFromPtr(cSchemaPtr) + var cas cdata.CArrowSchema + cdata.ExportArrowSchema(schema, &cas) + casPtr := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas)) + var cass cdata.CArrowArrayStream + + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + status := C.Open(cPath, casPtr, C.int64_t(bufferSize), (*C.struct_ArrowArrayStream)(unsafe.Pointer(&cass))) + if status != 0 { + return nil, errors.New(fmt.Sprintf("failed to open file: %s, status: %d", path, status)) + } + reader := cdata.ImportCArrayStream((*cdata.CArrowArrayStream)(unsafe.Pointer(&cass)), schema) + return reader, nil +} diff --git a/go/packed/packed_test.go b/go/packed/packed_test.go new file mode 100644 index 00000000..d236c4be --- /dev/null +++ b/go/packed/packed_test.go @@ -0,0 +1,69 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packed + +import ( + "testing" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/stretchr/testify/assert" +) + +func TestRead(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int32}, + {Name: "b", Type: arrow.PrimitiveTypes.Int64}, + {Name: "c", Type: arrow.BinaryTypes.String}, + }, nil) + + b := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer b.Release() + for idx := range schema.Fields() { + switch idx { + case 0: + b.Field(idx).(*array.Int32Builder).AppendValues( + []int32{int32(1), int32(2), int32(3)}, nil, + ) + case 1: + b.Field(idx).(*array.Int64Builder).AppendValues( + []int64{int64(4), int64(5), int64(6)}, nil, + ) + case 2: + b.Field(idx).(*array.StringBuilder).AppendValues( + []string{"a", "b", "c"}, nil, + ) + } + } + //rec := b.NewRecord() + + path := "testdata/0" + // file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE, 0666) + // assert.NoError(t, err) + // writer, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps()) + // assert.NoError(t, err) + // err = writer.Write(rec) + // assert.NoError(t, err) + // err = writer.Close() + // assert.NoError(t, err) + + reader, err := Open(path, schema, 10*1024*1024 /* 10MB */) + assert.NoError(t, err) + rr, err := reader.Read() + assert.NoError(t, err) + defer rr.Release() + assert.Equal(t, int64(300), rr.NumRows()) +} diff --git a/go/packed/testdata/0 b/go/packed/testdata/0 new file mode 100644 index 00000000..f5658b39 Binary files /dev/null and b/go/packed/testdata/0 differ