Skip to content

Commit

Permalink
Data transfer with arrow (facebookincubator#4)
Browse files Browse the repository at this point in the history
* Data transfer with arrow

* Velox to Arrow

* Fix a small bug in Type.h

* Clear comments and Create DataUtil class

Co-authored-by: lalalazy12 <lmengyao.xu@intel.com>
  • Loading branch information
2 people authored and Cheng Xu committed Sep 27, 2021
1 parent b4724cb commit 397b814
Show file tree
Hide file tree
Showing 7 changed files with 580 additions and 2 deletions.
103 changes: 103 additions & 0 deletions velox/external/arrow/abi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Bit flags: the values are distinct powers of two, so they can be combined
// with bitwise OR. Presumably these are the values stored in
// ArrowSchema::flags -- confirm against the Arrow C data interface spec.
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4

// Type description of an Arrow array. This header is a vendored copy of the
// Arrow C data interface declarations, so the member order and member types
// are presumably part of a cross-library ABI: do not reorder, retype or add
// members here.
struct ArrowSchema {
  // --- Array type description ---
  const char* format;
  const char* name;
  const char* metadata;
  // Presumably a bitwise OR of the ARROW_FLAG_* constants.
  int64_t flags;
  // Child schemas and their count.
  int64_t n_children;
  struct ArrowSchema** children;
  struct ArrowSchema* dictionary;

  // --- Lifetime management ---
  // Release callback; receives a pointer to this structure.
  void (*release)(struct ArrowSchema*);

  // Opaque data owned by whoever produced the structure.
  void* private_data;
};

// Data description of an Arrow array. This header is a vendored copy of the
// Arrow C data interface declarations, so the member order and member types
// are presumably part of a cross-library ABI: do not reorder, retype or add
// members here.
struct ArrowArray {
  // --- Array data description ---
  int64_t length;
  int64_t null_count;
  int64_t offset;
  // Entry counts for the `buffers` and `children` pointer arrays below.
  int64_t n_buffers;
  int64_t n_children;
  const void** buffers;
  struct ArrowArray** children;
  struct ArrowArray* dictionary;

  // --- Lifetime management ---
  // Release callback; receives a pointer to this structure.
  void (*release)(struct ArrowArray*);

  // Opaque data owned by whoever produced the structure.
  void* private_data;
};

// EXPERIMENTAL: C stream interface

// A stream of arrays that all share one type, exposed as a table of
// callbacks plus producer-owned state.
struct ArrowArrayStream {
  // Fetches the stream's type, which is identical for every array the
  // stream yields.
  //
  // Returns 0 on success, otherwise an `errno`-compatible error code.
  //
  // On success the ArrowSchema written to `out` has its own lifetime and
  // must be released separately from the stream.
  int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);

  // Fetches the next array of the stream. When the call succeeds but the
  // array in `out` is released, the stream has ended.
  //
  // Returns 0 on success, otherwise an `errno`-compatible error code.
  //
  // On success the ArrowArray written to `out` has its own lifetime and
  // must be released separately from the stream.
  int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);

  // Fetches optional detail about the most recent failure. Only call this
  // when the last operation on the stream returned a non-0 code.
  //
  // Returns a null-terminated description of the last error, or NULL when
  // no description is available.
  //
  // The pointer stays valid only until the next operation on this stream,
  // release included.
  const char* (*get_last_error)(struct ArrowArrayStream*);

  // Releases the resources owned by the stream itself. Arrays handed out by
  // `get_next` are not covered and must each be released individually.
  void (*release)(struct ArrowArrayStream*);

  // Opaque data owned by whoever produced the stream.
  void* private_data;
};

#ifdef __cplusplus
}
#endif
69 changes: 69 additions & 0 deletions velox/type/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,75 @@ bool typeExists(const std::string& name);
/// child types.
TypePtr getType(const std::string& name, std::vector<TypePtr> childTypes);

/// Shorthand for VELOX_DYNAMIC_ARROW_TYPE_DISPATCH_IMPL with an empty SUFFIX:
/// forwards TEMPLATE_FUNC as the PREFIX, so the expansion calls
/// TEMPLATE_FUNC<kind>(__VA_ARGS__) for the kind selected by `typeKind`.
#define VELOX_DYNAMIC_ARROW_TYPE_DISPATCH(TEMPLATE_FUNC, typeKind, ...) \
  VELOX_DYNAMIC_ARROW_TYPE_DISPATCH_IMPL(                               \
      TEMPLATE_FUNC, , typeKind, __VA_ARGS__)

/// Expands to an immediately-invoked lambda that switches on `typeKind`, a
/// single-character type code, and returns the result of
/// `PREFIX<kind> SUFFIX(__VA_ARGS__)` for the matching Velox TypeKind:
///
///   'b' -> BOOLEAN    'i' -> INTEGER    'c' -> TINYINT    's' -> SMALLINT
///   'l' -> BIGINT     'f' -> REAL       'g' -> DOUBLE
///
/// (The codes look like Arrow C data interface format characters -- confirm
/// against the callers.) Any other code reaches VELOX_FAIL.
///
/// `typeKind` is a character code rather than a TypeKind, so the failure
/// message formats it directly. Passing it to mapTypeKindToName(), as this
/// macro used to, does not compile if that helper takes a TypeKind, because a
/// character does not implicitly convert to a scoped enum.
///
/// The lambda captures by reference, so the expansion must be evaluated
/// where it appears and not stored for later use.
#define VELOX_DYNAMIC_ARROW_TYPE_DISPATCH_IMPL(PREFIX, SUFFIX, typeKind, ...) \
  [&]() {                                                                     \
    switch (typeKind) {                                                       \
      case 'b': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::BOOLEAN> SUFFIX(           \
            __VA_ARGS__);                                                     \
      }                                                                       \
      case 'i': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::INTEGER> SUFFIX(           \
            __VA_ARGS__);                                                     \
      }                                                                       \
      case 'c': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::TINYINT> SUFFIX(           \
            __VA_ARGS__);                                                     \
      }                                                                       \
      case 's': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::SMALLINT> SUFFIX(          \
            __VA_ARGS__);                                                     \
      }                                                                       \
      case 'l': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::BIGINT> SUFFIX(            \
            __VA_ARGS__);                                                     \
      }                                                                       \
      case 'f': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::REAL> SUFFIX(__VA_ARGS__); \
      }                                                                       \
      case 'g': {                                                             \
        return PREFIX<::facebook::velox::TypeKind::DOUBLE> SUFFIX(            \
            __VA_ARGS__);                                                     \
      }                                                                       \
      default:                                                                \
        VELOX_FAIL("not a known type kind: {}", (typeKind));                  \
    }                                                                         \
  }()


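/*** TODO: add the remaining type kinds to                                    \
VELOX_DYNAMIC_ARROW_TYPE_DISPATCH_IMPL. The draft cases below still use       \
TypeKind enumerators as labels, whereas the live macro switches on            \
single-character codes, so each label needs its matching code first.          \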
case ::facebook::velox::TypeKind::VARCHAR: { \
return PREFIX<::facebook::velox::TypeKind::VARCHAR> SUFFIX( \
__VA_ARGS__); \
} \
case ::facebook::velox::TypeKind::VARBINARY: { \
return PREFIX<::facebook::velox::TypeKind::VARBINARY> SUFFIX( \
__VA_ARGS__); \
} \
case ::facebook::velox::TypeKind::TIMESTAMP: { \
return PREFIX<::facebook::velox::TypeKind::TIMESTAMP> SUFFIX( \
__VA_ARGS__); \
} \
case ::facebook::velox::TypeKind::ARRAY: { \
return PREFIX<::facebook::velox::TypeKind::ARRAY> SUFFIX(__VA_ARGS__); \
} \
case ::facebook::velox::TypeKind::MAP: { \
return PREFIX<::facebook::velox::TypeKind::MAP> SUFFIX(__VA_ARGS__); \
} \
case ::facebook::velox::TypeKind::ROW: { \
return PREFIX<::facebook::velox::TypeKind::ROW> SUFFIX(__VA_ARGS__); \
}
***/ \







} // namespace facebook::velox

namespace folly {
Expand Down
3 changes: 2 additions & 1 deletion velox/vector/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ add_library(
SelectivityVector.cpp
SimpleVector.cpp
VectorStream.cpp
VectorEncoding.cpp)
VectorEncoding.cpp
DataExchangeWithArrow.cpp)

target_link_libraries(velox_vector velox_memory velox_type velox_encode)

Expand Down
Loading

0 comments on commit 397b814

Please sign in to comment.