From 541e9e0a0c0ba092c8fe35d6b3ddd14da5511645 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Apr 2023 14:24:23 -0300 Subject: [PATCH 01/90] document test build setup --- .../postgresql/CMakeUserPresets.json.example | 33 +++++++++++++++++++ c/driver/postgresql/README.md | 13 ++++++++ docker-compose.yml | 2 +- 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 c/driver/postgresql/CMakeUserPresets.json.example diff --git a/c/driver/postgresql/CMakeUserPresets.json.example b/c/driver/postgresql/CMakeUserPresets.json.example new file mode 100644 index 0000000000..986cc5e1d6 --- /dev/null +++ b/c/driver/postgresql/CMakeUserPresets.json.example @@ -0,0 +1,33 @@ +{ + "version": 3, + "cmakeMinimumRequired": { + "major": 3, + "minor": 21, + "patch": 0 + }, + "configurePresets": [ + { + "name": "user-local", + "displayName": "(user) local build", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "ADBC_BUILD_TESTS": "ON" + }, + "environment": { + "PKG_CONFIG_PATH": "" + } + } + ], + "testPresets": [ + { + "name": "user-test-preset", + "description": "", + "displayName": "(user) test preset", + "configurePreset": "user-local", + "environment": { + "CTEST_OUTPUT_ON_FAILURE": "1", + "ADBC_POSTGRESQL_TEST_URI": "postgresql://localhost:5432/postgres?user=postgres&password=password" + } + } + ] +} diff --git a/c/driver/postgresql/README.md b/c/driver/postgresql/README.md index 15859d0629..cc5a3dfe03 100644 --- a/c/driver/postgresql/README.md +++ b/c/driver/postgresql/README.md @@ -50,6 +50,15 @@ $ docker run -it --rm \ postgres ``` +Alternatively use the `docker compose` provided by ADBC to manage the test +database container. 
+ +```shell +$ docker compose up postgres_test +# When finished: +# docker compose down postgres_test +``` + Then, to run the tests, set the environment variable specifying the PostgreSQL URI before running tests: @@ -57,3 +66,7 @@ PostgreSQL URI before running tests: $ export ADBC_POSTGRESQL_TEST_URI=postgresql://localhost:5432/postgres?user=postgres&password=password $ ctest ``` + +Users of VSCode can use the CMake extension with the supplied CMakeUserPresets.json +example to supply the required CMake and environment variables required to build and +run tests. diff --git a/docker-compose.yml b/docker-compose.yml index 55cc5c53f3..77ca8ae95a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,7 @@ services: # These reuse Arrow's images for simplicity. You won't be able to # build the image from here. - ############################ Documentation################################### + ############################ Documentation ################################### docs: image: condaforge/mambaforge:latest From 93d7c510493b7bb92d5ce3f2ab6270a26ec1dee9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Apr 2023 23:55:16 -0300 Subject: [PATCH 02/90] thinking about converting --- c/driver/postgresql/converter.h | 97 +++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 c/driver/postgresql/converter.h diff --git a/c/driver/postgresql/converter.h b/c/driver/postgresql/converter.h new file mode 100644 index 0000000000..8f0e626f97 --- /dev/null +++ b/c/driver/postgresql/converter.h @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include + +#include "type.h" + +namespace adbcpq { + +void BSwapArray(uint8_t* data, int64_t size_bytes, int32_t bitwidth) { + switch (bitwidth) { + case 1: + case 8: + break; + case 16: + break; + case 32: + break; + case 64: + break; + default: + break; + } +} + +class ArrowConverter { + public: + ArrowConverter(ArrowType type, PgType pg_type) + : type_(type), pg_type_(pg_type), offsets_(nullptr), data_(nullptr) { + memset(&schema_view_, 0, sizeof(ArrowSchemaView)); + } + + virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, type_)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); + + for (int32_t i = 0; i < 3; i++) { + switch (schema_view_.layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + if (schema_view_.layout.element_size_bits[i] == 32) { + offsets_ = ArrowArrayBuffer(array, i); + } + break; + case NANOARROW_BUFFER_TYPE_DATA: + data_ = ArrowArrayBuffer(array, i); + break; + default: + break; + } + } + + return NANOARROW_OK; + } + + virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) = 0; + + virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { + return NANOARROW_OK; + } + + protected: + 
PgType pg_type_; + ArrowType type_; + ArrowSchemaView schema_view_; + ArrowBuffer* offsets_; + ArrowBuffer* large_offsets_; + ArrowBuffer* data_; +}; + +} // namespace adbcpq From 401944b998c0705f558ad4cd6df175853aba3048 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 2 Apr 2023 17:48:35 -0300 Subject: [PATCH 03/90] two converters --- c/driver/postgresql/converter.h | 68 ++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/converter.h b/c/driver/postgresql/converter.h index 8f0e626f97..f3bda520c6 100644 --- a/c/driver/postgresql/converter.h +++ b/c/driver/postgresql/converter.h @@ -56,7 +56,7 @@ class ArrowConverter { return NANOARROW_OK; } - ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { + virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); @@ -94,4 +94,70 @@ class ArrowConverter { ArrowBuffer* data_; }; +// Converter for Pg->Arrow conversions whose representations are identical (minus +// the bswap from network endian). This includes all integral and float types. 
+class NumericArrowConverter : public ArrowConverter { + public: + NumericArrowConverter(ArrowType type, PgType pg_type) + : ArrowConverter(type, pg_type), data_(nullptr) {} + + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(ArrowConverter::InitSchema(schema)); + bitwidth_ = schema_view_.layout.element_size_bits[1]; + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(ArrowConverter::InitArray(array, schema)); + data_ = ArrowArrayBuffer(array, 1); + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + return ArrowBufferAppendBufferView(data_, data); + } + + ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) override { + BSwapArray(data_->data, data_->size_bytes, bitwidth_); + return NANOARROW_OK; + } + + private: + ArrowBuffer* data_; + int32_t bitwidth_; +}; + +// Converter for Pg->Arrow conversions whose Arrow representation is simply the +// bytes of the field representation. This can be used with binary and string +// Arrow types and any postgres type. 
+class BinaryArrowConverter : public ArrowConverter { + public: + BinaryArrowConverter(ArrowType type, PgType pg_type) + : ArrowConverter(type, pg_type), data_(nullptr) {} + + ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(ArrowConverter::InitArray(array, schema)); + offsets_ = ArrowArrayBuffer(array, 1); + data_ = ArrowArrayBuffer(array, 2); + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if ((data_->size_bytes + data.size_bytes) > std::numeric_limits::max()) { + return EOVERFLOW; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, (int32_t)data_->size_bytes)); + return NANOARROW_OK; + } + + private: + ArrowBuffer* offsets_; + ArrowBuffer* data_; + int32_t bitwidth_; +}; + } // namespace adbcpq From b53ae7b9106f0161f0a929d45d2c2910b3c8685b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 2 Apr 2023 23:11:12 -0300 Subject: [PATCH 04/90] some postgres type stuff --- c/driver/postgresql/converter.h | 154 ++++++++++++++++++++++++++++---- 1 file changed, 137 insertions(+), 17 deletions(-) diff --git a/c/driver/postgresql/converter.h b/c/driver/postgresql/converter.h index f3bda520c6..162bc43df7 100644 --- a/c/driver/postgresql/converter.h +++ b/c/driver/postgresql/converter.h @@ -19,29 +19,149 @@ #include #include -#include +#include +#include +#include #include #include "type.h" +#include "util.h" namespace adbcpq { -void BSwapArray(uint8_t* data, int64_t size_bytes, int32_t bitwidth) { - switch (bitwidth) { - case 1: - case 8: - break; - case 16: - break; - case 32: - break; - case 64: - break; - default: - break; - } -} +class PostgresType { + public: + // As listed on https://www.postgresql.org/docs/current/datatype.html + enum PgTypeId { + PG_TYPE_UNINITIALIZED, + PG_TYPE_BIGINT, + PG_TYPE_BIGSERIAL, + PG_TYPE_BIT, + PG_TYPE_BIT_VARYING, + 
PG_TYPE_BOOLEAN, + PG_TYPE_BOX, + PG_TYPE_BYTEA, + PG_TYPE_CHARACTER, + PG_TYPE_CHARACTER_VARYING, + PG_TYPE_CIDR, + PG_TYPE_CIRCLE, + PG_TYPE_DATE, + PG_TYPE_DOUBLE_PRECISION, + PG_TYPE_INET, + PG_TYPE_INTEGER, + PG_TYPE_INTERVAL, + PG_TYPE_JSON, + PG_TYPE_JSONB, + PG_TYPE_LINE, + PG_TYPE_LSEG, + PG_TYPE_MACADDR, + PG_TYPE_MACADDR8, + PG_TYPE_MONEY, + PG_TYPE_NUMERIC, + PG_TYPE_PATH, + PG_TYPE_PG_LSN, + PG_TYPE_PG_SNAPSHOT, + PG_TYPE_POINT, + PG_TYPE_POLYGON, + PG_TYPE_REAL, + PG_TYPE_SMALLINT, + PG_TYPE_SMALLSERIAL, + PG_TYPE_SERIAL, + PG_TYPE_TEXT, + PG_TYPE_TIME, + PG_TYPE_TIMESTAMP, + PG_TYPE_TSQUERY, + PG_TYPE_TSVECTOR, + PG_TYPE_TXID_SNAPSHOT, + PG_TYPE_UUID, + PG_TYPE_XML, + + PG_TYPE_ARRAY, + PG_TYPE_COMPOSITE, + PG_TYPE_RANGE + }; + + PostgresType(PgTypeId id, PgTypeId storage_id) + : id_(id), storage_id_(storage_id), n_(-1), precision_(-1), scale_(-1) {} + + explicit PostgresType(PgTypeId id) : PostgresType(id, id) {} + + PgTypeId id() const { return id_; } + PgTypeId storage_id() const { return storage_id_; } + const std::string& name() const { return name_; } + int32_t n() const { return n_; } + int32_t precision() const { return precision_; } + int32_t scale() const { return scale_; } + const std::string& timezone() const { return timezone_; } + int64_t n_children() const { return static_cast(children_.size()); } + const PostgresType* child(int64_t i) const { return children_[i].get(); } + + private: + PgTypeId id_; + PgTypeId storage_id_; + std::string name_; + int32_t n_; + int32_t precision_; + int32_t scale_; + std::string timezone_; + std::vector> children_; + + public: + PostgresType BigInt() { return PostgresType(PG_TYPE_BIGINT); } + PostgresType BigSerial() { return PostgresType(PG_TYPE_BIGSERIAL, PG_TYPE_BIGINT); } + PostgresType Bit(int32_t n) { + PostgresType out(PG_TYPE_BIT, PG_TYPE_TEXT); + out.n_ = n; + return out; + } + PostgresType BitVarying(int32_t n) { + PostgresType out(PG_TYPE_BIT_VARYING, PG_TYPE_TEXT); + out.n_ = n; + return 
out; + } + PostgresType Boolean() { return PostgresType(PG_TYPE_BOOLEAN); } + PostgresType Bytea() { return PostgresType(PG_TYPE_BYTEA); } + PostgresType Character(int32_t n) { + PostgresType out(PG_TYPE_CHARACTER, PG_TYPE_TEXT); + out.n_ = n; + return out; + } + PostgresType CharacterVarying(int32_t n) { + PostgresType out(PG_TYPE_CHARACTER_VARYING, PG_TYPE_TEXT); + out.n_ = n; + return out; + } + PostgresType Date() { return PostgresType(PG_TYPE_DATE, PG_TYPE_INTEGER); } + PostgresType DoublePrecision() { return PostgresType(PG_TYPE_DOUBLE_PRECISION); } + PostgresType Integer() { return PostgresType(PG_TYPE_INTEGER); } + PostgresType Numeric(int32_t precision, int32_t scale) { + PostgresType out(PG_TYPE_NUMERIC); + out.precision_ = precision; + out.scale_ = scale; + return out; + } + PostgresType Real() { return PostgresType(PG_TYPE_REAL); } + PostgresType SmallInt() { return PostgresType(PG_TYPE_SMALLINT); } + PostgresType SmallSerial() { + return PostgresType(PG_TYPE_SMALLSERIAL, PG_TYPE_SMALLINT); + } + PostgresType Serial() { return PostgresType(PG_TYPE_SERIAL, PG_TYPE_INTEGER); } + PostgresType Text() { return PostgresType(PG_TYPE_TEXT); } + PostgresType Time(const std::string& timezone = "") { + PostgresType out(PG_TYPE_TIME); + out.timezone_ = timezone; + if (timezone == "") { + out.storage_id_ = PG_TYPE_BIGINT; + } + return out; + } + PostgresType Timestamp(const std::string& timezone = "") { + PostgresType out(PG_TYPE_TIMESTAMP, PG_TYPE_BIGINT); + out.timezone_ = timezone; + return out; + } +}; class ArrowConverter { public: @@ -119,7 +239,7 @@ class NumericArrowConverter : public ArrowConverter { } ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) override { - BSwapArray(data_->data, data_->size_bytes, bitwidth_); + BufferToHostEndian(data_->data, data_->size_bytes, bitwidth_); return NANOARROW_OK; } From c20673faf85c4b1108e40365eb690be9f2603d43 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 09:12:14 -0300 Subject: 
[PATCH 05/90] add some other endian swappers --- c/driver/postgresql/util.h | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/c/driver/postgresql/util.h b/c/driver/postgresql/util.h index d8729fd4b0..118114e749 100644 --- a/c/driver/postgresql/util.h +++ b/c/driver/postgresql/util.h @@ -41,12 +41,16 @@ namespace adbcpq { #define MAKE_NAME(x, y) CONCAT(x, y) #if defined(_WIN32) && defined(_MSC_VER) +static inline uint32_t SwapNetworkToHost(uint16_t x) { return ntohs(x); } +static inline uint32_t SwapHostToNetwork(uint16_t x) { return htons(x); } static inline uint32_t SwapNetworkToHost(uint32_t x) { return ntohl(x); } static inline uint32_t SwapHostToNetwork(uint32_t x) { return htonl(x); } static inline uint64_t SwapNetworkToHost(uint64_t x) { return ntohll(x); } static inline uint64_t SwapHostToNetwork(uint64_t x) { return htonll(x); } #elif defined(_WIN32) // e.g., msys2, where ntohll is not necessarily defined +static inline uint32_t SwapNetworkToHost(uint16_t x) { return ntohs(x); } +static inline uint32_t SwapHostToNetwork(uint16_t x) { return htons(x); } static inline uint32_t SwapNetworkToHost(uint32_t x) { return ntohl(x); } static inline uint32_t SwapHostToNetwork(uint32_t x) { return htonl(x); } static inline uint64_t SwapNetworkToHost(uint64_t x) { @@ -57,17 +61,53 @@ static inline uint64_t SwapNetworkToHost(uint64_t x) { } static inline uint64_t SwapHostToNetwork(uint64_t x) { return SwapNetworkToHost(x); } #elif defined(__APPLE__) +static inline uint16_t SwapNetworkToHost(uint16_t x) { return OSSwapBigToHostInt16(x); } +static inline uint16_t SwapHostToNetwork(uint16_t x) { return OSSwapHostToBigInt16(x); } static inline uint32_t SwapNetworkToHost(uint32_t x) { return OSSwapBigToHostInt32(x); } static inline uint32_t SwapHostToNetwork(uint32_t x) { return OSSwapHostToBigInt32(x); } static inline uint64_t SwapNetworkToHost(uint64_t x) { return OSSwapBigToHostInt64(x); } static inline uint64_t 
SwapHostToNetwork(uint64_t x) { return OSSwapHostToBigInt64(x); } #else +static inline uint16_t SwapNetworkToHost(uint16_t x) { return be16toh(x); } +static inline uint16_t SwapHostToNetwork(uint16_t x) { return htobe16(x); } static inline uint32_t SwapNetworkToHost(uint32_t x) { return be32toh(x); } static inline uint32_t SwapHostToNetwork(uint32_t x) { return htobe32(x); } static inline uint64_t SwapNetworkToHost(uint64_t x) { return be64toh(x); } static inline uint64_t SwapHostToNetwork(uint64_t x) { return htobe64(x); } #endif +static inline void BufferToHostEndian(uint8_t* data, int64_t size_bytes, + int32_t bitwidth) { + switch (bitwidth) { + case 1: + case 8: + break; + case 16: { + uint16_t* data_uint = reinterpret_cast(data); + for (int64_t i = 0; i < size_bytes / 2; i++) { + data_uint[i] = SwapNetworkToHost(data_uint[i]); + } + break; + } + case 32: { + uint32_t* data_uint = reinterpret_cast(data); + for (int64_t i = 0; i < size_bytes / 4; i++) { + data_uint[i] = SwapNetworkToHost(data_uint[i]); + } + break; + } + case 64: { + uint64_t* data_uint = reinterpret_cast(data); + for (int64_t i = 0; i < size_bytes / 8; i++) { + data_uint[i] = SwapNetworkToHost(data_uint[i]); + } + break; + } + default: + break; + } +} + // see arrow/util/string_builder.h template From f1a056c1e0ac06e7d0d280fd0bf0a2ef6dea7546 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 10:12:32 -0300 Subject: [PATCH 06/90] maybe nested --- c/driver/postgresql/converter.h | 101 +++++++++++++++++--------------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/c/driver/postgresql/converter.h b/c/driver/postgresql/converter.h index 162bc43df7..ef8342cdc5 100644 --- a/c/driver/postgresql/converter.h +++ b/c/driver/postgresql/converter.h @@ -30,7 +30,7 @@ namespace adbcpq { -class PostgresType { +class PostgresField { public: // As listed on https://www.postgresql.org/docs/current/datatype.html enum PgTypeId { @@ -82,85 +82,105 @@ class PostgresType { 
PG_TYPE_RANGE }; - PostgresType(PgTypeId id, PgTypeId storage_id) + PostgresField(PgTypeId id, PgTypeId storage_id) : id_(id), storage_id_(storage_id), n_(-1), precision_(-1), scale_(-1) {} - explicit PostgresType(PgTypeId id) : PostgresType(id, id) {} + explicit PostgresField(PgTypeId id) : PostgresField(id, id) {} PgTypeId id() const { return id_; } PgTypeId storage_id() const { return storage_id_; } - const std::string& name() const { return name_; } + const std::string& typ_namename() const { return type_name_; } int32_t n() const { return n_; } int32_t precision() const { return precision_; } int32_t scale() const { return scale_; } const std::string& timezone() const { return timezone_; } int64_t n_children() const { return static_cast(children_.size()); } - const PostgresType* child(int64_t i) const { return children_[i].get(); } + const PostgresField* child(int64_t i) const { return children_[i].get(); } private: PgTypeId id_; PgTypeId storage_id_; - std::string name_; + std::string type_name_; int32_t n_; int32_t precision_; int32_t scale_; std::string timezone_; - std::vector> children_; + std::vector> children_; public: - PostgresType BigInt() { return PostgresType(PG_TYPE_BIGINT); } - PostgresType BigSerial() { return PostgresType(PG_TYPE_BIGSERIAL, PG_TYPE_BIGINT); } - PostgresType Bit(int32_t n) { - PostgresType out(PG_TYPE_BIT, PG_TYPE_TEXT); + PostgresField BigInt() { return PostgresField(PG_TYPE_BIGINT); } + PostgresField BigSerial() { return PostgresField(PG_TYPE_BIGSERIAL, PG_TYPE_BIGINT); } + PostgresField Bit(int32_t n) { + PostgresField out(PG_TYPE_BIT, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresType BitVarying(int32_t n) { - PostgresType out(PG_TYPE_BIT_VARYING, PG_TYPE_TEXT); + PostgresField BitVarying(int32_t n) { + PostgresField out(PG_TYPE_BIT_VARYING, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresType Boolean() { return PostgresType(PG_TYPE_BOOLEAN); } - PostgresType Bytea() { return PostgresType(PG_TYPE_BYTEA); } - 
PostgresType Character(int32_t n) { - PostgresType out(PG_TYPE_CHARACTER, PG_TYPE_TEXT); + PostgresField Boolean() { return PostgresField(PG_TYPE_BOOLEAN); } + PostgresField Bytea() { return PostgresField(PG_TYPE_BYTEA); } + PostgresField Character(int32_t n) { + PostgresField out(PG_TYPE_CHARACTER, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresType CharacterVarying(int32_t n) { - PostgresType out(PG_TYPE_CHARACTER_VARYING, PG_TYPE_TEXT); + PostgresField CharacterVarying(int32_t n) { + PostgresField out(PG_TYPE_CHARACTER_VARYING, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresType Date() { return PostgresType(PG_TYPE_DATE, PG_TYPE_INTEGER); } - PostgresType DoublePrecision() { return PostgresType(PG_TYPE_DOUBLE_PRECISION); } - PostgresType Integer() { return PostgresType(PG_TYPE_INTEGER); } - PostgresType Numeric(int32_t precision, int32_t scale) { - PostgresType out(PG_TYPE_NUMERIC); + PostgresField Date() { return PostgresField(PG_TYPE_DATE, PG_TYPE_INTEGER); } + PostgresField DoublePrecision() { return PostgresField(PG_TYPE_DOUBLE_PRECISION); } + PostgresField Integer() { return PostgresField(PG_TYPE_INTEGER); } + PostgresField Numeric(int32_t precision, int32_t scale) { + PostgresField out(PG_TYPE_NUMERIC); out.precision_ = precision; out.scale_ = scale; return out; } - PostgresType Real() { return PostgresType(PG_TYPE_REAL); } - PostgresType SmallInt() { return PostgresType(PG_TYPE_SMALLINT); } - PostgresType SmallSerial() { - return PostgresType(PG_TYPE_SMALLSERIAL, PG_TYPE_SMALLINT); + PostgresField Real() { return PostgresField(PG_TYPE_REAL); } + PostgresField SmallInt() { return PostgresField(PG_TYPE_SMALLINT); } + PostgresField SmallSerial() { + return PostgresField(PG_TYPE_SMALLSERIAL, PG_TYPE_SMALLINT); } - PostgresType Serial() { return PostgresType(PG_TYPE_SERIAL, PG_TYPE_INTEGER); } - PostgresType Text() { return PostgresType(PG_TYPE_TEXT); } - PostgresType Time(const std::string& timezone = "") { - PostgresType out(PG_TYPE_TIME); + 
PostgresField Serial() { return PostgresField(PG_TYPE_SERIAL, PG_TYPE_INTEGER); } + PostgresField Text() { return PostgresField(PG_TYPE_TEXT); } + PostgresField Time(const std::string& timezone = "") { + PostgresField out(PG_TYPE_TIME); out.timezone_ = timezone; if (timezone == "") { out.storage_id_ = PG_TYPE_BIGINT; } return out; } - PostgresType Timestamp(const std::string& timezone = "") { - PostgresType out(PG_TYPE_TIMESTAMP, PG_TYPE_BIGINT); + PostgresField Timestamp(const std::string& timezone = "") { + PostgresField out(PG_TYPE_TIMESTAMP, PG_TYPE_BIGINT); out.timezone_ = timezone; return out; } + + PostgresField Array(PostgresField& child) { + PostgresField out(PG_TYPE_ARRAY); + std::unique_ptr child_ptr(new PostgresField(std::move(child))); + out.children_.push_back(std::move(child_ptr)); + return out; + } + + PostgresField Composite(std::vector> children) { + PostgresField out(PG_TYPE_ARRAY); + out.children_ = std::move(children); + return out; + } + + PostgresField Range(PostgresField& child) { + PostgresField out(PG_TYPE_RANGE); + std::unique_ptr child_ptr(new PostgresField(std::move(child))); + out.children_.push_back(std::move(child_ptr)); + return out; + } }; class ArrowConverter { @@ -227,12 +247,6 @@ class NumericArrowConverter : public ArrowConverter { return NANOARROW_OK; } - ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(ArrowConverter::InitArray(array, schema)); - data_ = ArrowArrayBuffer(array, 1); - return NANOARROW_OK; - } - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { return ArrowBufferAppendBufferView(data_, data); @@ -256,13 +270,6 @@ class BinaryArrowConverter : public ArrowConverter { BinaryArrowConverter(ArrowType type, PgType pg_type) : ArrowConverter(type, pg_type), data_(nullptr) {} - ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(ArrowConverter::InitArray(array, schema)); - 
offsets_ = ArrowArrayBuffer(array, 1); - data_ = ArrowArrayBuffer(array, 2); - return NANOARROW_OK; - } - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if ((data_->size_bytes + data.size_bytes) > std::numeric_limits::max()) { From 28baa38b5e0d5983e0c465c7e1069e5787c49b12 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 10:35:58 -0300 Subject: [PATCH 07/90] type names --- c/driver/postgresql/converter.h | 152 +++++++++++++++++++++++++++----- 1 file changed, 128 insertions(+), 24 deletions(-) diff --git a/c/driver/postgresql/converter.h b/c/driver/postgresql/converter.h index ef8342cdc5..e944a537c7 100644 --- a/c/driver/postgresql/converter.h +++ b/c/driver/postgresql/converter.h @@ -73,9 +73,9 @@ class PostgresField { PG_TYPE_TIMESTAMP, PG_TYPE_TSQUERY, PG_TYPE_TSVECTOR, - PG_TYPE_TXID_SNAPSHOT, PG_TYPE_UUID, PG_TYPE_XML, + PG_TYPE_DOMAIN, PG_TYPE_ARRAY, PG_TYPE_COMPOSITE, @@ -87,9 +87,9 @@ class PostgresField { explicit PostgresField(PgTypeId id) : PostgresField(id, id) {} + const std::string& field_name() const { return field_name_; } PgTypeId id() const { return id_; } PgTypeId storage_id() const { return storage_id_; } - const std::string& typ_namename() const { return type_name_; } int32_t n() const { return n_; } int32_t precision() const { return precision_; } int32_t scale() const { return scale_; } @@ -97,7 +97,107 @@ class PostgresField { int64_t n_children() const { return static_cast(children_.size()); } const PostgresField* child(int64_t i) const { return children_[i].get(); } + std::string type_name() const { + // e.g., some user-created type + if (type_name_ != "") { + return type_name_; + } + + switch (id_) { + case PG_TYPE_BIGINT: + return "bigint"; + case PG_TYPE_BIGSERIAL: + return "bigserial"; + case PG_TYPE_BIT: + return "bit"; + case PG_TYPE_BIT_VARYING: + return "bit varying"; + case PG_TYPE_BOOLEAN: + return "boolean"; + case PG_TYPE_BOX: + return "box"; + case PG_TYPE_BYTEA: + 
return "bytea"; + case PG_TYPE_CHARACTER: + return "character"; + case PG_TYPE_CHARACTER_VARYING: + return "character varying"; + case PG_TYPE_CIDR: + return "cidr"; + case PG_TYPE_CIRCLE: + return "circle"; + case PG_TYPE_DATE: + return "date"; + case PG_TYPE_DOUBLE_PRECISION: + return "double precision"; + case PG_TYPE_INET: + return "inet"; + case PG_TYPE_INTEGER: + return "integer"; + case PG_TYPE_INTERVAL: + return "interval"; + case PG_TYPE_JSON: + return "json"; + case PG_TYPE_JSONB: + return "jsonb"; + case PG_TYPE_LINE: + return "line"; + case PG_TYPE_LSEG: + return "lseg"; + case PG_TYPE_MACADDR: + return "macaddr"; + case PG_TYPE_MACADDR8: + return "macaddr8"; + case PG_TYPE_MONEY: + return "money"; + case PG_TYPE_NUMERIC: + return "numeric"; + case PG_TYPE_PATH: + return "path"; + case PG_TYPE_PG_LSN: + return "pg_lsn"; + case PG_TYPE_PG_SNAPSHOT: + return "pg_snapshot"; + case PG_TYPE_POINT: + return "point"; + case PG_TYPE_POLYGON: + return "polygon"; + case PG_TYPE_REAL: + return "real"; + case PG_TYPE_SMALLINT: + return "smallint"; + case PG_TYPE_SMALLSERIAL: + return "smallserial"; + case PG_TYPE_SERIAL: + return "serial"; + case PG_TYPE_TEXT: + return "text"; + case PG_TYPE_TIME: + return "time"; + case PG_TYPE_TIMESTAMP: + return "timestamp"; + case PG_TYPE_TSQUERY: + return "tsquery"; + case PG_TYPE_TSVECTOR: + return "tsvetor"; + case PG_TYPE_UUID: + return "uuid"; + case PG_TYPE_XML: + return "xml"; + + case PG_TYPE_ARRAY: + return "array"; + case PG_TYPE_COMPOSITE: + return "composite"; + case PG_TYPE_RANGE: + return "range"; + default: + return ""; + } + } + private: + std::string field_name_; PgTypeId id_; PgTypeId storage_id_; std::string type_name_; @@ -108,47 +208,51 @@ class PostgresField { std::vector> children_; public: - PostgresField BigInt() { return PostgresField(PG_TYPE_BIGINT); } - PostgresField BigSerial() { return PostgresField(PG_TYPE_BIGSERIAL, PG_TYPE_BIGINT); } - PostgresField Bit(int32_t n) { + static PostgresField 
BigInt() { return PostgresField(PG_TYPE_BIGINT); } + static PostgresField BigSerial() { + return PostgresField(PG_TYPE_BIGSERIAL, PG_TYPE_BIGINT); + } + static PostgresField Bit(int32_t n) { PostgresField out(PG_TYPE_BIT, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresField BitVarying(int32_t n) { + static PostgresField BitVarying(int32_t n) { PostgresField out(PG_TYPE_BIT_VARYING, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresField Boolean() { return PostgresField(PG_TYPE_BOOLEAN); } - PostgresField Bytea() { return PostgresField(PG_TYPE_BYTEA); } - PostgresField Character(int32_t n) { + static PostgresField Boolean() { return PostgresField(PG_TYPE_BOOLEAN); } + static PostgresField Bytea() { return PostgresField(PG_TYPE_BYTEA); } + static PostgresField Character(int32_t n) { PostgresField out(PG_TYPE_CHARACTER, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresField CharacterVarying(int32_t n) { + static PostgresField CharacterVarying(int32_t n) { PostgresField out(PG_TYPE_CHARACTER_VARYING, PG_TYPE_TEXT); out.n_ = n; return out; } - PostgresField Date() { return PostgresField(PG_TYPE_DATE, PG_TYPE_INTEGER); } - PostgresField DoublePrecision() { return PostgresField(PG_TYPE_DOUBLE_PRECISION); } - PostgresField Integer() { return PostgresField(PG_TYPE_INTEGER); } - PostgresField Numeric(int32_t precision, int32_t scale) { + static PostgresField Date() { return PostgresField(PG_TYPE_DATE, PG_TYPE_INTEGER); } + static PostgresField DoublePrecision() { + return PostgresField(PG_TYPE_DOUBLE_PRECISION); + } + static PostgresField Integer() { return PostgresField(PG_TYPE_INTEGER); } + static PostgresField Numeric(int32_t precision, int32_t scale) { PostgresField out(PG_TYPE_NUMERIC); out.precision_ = precision; out.scale_ = scale; return out; } - PostgresField Real() { return PostgresField(PG_TYPE_REAL); } - PostgresField SmallInt() { return PostgresField(PG_TYPE_SMALLINT); } - PostgresField SmallSerial() { + static PostgresField Real() { return 
PostgresField(PG_TYPE_REAL); } + static PostgresField SmallInt() { return PostgresField(PG_TYPE_SMALLINT); } + static PostgresField SmallSerial() { return PostgresField(PG_TYPE_SMALLSERIAL, PG_TYPE_SMALLINT); } - PostgresField Serial() { return PostgresField(PG_TYPE_SERIAL, PG_TYPE_INTEGER); } - PostgresField Text() { return PostgresField(PG_TYPE_TEXT); } - PostgresField Time(const std::string& timezone = "") { + static PostgresField Serial() { return PostgresField(PG_TYPE_SERIAL, PG_TYPE_INTEGER); } + static PostgresField Text() { return PostgresField(PG_TYPE_TEXT); } + static PostgresField Time(const std::string& timezone = "") { PostgresField out(PG_TYPE_TIME); out.timezone_ = timezone; if (timezone == "") { @@ -156,26 +260,26 @@ class PostgresField { } return out; } - PostgresField Timestamp(const std::string& timezone = "") { + static PostgresField Timestamp(const std::string& timezone = "") { PostgresField out(PG_TYPE_TIMESTAMP, PG_TYPE_BIGINT); out.timezone_ = timezone; return out; } - PostgresField Array(PostgresField& child) { + static PostgresField Array(PostgresField& child) { PostgresField out(PG_TYPE_ARRAY); std::unique_ptr child_ptr(new PostgresField(std::move(child))); out.children_.push_back(std::move(child_ptr)); return out; } - PostgresField Composite(std::vector> children) { + static PostgresField Composite(std::vector> children) { PostgresField out(PG_TYPE_ARRAY); out.children_ = std::move(children); return out; } - PostgresField Range(PostgresField& child) { + static PostgresField Range(PostgresField& child) { PostgresField out(PG_TYPE_RANGE); std::unique_ptr child_ptr(new PostgresField(std::move(child))); out.children_.push_back(std::move(child_ptr)); From d5464ccc6d99a5fee64e6ce371c9e4aeca43f0c6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 14:10:23 -0300 Subject: [PATCH 08/90] rethinking --- c/driver/postgresql/converter.h | 394 ----------------- c/driver/postgresql/nanoarrow_pg.h | 650 +++++++++++++++++++++++++++++ 2 
files changed, 650 insertions(+), 394 deletions(-) delete mode 100644 c/driver/postgresql/converter.h create mode 100644 c/driver/postgresql/nanoarrow_pg.h diff --git a/c/driver/postgresql/converter.h b/c/driver/postgresql/converter.h deleted file mode 100644 index e944a537c7..0000000000 --- a/c/driver/postgresql/converter.h +++ /dev/null @@ -1,394 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include -#include -#include -#include -#include - -#include - -#include "type.h" -#include "util.h" - -namespace adbcpq { - -class PostgresField { - public: - // As listed on https://www.postgresql.org/docs/current/datatype.html - enum PgTypeId { - PG_TYPE_UNINITIALIZED, - PG_TYPE_BIGINT, - PG_TYPE_BIGSERIAL, - PG_TYPE_BIT, - PG_TYPE_BIT_VARYING, - PG_TYPE_BOOLEAN, - PG_TYPE_BOX, - PG_TYPE_BYTEA, - PG_TYPE_CHARACTER, - PG_TYPE_CHARACTER_VARYING, - PG_TYPE_CIDR, - PG_TYPE_CIRCLE, - PG_TYPE_DATE, - PG_TYPE_DOUBLE_PRECISION, - PG_TYPE_INET, - PG_TYPE_INTEGER, - PG_TYPE_INTERVAL, - PG_TYPE_JSON, - PG_TYPE_JSONB, - PG_TYPE_LINE, - PG_TYPE_LSEG, - PG_TYPE_MACADDR, - PG_TYPE_MACADDR8, - PG_TYPE_MONEY, - PG_TYPE_NUMERIC, - PG_TYPE_PATH, - PG_TYPE_PG_LSN, - PG_TYPE_PG_SNAPSHOT, - PG_TYPE_POINT, - PG_TYPE_POLYGON, - PG_TYPE_REAL, - PG_TYPE_SMALLINT, - PG_TYPE_SMALLSERIAL, - PG_TYPE_SERIAL, - PG_TYPE_TEXT, - PG_TYPE_TIME, - PG_TYPE_TIMESTAMP, - PG_TYPE_TSQUERY, - PG_TYPE_TSVECTOR, - PG_TYPE_UUID, - PG_TYPE_XML, - PG_TYPE_DOMAIN, - - PG_TYPE_ARRAY, - PG_TYPE_COMPOSITE, - PG_TYPE_RANGE - }; - - PostgresField(PgTypeId id, PgTypeId storage_id) - : id_(id), storage_id_(storage_id), n_(-1), precision_(-1), scale_(-1) {} - - explicit PostgresField(PgTypeId id) : PostgresField(id, id) {} - - const std::string& field_name() const { return field_name_; } - PgTypeId id() const { return id_; } - PgTypeId storage_id() const { return storage_id_; } - int32_t n() const { return n_; } - int32_t precision() const { return precision_; } - int32_t scale() const { return scale_; } - const std::string& timezone() const { return timezone_; } - int64_t n_children() const { return static_cast(children_.size()); } - const PostgresField* child(int64_t i) const { return children_[i].get(); } - - std::string type_name() const { - // e.g., some user-created type - if (type_name_ != "") { - return type_name_; - } - - switch (id_) { - case PG_TYPE_BIGINT: - return "bigint"; - case 
PG_TYPE_BIGSERIAL: - return "bigserial"; - case PG_TYPE_BIT: - return "bit"; - case PG_TYPE_BIT_VARYING: - return "bit varying"; - case PG_TYPE_BOOLEAN: - return "boolean"; - case PG_TYPE_BOX: - return "box"; - case PG_TYPE_BYTEA: - return "bytea"; - case PG_TYPE_CHARACTER: - return "character"; - case PG_TYPE_CHARACTER_VARYING: - return "character varying"; - case PG_TYPE_CIDR: - return "cidr"; - case PG_TYPE_CIRCLE: - return "circle"; - case PG_TYPE_DATE: - return "date"; - case PG_TYPE_DOUBLE_PRECISION: - return "double precision"; - case PG_TYPE_INET: - return "inet"; - case PG_TYPE_INTEGER: - return "integer"; - case PG_TYPE_INTERVAL: - return "interval"; - case PG_TYPE_JSON: - return "json"; - case PG_TYPE_JSONB: - return "jsonb"; - case PG_TYPE_LINE: - return "line"; - case PG_TYPE_LSEG: - return "lseg"; - case PG_TYPE_MACADDR: - return "macaddr"; - case PG_TYPE_MACADDR8: - return "macaddr8"; - case PG_TYPE_MONEY: - return "money"; - case PG_TYPE_NUMERIC: - return "numeric"; - case PG_TYPE_PATH: - return "path"; - case PG_TYPE_PG_LSN: - return "pg_lsn"; - case PG_TYPE_PG_SNAPSHOT: - return "pg_snapshot"; - case PG_TYPE_POINT: - return "point"; - case PG_TYPE_POLYGON: - return "polygon"; - case PG_TYPE_REAL: - return "real"; - case PG_TYPE_SMALLINT: - return "smallint"; - case PG_TYPE_SMALLSERIAL: - return "smallserial"; - case PG_TYPE_SERIAL: - return "serial"; - case PG_TYPE_TEXT: - return "text"; - case PG_TYPE_TIME: - return "time"; - case PG_TYPE_TIMESTAMP: - return "timestamp"; - case PG_TYPE_TSQUERY: - return "tsquery"; - case PG_TYPE_TSVECTOR: - return "tsvetor"; - case PG_TYPE_UUID: - return "uuid"; - case PG_TYPE_XML: - return "xml"; - - case PG_TYPE_ARRAY: - return "array"; - case PG_TYPE_COMPOSITE: - return "composite"; - case PG_TYPE_RANGE: - return "range"; - default: - return ""; - } - } - - private: - std::string field_name_; - PgTypeId id_; - PgTypeId storage_id_; - std::string type_name_; - int32_t n_; - int32_t precision_; - int32_t scale_; 
- std::string timezone_; - std::vector> children_; - - public: - static PostgresField BigInt() { return PostgresField(PG_TYPE_BIGINT); } - static PostgresField BigSerial() { - return PostgresField(PG_TYPE_BIGSERIAL, PG_TYPE_BIGINT); - } - static PostgresField Bit(int32_t n) { - PostgresField out(PG_TYPE_BIT, PG_TYPE_TEXT); - out.n_ = n; - return out; - } - static PostgresField BitVarying(int32_t n) { - PostgresField out(PG_TYPE_BIT_VARYING, PG_TYPE_TEXT); - out.n_ = n; - return out; - } - static PostgresField Boolean() { return PostgresField(PG_TYPE_BOOLEAN); } - static PostgresField Bytea() { return PostgresField(PG_TYPE_BYTEA); } - static PostgresField Character(int32_t n) { - PostgresField out(PG_TYPE_CHARACTER, PG_TYPE_TEXT); - out.n_ = n; - return out; - } - static PostgresField CharacterVarying(int32_t n) { - PostgresField out(PG_TYPE_CHARACTER_VARYING, PG_TYPE_TEXT); - out.n_ = n; - return out; - } - static PostgresField Date() { return PostgresField(PG_TYPE_DATE, PG_TYPE_INTEGER); } - static PostgresField DoublePrecision() { - return PostgresField(PG_TYPE_DOUBLE_PRECISION); - } - static PostgresField Integer() { return PostgresField(PG_TYPE_INTEGER); } - static PostgresField Numeric(int32_t precision, int32_t scale) { - PostgresField out(PG_TYPE_NUMERIC); - out.precision_ = precision; - out.scale_ = scale; - return out; - } - static PostgresField Real() { return PostgresField(PG_TYPE_REAL); } - static PostgresField SmallInt() { return PostgresField(PG_TYPE_SMALLINT); } - static PostgresField SmallSerial() { - return PostgresField(PG_TYPE_SMALLSERIAL, PG_TYPE_SMALLINT); - } - static PostgresField Serial() { return PostgresField(PG_TYPE_SERIAL, PG_TYPE_INTEGER); } - static PostgresField Text() { return PostgresField(PG_TYPE_TEXT); } - static PostgresField Time(const std::string& timezone = "") { - PostgresField out(PG_TYPE_TIME); - out.timezone_ = timezone; - if (timezone == "") { - out.storage_id_ = PG_TYPE_BIGINT; - } - return out; - } - static 
PostgresField Timestamp(const std::string& timezone = "") { - PostgresField out(PG_TYPE_TIMESTAMP, PG_TYPE_BIGINT); - out.timezone_ = timezone; - return out; - } - - static PostgresField Array(PostgresField& child) { - PostgresField out(PG_TYPE_ARRAY); - std::unique_ptr child_ptr(new PostgresField(std::move(child))); - out.children_.push_back(std::move(child_ptr)); - return out; - } - - static PostgresField Composite(std::vector> children) { - PostgresField out(PG_TYPE_ARRAY); - out.children_ = std::move(children); - return out; - } - - static PostgresField Range(PostgresField& child) { - PostgresField out(PG_TYPE_RANGE); - std::unique_ptr child_ptr(new PostgresField(std::move(child))); - out.children_.push_back(std::move(child_ptr)); - return out; - } -}; - -class ArrowConverter { - public: - ArrowConverter(ArrowType type, PgType pg_type) - : type_(type), pg_type_(pg_type), offsets_(nullptr), data_(nullptr) { - memset(&schema_view_, 0, sizeof(ArrowSchemaView)); - } - - virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, type_)); - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); - return NANOARROW_OK; - } - - virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); - - for (int32_t i = 0; i < 3; i++) { - switch (schema_view_.layout.buffer_type[i]) { - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - if (schema_view_.layout.element_size_bits[i] == 32) { - offsets_ = ArrowArrayBuffer(array, i); - } - break; - case NANOARROW_BUFFER_TYPE_DATA: - data_ = ArrowArrayBuffer(array, i); - break; - default: - break; - } - } - - return NANOARROW_OK; - } - - virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) = 0; - - virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { - return 
NANOARROW_OK; - } - - protected: - PgType pg_type_; - ArrowType type_; - ArrowSchemaView schema_view_; - ArrowBuffer* offsets_; - ArrowBuffer* large_offsets_; - ArrowBuffer* data_; -}; - -// Converter for Pg->Arrow conversions whose representations are identical (minus -// the bswap from network endian). This includes all integral and float types. -class NumericArrowConverter : public ArrowConverter { - public: - NumericArrowConverter(ArrowType type, PgType pg_type) - : ArrowConverter(type, pg_type), data_(nullptr) {} - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(ArrowConverter::InitSchema(schema)); - bitwidth_ = schema_view_.layout.element_size_bits[1]; - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - return ArrowBufferAppendBufferView(data_, data); - } - - ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) override { - BufferToHostEndian(data_->data, data_->size_bytes, bitwidth_); - return NANOARROW_OK; - } - - private: - ArrowBuffer* data_; - int32_t bitwidth_; -}; - -// Converter for Pg->Arrow conversions whose Arrow representation is simply the -// bytes of the field representation. This can be used with binary and string -// Arrow types and any postgres type. 
-class BinaryArrowConverter : public ArrowConverter { - public: - BinaryArrowConverter(ArrowType type, PgType pg_type) - : ArrowConverter(type, pg_type), data_(nullptr) {} - - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - if ((data_->size_bytes + data.size_bytes) > std::numeric_limits::max()) { - return EOVERFLOW; - } - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, (int32_t)data_->size_bytes)); - return NANOARROW_OK; - } - - private: - ArrowBuffer* offsets_; - ArrowBuffer* data_; - int32_t bitwidth_; -}; - -} // namespace adbcpq diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h new file mode 100644 index 0000000000..7b2ace61d1 --- /dev/null +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -0,0 +1,650 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "type.h" +#include "util.h" + +namespace adbcpq { + +class PostgresType { + public: + // From SELECT DISTINCT typreceive FROM pg_type; + enum PgRecv { + PG_RECV_UNINITIALIZED, + PG_RECV_ANYARRAY, + PG_RECV_ANYCOMPATIBLEARRAY, + PG_RECV_ARRAY, + PG_RECV_BIT, + PG_RECV_BOOL, + PG_RECV_BOX, + PG_RECV_BPCHAR, + PG_RECV_BRIN_BLOOM_SUMMARY, + PG_RECV_BRIN_MINMAX_MULTI_SUMMARY, + PG_RECV_BYTEA, + PG_RECV_CASH, + PG_RECV_CHAR, + PG_RECV_CIDR, + PG_RECV_CID, + PG_RECV_CIRCLE, + PG_RECV_CSTRING, + PG_RECV_DATE, + PG_RECV_DOMAIN, + PG_RECV_FLOAT4, + PG_RECV_FLOAT8, + PG_RECV_INET, + PG_RECV_INT2, + PG_RECV_INT2VECTOR, + PG_RECV_INT4, + PG_RECV_INT8, + PG_RECV_INTERVAL, + PG_RECV_JSON, + PG_RECV_JSONB, + PG_RECV_JSONPATH, + PG_RECV_LINE, + PG_RECV_LSEG, + PG_RECV_MACADDR, + PG_RECV_MACADDR8, + PG_RECV_MULTIRANGE, + PG_RECV_NAME, + PG_RECV_NUMERIC, + PG_RECV_OID, + PG_RECV_OIDVECTOR, + PG_RECV_PATH, + PG_RECV_PG_DDL_COMMAND, + PG_RECV_PG_DEPENDENCIES, + PG_RECV_PG_LSN, + PG_RECV_PG_MCV_LIST, + PG_RECV_PG_NDISTINCT, + PG_RECV_PG_NODE_TREE, + PG_RECV_PG_SNAPSHOT, + PG_RECV_POINT, + PG_RECV_POLY, + PG_RECV_RANGE, + PG_RECV_RECORD, + PG_RECV_REGCLASS, + PG_RECV_REGCOLLATION, + PG_RECV_REGCONFIG, + PG_RECV_REGDICTIONARY, + PG_RECV_REGNAMESPACE, + PG_RECV_REGOPERATOR, + PG_RECV_REGOPER, + PG_RECV_REGPROCEDURE, + PG_RECV_REGPROC, + PG_RECV_REGROLE, + PG_RECV_REGTYPE, + PG_RECV_TEXT, + PG_RECV_TID, + PG_RECV_TIME, + PG_RECV_TIMESTAMP, + PG_RECV_TIMESTAMPTZ, + PG_RECV_TIMETZ, + PG_RECV_TSQUERY, + PG_RECV_TSVECTOR, + PG_RECV_TXID_SNAPSHOT, + PG_RECV_UNKNOWN, + PG_RECV_UUID, + PG_RECV_VARBIT, + PG_RECV_VARCHAR, + PG_RECV_VOID, + PG_RECV_XID8, + PG_RECV_XID, + PG_RECV_XML + }; + + static std::vector PgRecvAllBase() { + return {PG_RECV_BIT, PG_RECV_BOOL, PG_RECV_BYTEA, PG_RECV_CASH, + PG_RECV_CHAR, PG_RECV_DATE, PG_RECV_FLOAT4, PG_RECV_FLOAT8, + PG_RECV_INT4, PG_RECV_INT8, PG_RECV_INTERVAL, 
PG_RECV_NUMERIC, + PG_RECV_OID, PG_RECV_TEXT, PG_RECV_TIME, PG_RECV_TIMESTAMP, + PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, PG_RECV_UUID, PG_RECV_VARBIT, + PG_RECV_VARCHAR}; + } + + static std::string PgRecvName(PgRecv recv) { + switch (recv) { + case PG_RECV_ANYARRAY: + return "anyarray_recv"; + case PG_RECV_ANYCOMPATIBLEARRAY: + return "anycompatiblearray_recv"; + case PG_RECV_ARRAY: + return "array_recv"; + case PG_RECV_BIT: + return "bit_recv"; + case PG_RECV_BOOL: + return "boolrecv"; + case PG_RECV_BOX: + return "box_recv"; + case PG_RECV_BPCHAR: + return "bpcharrecv"; + case PG_RECV_BRIN_BLOOM_SUMMARY: + return "brin_bloom_summary_recv"; + case PG_RECV_BRIN_MINMAX_MULTI_SUMMARY: + return "brin_minmax_multi_summary_recv"; + case PG_RECV_BYTEA: + return "bytearecv"; + case PG_RECV_CASH: + return "cash_recv"; + case PG_RECV_CHAR: + return "charrecv"; + case PG_RECV_CIDR: + return "cidr_recv"; + case PG_RECV_CID: + return "cidrecv"; + case PG_RECV_CIRCLE: + return "circle_recv"; + case PG_RECV_CSTRING: + return "cstring_recv"; + case PG_RECV_DATE: + return "date_recv"; + case PG_RECV_DOMAIN: + return "domain_recv"; + case PG_RECV_FLOAT4: + return "float4recv"; + case PG_RECV_FLOAT8: + return "float8recv"; + case PG_RECV_INET: + return "inet_recv"; + case PG_RECV_INT2: + return "int2recv"; + case PG_RECV_INT2VECTOR: + return "int2vectorrecv"; + case PG_RECV_INT4: + return "int4recv"; + case PG_RECV_INT8: + return "int8recv"; + case PG_RECV_INTERVAL: + return "interval_recv"; + case PG_RECV_JSON: + return "json_recv"; + case PG_RECV_JSONB: + return "jsonb_recv"; + case PG_RECV_JSONPATH: + return "jsonpath_recv"; + case PG_RECV_LINE: + return "line_recv"; + case PG_RECV_LSEG: + return "lseg_recv"; + case PG_RECV_MACADDR: + return "macaddr_recv"; + case PG_RECV_MACADDR8: + return "macaddr8_recv"; + case PG_RECV_MULTIRANGE: + return "multirange_recv"; + case PG_RECV_NAME: + return "namerecv"; + case PG_RECV_NUMERIC: + return "numeric_recv"; + case PG_RECV_OID: + return 
"oidrecv"; + case PG_RECV_OIDVECTOR: + return "oidvectorrecv"; + case PG_RECV_PATH: + return "path_recv"; + case PG_RECV_PG_DDL_COMMAND: + return "pg_ddl_command_recv"; + case PG_RECV_PG_DEPENDENCIES: + return "pg_dependencies_recv"; + case PG_RECV_PG_LSN: + return "pg_lsn_recv"; + case PG_RECV_PG_MCV_LIST: + return "pg_mcv_list_recv"; + case PG_RECV_PG_NDISTINCT: + return "pg_ndistinct_recv"; + case PG_RECV_PG_NODE_TREE: + return "pg_node_tree_recv"; + case PG_RECV_PG_SNAPSHOT: + return "pg_snapshot_recv"; + case PG_RECV_POINT: + return "point_recv"; + case PG_RECV_POLY: + return "poly_recv"; + case PG_RECV_RANGE: + return "range_recv"; + case PG_RECV_RECORD: + return "record_recv"; + case PG_RECV_REGCLASS: + return "regclassrecv"; + case PG_RECV_REGCOLLATION: + return "regcollationrecv"; + case PG_RECV_REGCONFIG: + return "regconfigrecv"; + case PG_RECV_REGDICTIONARY: + return "regdictionaryrecv"; + case PG_RECV_REGNAMESPACE: + return "regnamespacerecv"; + case PG_RECV_REGOPERATOR: + return "regoperatorrecv"; + case PG_RECV_REGOPER: + return "regoperrecv"; + case PG_RECV_REGPROCEDURE: + return "regprocedurerecv"; + case PG_RECV_REGPROC: + return "regprocrecv"; + case PG_RECV_REGROLE: + return "regrolerecv"; + case PG_RECV_REGTYPE: + return "regtyperecv"; + case PG_RECV_TEXT: + return "textrecv"; + case PG_RECV_TID: + return "tidrecv"; + case PG_RECV_TIME: + return "time_recv"; + case PG_RECV_TIMESTAMP: + return "timestamp_recv"; + case PG_RECV_TIMESTAMPTZ: + return "timestamptz_recv"; + case PG_RECV_TIMETZ: + return "timetz_recv"; + case PG_RECV_TSQUERY: + return "tsqueryrecv"; + case PG_RECV_TSVECTOR: + return "tsvectorrecv"; + case PG_RECV_TXID_SNAPSHOT: + return "txid_snapshot_recv"; + case PG_RECV_UNKNOWN: + return "unknownrecv"; + case PG_RECV_UUID: + return "uuid_recv"; + case PG_RECV_VARBIT: + return "varbit_recv"; + case PG_RECV_VARCHAR: + return "varcharrecv"; + case PG_RECV_VOID: + return "void_recv"; + case PG_RECV_XID8: + return "xid8recv"; + case 
PG_RECV_XID: + return "xidrecv"; + case PG_RECV_XML: + return "xml_recv"; + default: + return ""; + } + } + + static std::string PgRecvTypname(PgRecv recv) { + switch (recv) { + case PG_RECV_BIT: + return "bit"; + case PG_RECV_BOOL: + return "bool"; + case PG_RECV_BYTEA: + return "bytea"; + case PG_RECV_CASH: + return "cash"; + case PG_RECV_CHAR: + return "char"; + case PG_RECV_DATE: + return "date"; + case PG_RECV_FLOAT4: + return "float4"; + case PG_RECV_FLOAT8: + return "float8"; + case PG_RECV_INT2: + return "int2"; + case PG_RECV_INT4: + return "int4"; + case PG_RECV_INT8: + return "int8"; + case PG_RECV_INTERVAL: + return "interval"; + case PG_RECV_NUMERIC: + return "numeric"; + case PG_RECV_OID: + return "oid"; + case PG_RECV_TEXT: + return "text"; + case PG_RECV_TIME: + return "time"; + case PG_RECV_TIMESTAMP: + return "timestamp"; + case PG_RECV_TIMESTAMPTZ: + return "timestamptz"; + case PG_RECV_TIMETZ: + return "timetz"; + case PG_RECV_UUID: + return "uuid"; + case PG_RECV_VARBIT: + return "varbit"; + case PG_RECV_VARCHAR: + return "varchar"; + default: + return ""; + } + } + + PostgresType(PgRecv recv) : oid_(0), recv_(recv) {} + + PostgresType() : PostgresType(PG_RECV_UNINITIALIZED) {} + + void AddRecordChild(const std::string& field_name, const PostgresType& type) { + PostgresType child(type); + children_.push_back(child.WithFieldName(field_name)); + } + + PostgresType WithFieldName(const std::string& field_name) const { + PostgresType out(*this); + out.field_name_ = field_name; + return out; + } + + PostgresType WithPgTypeInfo(uint32_t oid, const std::string& typname) const { + PostgresType out(*this); + out.oid_ = oid; + out.typname_ = typname; + return out; + } + + PostgresType Array(uint32_t oid, const std::string& typname) const { + PostgresType out(PG_RECV_ARRAY); + out.children_.push_back(WithFieldName("item")); + return out; + } + + PostgresType Domain(uint32_t oid, const std::string& typname) { + return WithPgTypeInfo(oid, typname); + } + + 
PostgresType Range(uint32_t oid, const std::string& typname) const { + PostgresType out(PG_RECV_RANGE); + out.children_.push_back(WithFieldName("item")); + return out; + } + + uint32_t oid() const { return oid_; } + PgRecv recv() const { return recv_; } + const std::string& typname() { return typname_; } + const std::string& field_name() { return field_name_; } + const int64_t n_children() const { return static_cast(children_.size()); } + const PostgresType* child(int64_t i) const { return &children_[i]; } + + ArrowErrorCode SetSchema(ArrowSchema* schema) const { + switch (recv_) { + case PG_RECV_RECORD: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); + for (int64_t i = 0; i < n_children(); i++) { + NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); + } + break; + case PG_RECV_ARRAY: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); + NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); + break; + case PG_RECV_INT2: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); + break; + case PG_RECV_INT4: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); + break; + case PG_RECV_INT8: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); + break; + case PG_RECV_FLOAT4: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); + break; + case PG_RECV_FLOAT8: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); + break; + case PG_RECV_CHAR: + case PG_RECV_VARCHAR: + case PG_RECV_TEXT: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); + break; + case PG_RECV_BYTEA: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + break; + default: { + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + break; + } + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, field_name_.c_str())); + return 
NANOARROW_OK; + } + + private: + uint32_t oid_; + PgRecv recv_; + std::string typname_; + std::string field_name_; + std::vector children_; + + public: + static std::unordered_map AllBase() { + std::unordered_map out; + for (PgRecv recv : PgRecvAllBase()) { + PostgresType type(recv); + type.typname_ = PgRecvTypname(recv); + out.insert({PgRecvName(recv), recv}); + } + + return out; + } +}; + +class PostgresTypeResolver { + struct Item { + uint32_t oid; + const char* typname; + const char* typreceive; + uint32_t child_oid; + uint32_t base_oid; + uint32_t class_oid; + }; + + public: + PostgresTypeResolver() : base_(PostgresType::AllBase()) {} + + ArrowErrorCode Find(uint32_t oid, PostgresType* type_out, ArrowError* error) { + auto result = mapping_.find(oid); + if (result == mapping_.end()) { + ArrowErrorSet(error, "Postgres type with oid %ld not found", + static_cast(oid)); + return EINVAL; + } + + *type_out = (*result).second; + return NANOARROW_OK; + } + + ArrowErrorCode Insert(const Item& item, ArrowError* error) { + auto result = base_.find(item.typreceive); + if (result == base_.end()) { + ArrowErrorSet(error, "Base type not found for type '%s' with receive function '%s'", + item.typname, item.typreceive); + return ENOTSUP; + } + + const PostgresType& base = (*result).second; + PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); + + switch (base.recv()) { + case PostgresType::PG_RECV_ARRAY: { + PostgresType child; + NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); + mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); + break; + } + + case PostgresType::PG_RECV_RECORD: { + std::vector> child_desc; + NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); + + PostgresType out(PostgresType::PG_RECV_RECORD); + for (const auto& child_item : child_desc) { + PostgresType child; + NANOARROW_RETURN_NOT_OK(Find(child_item.first, &child, error)); + out.AddRecordChild(child_item.second, child); + } + + 
mapping_.insert({item.oid, out}); + break; + } + + case PostgresType::PG_RECV_DOMAIN: { + PostgresType base_type; + NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); + mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); + break; + } + + case PostgresType::PG_RECV_RANGE: { + PostgresType base_type; + NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); + mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); + break; + } + + default: + mapping_.insert({item.oid, type}); + break; + } + + return NANOARROW_OK; + } + + virtual ArrowErrorCode ResolveClass(uint32_t oid, + std::vector>* out, + ArrowError* error) { + ArrowErrorSet(error, "Class definition with oid %ld not found", + static_cast(oid)); + return EINVAL; + } + + private: + std::unordered_map mapping_; + std::unordered_map base_; +}; + +class ArrowConverter { + public: + ArrowConverter(ArrowType type, PgType pg_type) + : type_(type), pg_type_(pg_type), offsets_(nullptr), data_(nullptr) { + memset(&schema_view_, 0, sizeof(ArrowSchemaView)); + } + + virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, type_)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); + return NANOARROW_OK; + } + + virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); + + for (int32_t i = 0; i < 3; i++) { + switch (schema_view_.layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + if (schema_view_.layout.element_size_bits[i] == 32) { + offsets_ = ArrowArrayBuffer(array, i); + } + break; + case NANOARROW_BUFFER_TYPE_DATA: + data_ = ArrowArrayBuffer(array, i); + break; + default: + break; + } + } + + return NANOARROW_OK; + } + + virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) = 0; 
+ + virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { + return NANOARROW_OK; + } + + protected: + PgType pg_type_; + ArrowType type_; + ArrowSchemaView schema_view_; + ArrowBuffer* offsets_; + ArrowBuffer* large_offsets_; + ArrowBuffer* data_; +}; + +// Converter for Pg->Arrow conversions whose representations are identical (minus +// the bswap from network endian). This includes all integral and float types. +class NumericArrowConverter : public ArrowConverter { + public: + NumericArrowConverter(ArrowType type, PgType pg_type) + : ArrowConverter(type, pg_type), data_(nullptr) {} + + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(ArrowConverter::InitSchema(schema)); + bitwidth_ = schema_view_.layout.element_size_bits[1]; + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + return ArrowBufferAppendBufferView(data_, data); + } + + ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) override { + BufferToHostEndian(data_->data, data_->size_bytes, bitwidth_); + return NANOARROW_OK; + } + + private: + ArrowBuffer* data_; + int32_t bitwidth_; +}; + +// Converter for Pg->Arrow conversions whose Arrow representation is simply the +// bytes of the field representation. This can be used with binary and string +// Arrow types and any postgres type. 
+class BinaryArrowConverter : public ArrowConverter { + public: + BinaryArrowConverter(ArrowType type, PgType pg_type) + : ArrowConverter(type, pg_type), data_(nullptr) {} + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if ((data_->size_bytes + data.size_bytes) > std::numeric_limits::max()) { + return EOVERFLOW; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, (int32_t)data_->size_bytes)); + return NANOARROW_OK; + } + + private: + ArrowBuffer* offsets_; + ArrowBuffer* data_; + int32_t bitwidth_; +}; + +} // namespace adbcpq From 20e67b6eefda33063d45ded1d525235cfc1a1d6f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 14:16:43 -0300 Subject: [PATCH 09/90] add test to cmake --- c/driver/postgresql/CMakeLists.txt | 1 + c/driver/postgresql/nanoarrow_pg_test.cc | 26 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 c/driver/postgresql/nanoarrow_pg_test.cc diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 4d194f8f16..061c9e22e2 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -77,6 +77,7 @@ if(ADBC_BUILD_TESTS) PREFIX adbc SOURCES + nanoarrow_pg_test.cc postgresql_test.cc ../../validation/adbc_validation.cc ../../validation/adbc_validation_util.cc diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc new file mode 100644 index 0000000000..bee86546ad --- /dev/null +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "nanoarrow_pg.h" + + +TEST(PostgresNanoarrowTest, PostgresType) { + EXPECT_EQ(4, 3); +} From 32266454f10ab0e6b1e7af3b26629cd2775016aa Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 14:56:03 -0300 Subject: [PATCH 10/90] test the basics --- c/driver/postgresql/nanoarrow_pg.h | 4 +++ c/driver/postgresql/nanoarrow_pg_test.cc | 40 ++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 7b2ace61d1..7a8e30577c 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -364,6 +364,8 @@ class PostgresType { PostgresType Array(uint32_t oid, const std::string& typname) const { PostgresType out(PG_RECV_ARRAY); out.children_.push_back(WithFieldName("item")); + out.oid_ = oid; + out.typname_ = typname; return out; } @@ -374,6 +376,8 @@ class PostgresType { PostgresType Range(uint32_t oid, const std::string& typname) const { PostgresType out(PG_RECV_RANGE); out.children_.push_back(WithFieldName("item")); + out.oid_ = oid; + out.typname_ = typname; return out; } diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index bee86546ad..2e2a7d5930 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -20,7 +20,43 @@ 
#include "nanoarrow_pg.h" +using namespace adbcpq; -TEST(PostgresNanoarrowTest, PostgresType) { - EXPECT_EQ(4, 3); + +TEST(PostgresNanoarrowTest, PostgresTypeBasic) { + PostgresType type(PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.field_name(), ""); + EXPECT_EQ(type.typname(), ""); + EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.oid(), 0); + EXPECT_EQ(type.n_children(), 0); + + PostgresType with_info = type.WithPgTypeInfo(1234, "some_typename"); + EXPECT_EQ(with_info.oid(), 1234); + EXPECT_EQ(with_info.typname(), "some_typename"); + EXPECT_EQ(with_info.recv(), type.recv()); + + PostgresType with_name = type.WithFieldName("some name"); + EXPECT_EQ(with_name.field_name(), "some name"); + EXPECT_EQ(with_name.oid(), type.oid()); + EXPECT_EQ(with_name.recv(), type.recv()); + + PostgresType array = type.Array(12345, "array type name"); + EXPECT_EQ(array.oid(), 12345); + EXPECT_EQ(array.typname(), "array type name"); + EXPECT_EQ(array.n_children(), 1); + EXPECT_EQ(array.child(0)->oid(), type.oid()); + EXPECT_EQ(array.child(0)->recv(), type.recv()); + + PostgresType range = type.Range(12345, "range type name"); + EXPECT_EQ(range.oid(), 12345); + EXPECT_EQ(range.typname(), "range type name"); + EXPECT_EQ(range.n_children(), 1); + EXPECT_EQ(range.child(0)->oid(), type.oid()); + EXPECT_EQ(range.child(0)->recv(), type.recv()); + + PostgresType domain = type.Domain(123456, "domain type name"); + EXPECT_EQ(domain.oid(), 123456); + EXPECT_EQ(domain.typname(), "domain type name"); + EXPECT_EQ(domain.recv(), type.recv()); } From 5358db51f6bb58de38f4a9c2b46602255eba1914 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 15:00:20 -0300 Subject: [PATCH 11/90] test reocrds --- c/driver/postgresql/nanoarrow_pg.h | 2 +- c/driver/postgresql/nanoarrow_pg_test.cc | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 7a8e30577c..23b6a3295a 100644 --- 
a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -384,7 +384,7 @@ class PostgresType { uint32_t oid() const { return oid_; } PgRecv recv() const { return recv_; } const std::string& typname() { return typname_; } - const std::string& field_name() { return field_name_; } + const std::string& field_name() const { return field_name_; } const int64_t n_children() const { return static_cast(children_.size()); } const PostgresType* child(int64_t i) const { return &children_[i]; } diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index 2e2a7d5930..2ea021c8cc 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -59,4 +59,11 @@ TEST(PostgresNanoarrowTest, PostgresTypeBasic) { EXPECT_EQ(domain.oid(), 123456); EXPECT_EQ(domain.typname(), "domain type name"); EXPECT_EQ(domain.recv(), type.recv()); + + PostgresType record(PostgresType::PG_RECV_RECORD); + record.AddRecordChild("col1", type); + EXPECT_EQ(record.recv(), PostgresType::PG_RECV_RECORD); + EXPECT_EQ(record.n_children(), 1); + EXPECT_EQ(record.child(0)->recv(), type.recv()); + EXPECT_EQ(record.child(0)->field_name(), "col1"); } From 82b687302b2944cd7883b29aa0c185bd4b717659 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 15:21:29 -0300 Subject: [PATCH 12/90] test init schema --- c/driver/postgresql/nanoarrow_pg.h | 52 +++++++++++++------ c/driver/postgresql/nanoarrow_pg_test.cc | 64 ++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 14 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 23b6a3295a..bb7c80b654 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -24,7 +24,7 @@ #include #include -#include +#include #include "type.h" #include "util.h" @@ -122,7 +122,8 @@ class PostgresType { PG_RECV_INT4, PG_RECV_INT8, PG_RECV_INTERVAL, PG_RECV_NUMERIC, PG_RECV_OID, 
PG_RECV_TEXT, PG_RECV_TIME, PG_RECV_TIMESTAMP, PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, PG_RECV_UUID, PG_RECV_VARBIT, - PG_RECV_VARCHAR}; + PG_RECV_VARCHAR, PG_RECV_ARRAY, PG_RECV_RECORD, PG_RECV_RANGE, + PG_RECV_DOMAIN}; } static std::string PgRecvName(PgRecv recv) { @@ -334,6 +335,15 @@ class PostgresType { return "varbit"; case PG_RECV_VARCHAR: return "varchar"; + + case PG_RECV_ARRAY: + return "array"; + case PG_RECV_RECORD: + return "record"; + case PG_RECV_RANGE: + return "range"; + case PG_RECV_DOMAIN: + return "domain"; default: return ""; } @@ -361,7 +371,7 @@ class PostgresType { return out; } - PostgresType Array(uint32_t oid, const std::string& typname) const { + PostgresType Array(uint32_t oid = 0, const std::string& typname = "") const { PostgresType out(PG_RECV_ARRAY); out.children_.push_back(WithFieldName("item")); out.oid_ = oid; @@ -373,7 +383,7 @@ class PostgresType { return WithPgTypeInfo(oid, typname); } - PostgresType Range(uint32_t oid, const std::string& typname) const { + PostgresType Range(uint32_t oid = 0, const std::string& typname = "") const { PostgresType out(PG_RECV_RANGE); out.children_.push_back(WithFieldName("item")); out.oid_ = oid; @@ -390,15 +400,8 @@ class PostgresType { ArrowErrorCode SetSchema(ArrowSchema* schema) const { switch (recv_) { - case PG_RECV_RECORD: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); - for (int64_t i = 0; i < n_children(); i++) { - NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); - } - break; - case PG_RECV_ARRAY: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); - NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); + case PG_RECV_BOOL: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; case PG_RECV_INT2: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); @@ -418,13 +421,34 @@ class PostgresType { case PG_RECV_CHAR: case PG_RECV_VARCHAR: case PG_RECV_TEXT: - 
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; case PG_RECV_BYTEA: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; + + case PG_RECV_RECORD: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); + for (int64_t i = 0; i < n_children(); i++) { + NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); + } + break; + + case PG_RECV_ARRAY: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); + NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); + break; default: { + // For any types we don't explicitly know how to deal with, we can still + // return the bytes postgres gives us and attach the type name as metadata NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + nanoarrow::UniqueBuffer buffer; + ArrowMetadataBuilderInit(buffer.get(), nullptr); + NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( + buffer.get(), ArrowCharView("ADBC:posgresql:typname"), + ArrowCharView(typname_.c_str()))); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetMetadata(schema, reinterpret_cast(buffer->data))); break; } } diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index 2ea021c8cc..6b3b558961 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -67,3 +67,67 @@ TEST(PostgresNanoarrowTest, PostgresTypeBasic) { EXPECT_EQ(record.child(0)->recv(), type.recv()); EXPECT_EQ(record.child(0)->field_name(), "col1"); } + +TEST(PostgresNanoarrowTest, PostgresTypeSetSchema) { + ArrowSchema schema; + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BOOL).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "b"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + 
EXPECT_EQ(PostgresType(PostgresType::PG_RECV_INT2).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "s"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_INT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "i"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_INT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "l"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_FLOAT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "f"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_FLOAT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "g"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_TEXT).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "u"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BYTEA).SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "z"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BOOL).Array().SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "+l"); + EXPECT_STREQ(schema.children[0]->format, "b"); + schema.release(&schema); + + ArrowSchemaInit(&schema); + PostgresType record(PostgresType::PG_RECV_RECORD); + record.AddRecordChild("col1", PostgresType(PostgresType::PG_RECV_BOOL)); + EXPECT_EQ(record.SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "+s"); + EXPECT_STREQ(schema.children[0]->format, "b"); + schema.release(&schema); +} + +TEST(PostgresNanoarrowTest, PostgresTypeAllBase) { + auto base_types = PostgresType::AllBase(); + + EXPECT_EQ(base_types.size(), PostgresType::PgRecvAllBase().size()); +} From 
b4e8c7c3debc79207998dc35b7c7175a7a12a106 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 15:55:18 -0300 Subject: [PATCH 13/90] better unknown support --- c/driver/postgresql/nanoarrow_pg.h | 2 +- c/driver/postgresql/nanoarrow_pg_test.cc | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index bb7c80b654..df440adc62 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -470,7 +470,7 @@ class PostgresType { for (PgRecv recv : PgRecvAllBase()) { PostgresType type(recv); type.typname_ = PgRecvTypname(recv); - out.insert({PgRecvName(recv), recv}); + out.insert({PgRecvName(recv), type}); } return out; diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index 6b3b558961..065c0c08ca 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -124,10 +124,21 @@ TEST(PostgresNanoarrowTest, PostgresTypeSetSchema) { EXPECT_STREQ(schema.format, "+s"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); + + ArrowSchemaInit(&schema); + PostgresType unknown(PostgresType::PG_RECV_BRIN_MINMAX_MULTI_SUMMARY); + EXPECT_EQ(unknown.WithPgTypeInfo(0, "some_name").SetSchema(&schema), NANOARROW_OK); + EXPECT_STREQ(schema.format, "z"); + + ArrowStringView value = ArrowCharView(""); + ArrowMetadataGetValue(schema.metadata, ArrowCharView("ADBC:posgresql:typname"), &value); + EXPECT_EQ(std::string(value.data, value.size_bytes), "some_name"); + schema.release(&schema); } TEST(PostgresNanoarrowTest, PostgresTypeAllBase) { auto base_types = PostgresType::AllBase(); - + EXPECT_EQ(base_types["array_recv"].recv(), PostgresType::PG_RECV_ARRAY); + EXPECT_EQ(base_types["array_recv"].typname(), "array"); EXPECT_EQ(base_types.size(), PostgresType::PgRecvAllBase().size()); } From ecfacfef01f7a275d068d8c41514256c912bd2a3 Mon Sep 17 
00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 16:15:53 -0300 Subject: [PATCH 14/90] some resolver tests --- c/driver/postgresql/nanoarrow_pg.h | 2 +- c/driver/postgresql/nanoarrow_pg_test.cc | 61 +++++++++++++++++++++++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index df440adc62..51ae3502b7 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -478,6 +478,7 @@ class PostgresType { }; class PostgresTypeResolver { + public: struct Item { uint32_t oid; const char* typname; @@ -487,7 +488,6 @@ class PostgresTypeResolver { uint32_t class_oid; }; - public: PostgresTypeResolver() : base_(PostgresType::AllBase()) {} ArrowErrorCode Find(uint32_t oid, PostgresType* type_out, ArrowError* error) { diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index 065c0c08ca..c3b4c3e28e 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -22,7 +22,6 @@ using namespace adbcpq; - TEST(PostgresNanoarrowTest, PostgresTypeBasic) { PostgresType type(PostgresType::PG_RECV_BOOL); EXPECT_EQ(type.field_name(), ""); @@ -112,7 +111,8 @@ TEST(PostgresNanoarrowTest, PostgresTypeSetSchema) { schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BOOL).Array().SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BOOL).Array().SetSchema(&schema), + NANOARROW_OK); EXPECT_STREQ(schema.format, "+l"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); @@ -142,3 +142,60 @@ TEST(PostgresNanoarrowTest, PostgresTypeAllBase) { EXPECT_EQ(base_types["array_recv"].typname(), "array"); EXPECT_EQ(base_types.size(), PostgresType::PgRecvAllBase().size()); } + +TEST(PostgresNanoarrowTest, PostgresTypeResolver) { + PostgresTypeResolver resolver; + ArrowError error; + PostgresType type; + 
PostgresTypeResolver::Item item; + + // Check error for type not found + EXPECT_EQ(resolver.Find(123, &type, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), "Postgres type with oid 123 not found"); + + // Check error for unsupported recv name + item.oid = 123; + item.typname = "invalid"; + item.typreceive = "invalid_recv"; + EXPECT_EQ(resolver.Insert(item, &error), ENOTSUP); + EXPECT_STREQ( + ArrowErrorMessage(&error), + "Base type not found for type 'invalid' with receive function 'invalid_recv'"); + + // Check error for Array with unknown child + item.typname = "some_array"; + item.typreceive = "array_recv"; + item.child_oid = 1234; + EXPECT_EQ(resolver.Insert(item, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), "Postgres type with oid 1234 not found"); + + // Check error for Range with unknown child + item.typname = "some_range"; + item.typreceive = "range_recv"; + item.base_oid = 12345; + EXPECT_EQ(resolver.Insert(item, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), "Postgres type with oid 12345 not found"); + + // Check error for Domain with unknown child + item.typname = "some_domain"; + item.typreceive = "domain_recv"; + item.base_oid = 123456; + EXPECT_EQ(resolver.Insert(item, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), "Postgres type with oid 123456 not found"); + + // Check error for Record with unknown class + item.typname = "some_record"; + item.typreceive = "record_recv"; + item.class_oid = 123456; + EXPECT_EQ(resolver.Insert(item, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), "Class definition with oid 123456 not found"); + + // Check insert/resolve of regular type + item.typname = "some_type_name"; + item.typreceive = "boolrecv"; + EXPECT_EQ(resolver.Insert(item, &error), NANOARROW_OK); + EXPECT_EQ(resolver.Find(123, &type, &error), NANOARROW_OK); + EXPECT_EQ(type.oid(), 123); + EXPECT_EQ(type.typname(), "some_type_name"); + EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); +} From 
fa0ef95b8085c53c33cc9239981f4be4b4cc726f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 16:22:57 -0300 Subject: [PATCH 15/90] test domain and range and array --- c/driver/postgresql/nanoarrow_pg_test.cc | 45 ++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index c3b4c3e28e..d2d1044050 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -193,9 +193,50 @@ TEST(PostgresNanoarrowTest, PostgresTypeResolver) { // Check insert/resolve of regular type item.typname = "some_type_name"; item.typreceive = "boolrecv"; + item.oid = 10; EXPECT_EQ(resolver.Insert(item, &error), NANOARROW_OK); - EXPECT_EQ(resolver.Find(123, &type, &error), NANOARROW_OK); - EXPECT_EQ(type.oid(), 123); + EXPECT_EQ(resolver.Find(10, &type, &error), NANOARROW_OK); + EXPECT_EQ(type.oid(), 10); EXPECT_EQ(type.typname(), "some_type_name"); EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); + + // Check insert/resolve of array type + item.oid = 11; + item.typname = "some_array_type_name"; + item.typreceive = "array_recv"; + item.child_oid = 10; + EXPECT_EQ(resolver.Insert(item, &error), NANOARROW_OK); + EXPECT_EQ(resolver.Find(11, &type, &error), NANOARROW_OK); + EXPECT_EQ(type.oid(), 11); + EXPECT_EQ(type.typname(), "some_array_type_name"); + EXPECT_EQ(type.recv(), PostgresType::PG_RECV_ARRAY); + EXPECT_EQ(type.child(0)->oid(), 10); + EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_BOOL); + + // Check insert/resolve of range type + item.oid = 12; + item.typname = "some_range_type_name"; + item.typreceive = "range_recv"; + item.base_oid = 10; + EXPECT_EQ(resolver.Insert(item, &error), NANOARROW_OK); + EXPECT_EQ(resolver.Find(12, &type, &error), NANOARROW_OK); + EXPECT_EQ(type.oid(), 12); + EXPECT_EQ(type.typname(), "some_range_type_name"); + EXPECT_EQ(type.recv(), PostgresType::PG_RECV_RANGE); + 
EXPECT_EQ(type.child(0)->oid(), 10); + EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_BOOL); + + // Check insert/resolve of domain type + item.oid = 13; + item.typname = "some_domain_type_name"; + item.typreceive = "domain_recv"; + item.base_oid = 10; + EXPECT_EQ(resolver.Insert(item, &error), NANOARROW_OK); + EXPECT_EQ(resolver.Find(13, &type, &error), NANOARROW_OK); + EXPECT_EQ(type.oid(), 13); + EXPECT_EQ(type.typname(), "some_domain_type_name"); + EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); +} + +TEST(PostgresNanoarrowTest, PostgresTypeResolveRecord) { } From 8dc332781f2adf4bfdd2df4e36135377059b38e4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 17:09:10 -0300 Subject: [PATCH 16/90] more tests --- c/driver/postgresql/nanoarrow_pg.h | 27 ++++--- c/driver/postgresql/nanoarrow_pg_test.cc | 90 ++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 9 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 51ae3502b7..3ed5c1c408 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -116,14 +116,23 @@ class PostgresType { PG_RECV_XML }; - static std::vector PgRecvAllBase() { - return {PG_RECV_BIT, PG_RECV_BOOL, PG_RECV_BYTEA, PG_RECV_CASH, - PG_RECV_CHAR, PG_RECV_DATE, PG_RECV_FLOAT4, PG_RECV_FLOAT8, - PG_RECV_INT4, PG_RECV_INT8, PG_RECV_INTERVAL, PG_RECV_NUMERIC, - PG_RECV_OID, PG_RECV_TEXT, PG_RECV_TIME, PG_RECV_TIMESTAMP, - PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, PG_RECV_UUID, PG_RECV_VARBIT, - PG_RECV_VARCHAR, PG_RECV_ARRAY, PG_RECV_RECORD, PG_RECV_RANGE, - PG_RECV_DOMAIN}; + static std::vector PgRecvAllBase(bool nested = true) { + std::vector base = {PG_RECV_BIT, PG_RECV_BOOL, PG_RECV_BYTEA, + PG_RECV_CASH, PG_RECV_CHAR, PG_RECV_DATE, + PG_RECV_FLOAT4, PG_RECV_FLOAT8, PG_RECV_INT4, + PG_RECV_INT8, PG_RECV_INTERVAL, PG_RECV_NUMERIC, + PG_RECV_OID, PG_RECV_TEXT, PG_RECV_TIME, + PG_RECV_TIMESTAMP, PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, + 
PG_RECV_UUID, PG_RECV_VARBIT, PG_RECV_VARCHAR}; + + if (nested) { + base.push_back(PG_RECV_ARRAY); + base.push_back(PG_RECV_RECORD); + base.push_back(PG_RECV_RANGE); + base.push_back(PG_RECV_DOMAIN); + } + + return base; } static std::string PgRecvName(PgRecv recv) { @@ -532,7 +541,7 @@ class PostgresTypeResolver { out.AddRecordChild(child_item.second, child); } - mapping_.insert({item.oid, out}); + mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); break; } diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index d2d1044050..c25ea813e1 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -22,6 +22,84 @@ using namespace adbcpq; +class MockTypeResolver : public PostgresTypeResolver { + public: + ArrowErrorCode Init() { + auto recv_base = PostgresType::PgRecvAllBase(false); + PostgresTypeResolver::Item item; + item.oid = 0; + + // Insert all the base types + for (auto recv : recv_base) { + std::string typreceive = PostgresType::PgRecvName(recv); + std::string typname = PostgresType::PgRecvTypname(recv); + item.oid++; + item.typname = typname.c_str(); + item.typreceive = typreceive.c_str(); + NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); + oids_[recv] = item.oid; + } + + // Insert one of each nested type + item.oid++; + item.typname = "_bool"; + item.typreceive = "array_recv"; + item.child_oid = oid(PostgresType::PG_RECV_BOOL); + NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); + oids_[PostgresType::PG_RECV_ARRAY] = item.oid; + + item.oid++; + item.typname = "boolrange"; + item.typreceive = "range_recv"; + item.base_oid = oid(PostgresType::PG_RECV_BOOL); + NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); + oids_[PostgresType::PG_RECV_RANGE] = item.oid; + + item.oid++; + item.typname = "custombool"; + item.typreceive = "domain_recv"; + item.base_oid = oid(PostgresType::PG_RECV_BOOL); + NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); + 
oids_[PostgresType::PG_RECV_DOMAIN] = item.oid; + + item.oid++; + uint32_t class_oid = item.oid; + std::vector> record_fields_ = { + {oid(PostgresType::PG_RECV_INT4), "int4_col"}, + {oid(PostgresType::PG_RECV_TEXT), "text_col"} + }; + classes_.insert({class_oid, record_fields_}); + + item.oid++; + item.typname = "customrecord"; + item.typreceive = "record_recv"; + item.class_oid = class_oid; + + NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); + oids_[PostgresType::PG_RECV_RECORD] = item.oid; + + return NANOARROW_OK; + } + + uint32_t oid(PostgresType::PgRecv recv) { return oids_[recv]; } + + ArrowErrorCode ResolveClass(uint32_t oid, + std::vector>* out, + ArrowError* error) override { + auto result = classes_.find(oid); + if (result == classes_.end()) { + return PostgresTypeResolver::ResolveClass(oid, out, error); + } + + *out = (*result).second; + return NANOARROW_OK; + } + + private: + std::unordered_map oids_; + std::unordered_map>> classes_; +}; + TEST(PostgresNanoarrowTest, PostgresTypeBasic) { PostgresType type(PostgresType::PG_RECV_BOOL); EXPECT_EQ(type.field_name(), ""); @@ -239,4 +317,16 @@ TEST(PostgresNanoarrowTest, PostgresTypeResolver) { } TEST(PostgresNanoarrowTest, PostgresTypeResolveRecord) { + // Use the mock resolver for the record test since it already has one + MockTypeResolver resolver; + ASSERT_EQ(resolver.Init(), NANOARROW_OK); + + PostgresType type; + EXPECT_EQ(resolver.Find(resolver.oid(PostgresType::PG_RECV_RECORD), &type, nullptr), NANOARROW_OK); + EXPECT_EQ(type.oid(), resolver.oid(PostgresType::PG_RECV_RECORD)); + EXPECT_EQ(type.n_children(), 2); + EXPECT_EQ(type.child(0)->field_name(), "int4_col"); + EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_INT4); + EXPECT_EQ(type.child(1)->field_name(), "text_col"); + EXPECT_EQ(type.child(1)->recv(), PostgresType::PG_RECV_TEXT); } From 9ae2b245a84001d0466b12626e8b4329e2676dc9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 17:14:06 -0300 Subject: [PATCH 17/90] rtti 
--- c/driver/postgresql/nanoarrow_pg.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 3ed5c1c408..ea1a1d4387 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -26,7 +26,6 @@ #include -#include "type.h" #include "util.h" namespace adbcpq { @@ -582,11 +581,20 @@ class PostgresTypeResolver { class ArrowConverter { public: - ArrowConverter(ArrowType type, PgType pg_type) - : type_(type), pg_type_(pg_type), offsets_(nullptr), data_(nullptr) { + enum Kind { + ARROW_CONVERTER_BOOL, + ARROW_CONVERTER_NUMERIC, + ARROW_CONVERTER_BINARY, + ARROW_CONVERTER_OTHER + }; + + ArrowConverter(Kind kind, ArrowType type) + : kind_(kind), type_(type), offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } + Kind kind() { return kind_; } + virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, type_)); NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); @@ -623,7 +631,7 @@ class ArrowConverter { } protected: - PgType pg_type_; + Kind kind_; ArrowType type_; ArrowSchemaView schema_view_; ArrowBuffer* offsets_; @@ -635,8 +643,8 @@ class ArrowConverter { // the bswap from network endian). This includes all integral and float types. class NumericArrowConverter : public ArrowConverter { public: - NumericArrowConverter(ArrowType type, PgType pg_type) - : ArrowConverter(type, pg_type), data_(nullptr) {} + NumericArrowConverter(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_NUMERIC, type), data_(nullptr) {} ArrowErrorCode InitSchema(ArrowSchema* schema) override { NANOARROW_RETURN_NOT_OK(ArrowConverter::InitSchema(schema)); @@ -664,8 +672,8 @@ class NumericArrowConverter : public ArrowConverter { // Arrow types and any postgres type. 
class BinaryArrowConverter : public ArrowConverter { public: - BinaryArrowConverter(ArrowType type, PgType pg_type) - : ArrowConverter(type, pg_type), data_(nullptr) {} + BinaryArrowConverter(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_BINARY, type), data_(nullptr) {} ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { From 602ac444cd1345d3a74dde7925f3665c76a76ece Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:00:38 -0300 Subject: [PATCH 18/90] fix rat --- ...MakeUserPresets.json.example => CMakeUserPresets.example.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename c/driver/postgresql/{CMakeUserPresets.json.example => CMakeUserPresets.example.json} (100%) diff --git a/c/driver/postgresql/CMakeUserPresets.json.example b/c/driver/postgresql/CMakeUserPresets.example.json similarity index 100% rename from c/driver/postgresql/CMakeUserPresets.json.example rename to c/driver/postgresql/CMakeUserPresets.example.json From 13e96d1ec67afe9e050b4e095a112603033801fb Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:01:02 -0300 Subject: [PATCH 19/90] format --- c/driver/postgresql/nanoarrow_pg_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index c25ea813e1..6c26fcbad7 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -65,9 +65,8 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; uint32_t class_oid = item.oid; std::vector> record_fields_ = { - {oid(PostgresType::PG_RECV_INT4), "int4_col"}, - {oid(PostgresType::PG_RECV_TEXT), "text_col"} - }; + {oid(PostgresType::PG_RECV_INT4), "int4_col"}, + {oid(PostgresType::PG_RECV_TEXT), "text_col"}}; classes_.insert({class_oid, record_fields_}); item.oid++; @@ -322,7 +321,8 @@ TEST(PostgresNanoarrowTest, PostgresTypeResolveRecord) { 
ASSERT_EQ(resolver.Init(), NANOARROW_OK); PostgresType type; - EXPECT_EQ(resolver.Find(resolver.oid(PostgresType::PG_RECV_RECORD), &type, nullptr), NANOARROW_OK); + EXPECT_EQ(resolver.Find(resolver.oid(PostgresType::PG_RECV_RECORD), &type, nullptr), + NANOARROW_OK); EXPECT_EQ(type.oid(), resolver.oid(PostgresType::PG_RECV_RECORD)); EXPECT_EQ(type.n_children(), 2); EXPECT_EQ(type.child(0)->field_name(), "int4_col"); From 2f5c2b802fc57f0441c8f79fa52a6a33cd73a619 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:05:45 -0300 Subject: [PATCH 20/90] maybe fix windows build --- c/driver/postgresql/nanoarrow_pg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index ea1a1d4387..eb96656fce 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -23,6 +23,7 @@ #include #include #include +#include #include From 26aa55777588ca607ac1b3dcf4d1f5aaf44a62b3 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:06:57 -0300 Subject: [PATCH 21/90] maybe fix ubuntu build --- c/driver/postgresql/nanoarrow_pg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index eb96656fce..619a339da8 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -404,7 +404,7 @@ class PostgresType { PgRecv recv() const { return recv_; } const std::string& typname() { return typname_; } const std::string& field_name() const { return field_name_; } - const int64_t n_children() const { return static_cast(children_.size()); } + int64_t n_children() const { return static_cast(children_.size()); } const PostgresType* child(int64_t i) const { return &children_[i]; } ArrowErrorCode SetSchema(ArrowSchema* schema) const { From 4104fc42060159258dfa5a5d2144aae567897987 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:22:59 
-0300 Subject: [PATCH 22/90] punt on overflow --- c/driver/postgresql/nanoarrow_pg.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 619a339da8..996596960f 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -23,7 +23,6 @@ #include #include #include -#include #include @@ -678,10 +677,6 @@ class BinaryArrowConverter : public ArrowConverter { ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { - if ((data_->size_bytes + data.size_bytes) > std::numeric_limits::max()) { - return EOVERFLOW; - } - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, (int32_t)data_->size_bytes)); return NANOARROW_OK; From 3f05227368a7596c47f5babcaee9643856c2f0d0 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:40:06 -0300 Subject: [PATCH 23/90] maybe fix lint --- c/driver/postgresql/nanoarrow_pg.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 996596960f..3362408b67 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -502,7 +503,7 @@ class PostgresTypeResolver { auto result = mapping_.find(oid); if (result == mapping_.end()) { ArrowErrorSet(error, "Postgres type with oid %ld not found", - static_cast(oid)); + static_cast(oid)); // NOLINT(runtime/int) return EINVAL; } @@ -570,7 +571,7 @@ class PostgresTypeResolver { std::vector>* out, ArrowError* error) { ArrowErrorSet(error, "Class definition with oid %ld not found", - static_cast(oid)); + static_cast(oid)); // NOLINT(runtime/int) return EINVAL; } @@ -643,7 +644,7 @@ class ArrowConverter { // the bswap from network endian). This includes all integral and float types. 
class NumericArrowConverter : public ArrowConverter { public: - NumericArrowConverter(ArrowType type) + explicit NumericArrowConverter(ArrowType type) : ArrowConverter(ARROW_CONVERTER_NUMERIC, type), data_(nullptr) {} ArrowErrorCode InitSchema(ArrowSchema* schema) override { @@ -672,7 +673,7 @@ class NumericArrowConverter : public ArrowConverter { // Arrow types and any postgres type. class BinaryArrowConverter : public ArrowConverter { public: - BinaryArrowConverter(ArrowType type) + explicit BinaryArrowConverter(ArrowType type) : ArrowConverter(ARROW_CONVERTER_BINARY, type), data_(nullptr) {} ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, From ee05ff2acfc147bdbc3a6d8594dc676681eb2da2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:41:17 -0300 Subject: [PATCH 24/90] more lint --- c/driver/postgresql/nanoarrow_pg_test.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/nanoarrow_pg_test.cc index 6c26fcbad7..0f26afd85d 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/nanoarrow_pg_test.cc @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. 
+#include + #include #include #include "nanoarrow_pg.h" -using namespace adbcpq; +using adbcpq::PostgresType; +using adbcpq::PostgresTypeResolver; class MockTypeResolver : public PostgresTypeResolver { public: From 8d6ff12fb3aa3bbae9d22305bec6d6e31bcadcc7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 3 Apr 2023 22:50:29 -0300 Subject: [PATCH 25/90] lint --- c/driver/postgresql/nanoarrow_pg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 3362408b67..df653276af 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -358,7 +358,7 @@ class PostgresType { } } - PostgresType(PgRecv recv) : oid_(0), recv_(recv) {} + explicit PostgresType(PgRecv recv) : oid_(0), recv_(recv) {} PostgresType() : PostgresType(PG_RECV_UNINITIALIZED) {} From 2a8e34e8b3a849ff73f7e78121fc68805d9c5c75 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 00:02:02 -0300 Subject: [PATCH 26/90] maybe some reading --- c/driver/postgresql/nanoarrow_pg.h | 135 +++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 17 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index df653276af..8cf38b372b 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -584,8 +584,10 @@ class ArrowConverter { public: enum Kind { ARROW_CONVERTER_BOOL, - ARROW_CONVERTER_NUMERIC, + ARROW_CONVERTER_NETWORK_ENDIAN, ARROW_CONVERTER_BINARY, + ARROW_CONVERTER_LIST, + ARROW_CONVERTER_STRUCT, ARROW_CONVERTER_OTHER }; @@ -594,10 +596,16 @@ class ArrowConverter { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - Kind kind() { return kind_; } + virtual ~ArrowConverter() {} + + Kind kind() const { return kind_; } + + void AppendChild(ArrowConverter& child) { + children_kind_.push_back(child.kind()); + children_.push_back(std::move(child)); + } virtual ArrowErrorCode 
InitSchema(ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, type_)); NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); return NANOARROW_OK; } @@ -606,6 +614,7 @@ class ArrowConverter { NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); + // Cache some buffer pointers for (int32_t i = 0; i < 3; i++) { switch (schema_view_.layout.buffer_type[i]) { case NANOARROW_BUFFER_TYPE_DATA_OFFSET: @@ -625,7 +634,7 @@ class ArrowConverter { } virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) = 0; + ArrowError* error) {return ENOTSUP; } virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { return NANOARROW_OK; @@ -638,14 +647,45 @@ class ArrowConverter { ArrowBuffer* offsets_; ArrowBuffer* large_offsets_; ArrowBuffer* data_; + std::vector children_kind_; + std::vector children_; +}; + +// Converter for a Postgres boolean (one byte -> bitmap) +class ArrowConverterBool : public ArrowConverter { + public: + explicit ArrowConverterBool(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_BOOL, type) {} + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + int64_t bytes_required = _ArrowBytesForBits(array->length + 1); + if (bytes_required > data_->size_bytes) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); + } + + if (data.data.as_uint8[0]) { + ArrowBitSet(data_->data, array->length); + } else { + ArrowBitClear(data_->data, array->length); + } + + array->length++; + return NANOARROW_OK; + } }; // Converter for Pg->Arrow conversions whose representations are identical (minus // the bswap from network endian). This includes all integral and float types. 
-class NumericArrowConverter : public ArrowConverter { +class ArrowConverterNetworkEndian : public ArrowConverter { public: - explicit NumericArrowConverter(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_NUMERIC, type), data_(nullptr) {} + explicit ArrowConverterNetworkEndian(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_NETWORK_ENDIAN, type) {} ArrowErrorCode InitSchema(ArrowSchema* schema) override { NANOARROW_RETURN_NOT_OK(ArrowConverter::InitSchema(schema)); @@ -655,7 +695,13 @@ class NumericArrowConverter : public ArrowConverter { ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { - return ArrowBufferAppendBufferView(data_, data); + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); + array->length++; + return NANOARROW_OK; } ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) override { @@ -664,29 +710,84 @@ class NumericArrowConverter : public ArrowConverter { } private: - ArrowBuffer* data_; int32_t bitwidth_; }; // Converter for Pg->Arrow conversions whose Arrow representation is simply the // bytes of the field representation. This can be used with binary and string // Arrow types and any postgres type. 
-class BinaryArrowConverter : public ArrowConverter { +class ArrowConverterBinary : public ArrowConverter { public: - explicit BinaryArrowConverter(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_BINARY, type), data_(nullptr) {} + explicit ArrowConverterBinary(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_BINARY, type) {} ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, (int32_t)data_->size_bytes)); + int32_t* offsets = reinterpret_cast(offsets_->data); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + offsets_, offsets[array->length] + static_cast(data_->size_bytes))); + + array->length++; return NANOARROW_OK; } +}; + +class ArrowConverterList : public ArrowConverter { + public: + explicit ArrowConverterList(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_BINARY, type) {} + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + return ENOTSUP; + } +}; - private: - ArrowBuffer* offsets_; - ArrowBuffer* data_; - int32_t bitwidth_; +class ArrowConverterStruct : public ArrowConverter { + public: + explicit ArrowConverterStruct(ArrowType type) + : ArrowConverter(ARROW_CONVERTER_BINARY, type) {} + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + uint16_t n_fields = LoadNetworkInt16(data.data.as_char); + data.data.as_char += sizeof(uint16_t); + data.size_bytes -= sizeof(uint16_t); + + struct ArrowBufferView field_data; + for (uint16_t i = 0; i < n_fields; i++) { + field_data.size_bytes = LoadNetworkInt32(data.data.as_char) - sizeof(int32_t); + data.data.as_char += sizeof(int32_t); 
+ data.size_bytes -= sizeof(int32_t); + field_data.data.as_char = data.data.as_char; + + int result = children_[i].Read(field_data, array->children[i], error); + if (result == EOVERFLOW) { + for (int16_t j = 0; j < i; i++) { + array->children[j]->length--; + } + + return result; + } + + data.data.as_char += field_data.size_bytes; + } + + array->length++; + return NANOARROW_OK; + } }; } // namespace adbcpq From dcd738ce28f2c2cf6cc45a953b48322fb9e755e7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 09:54:00 -0300 Subject: [PATCH 27/90] simplify a bit --- c/driver/postgresql/nanoarrow_pg.h | 73 +++++++++--------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/nanoarrow_pg.h index 8cf38b372b..61528b9563 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/nanoarrow_pg.h @@ -582,28 +582,11 @@ class PostgresTypeResolver { class ArrowConverter { public: - enum Kind { - ARROW_CONVERTER_BOOL, - ARROW_CONVERTER_NETWORK_ENDIAN, - ARROW_CONVERTER_BINARY, - ARROW_CONVERTER_LIST, - ARROW_CONVERTER_STRUCT, - ARROW_CONVERTER_OTHER - }; - - ArrowConverter(Kind kind, ArrowType type) - : kind_(kind), type_(type), offsets_(nullptr), data_(nullptr) { + ArrowConverter() : offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - virtual ~ArrowConverter() {} - - Kind kind() const { return kind_; } - - void AppendChild(ArrowConverter& child) { - children_kind_.push_back(child.kind()); - children_.push_back(std::move(child)); - } + void AppendChild(ArrowConverter& child) { children_.push_back(std::move(child)); } virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); @@ -634,29 +617,25 @@ class ArrowConverter { } virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) {return ENOTSUP; } + ArrowError* error) { + return 
ENOTSUP; + } virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { return NANOARROW_OK; } protected: - Kind kind_; ArrowType type_; ArrowSchemaView schema_view_; ArrowBuffer* offsets_; - ArrowBuffer* large_offsets_; ArrowBuffer* data_; - std::vector children_kind_; std::vector children_; }; // Converter for a Postgres boolean (one byte -> bitmap) class ArrowConverterBool : public ArrowConverter { public: - explicit ArrowConverterBool(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_BOOL, type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -682,35 +661,23 @@ class ArrowConverterBool : public ArrowConverter { // Converter for Pg->Arrow conversions whose representations are identical (minus // the bswap from network endian). This includes all integral and float types. +template class ArrowConverterNetworkEndian : public ArrowConverter { public: - explicit ArrowConverterNetworkEndian(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_NETWORK_ENDIAN, type) {} - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(ArrowConverter::InitSchema(schema)); - bitwidth_ = schema_view_.layout.element_size_bits[1]; - return NANOARROW_OK; - } - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); - array->length++; - return NANOARROW_OK; - } + uint_type value_uint; + memcpy(&value_uint, data.data.data, sizeof(uint_type)); + value_uint = SwapNetworkToHost(value_uint); - ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) override { - BufferToHostEndian(data_->data, data_->size_bytes, bitwidth_); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&value_uint, sizeof(value_uint))); + array->length++; return NANOARROW_OK; } - - private: - int32_t bitwidth_; }; // Converter 
for Pg->Arrow conversions whose Arrow representation is simply the @@ -718,9 +685,6 @@ class ArrowConverterNetworkEndian : public ArrowConverter { // Arrow types and any postgres type. class ArrowConverterBinary : public ArrowConverter { public: - explicit ArrowConverterBinary(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_BINARY, type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -739,23 +703,28 @@ class ArrowConverterBinary : public ArrowConverter { class ArrowConverterList : public ArrowConverter { public: - explicit ArrowConverterList(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_BINARY, type) {} + ArrowConverterList(ArrowConverter& child) { + AppendChild(child); + } ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } + + // int32_t ndim + // int32_t flags + // uint32_t element_type_oid + // (struct int32_t dim_size, int32_t dim_lower_bound)[ndim] + // (struct int32_t item_size_bytes, uint8_t[item_size_bytes])[nitems] + return ENOTSUP; } }; class ArrowConverterStruct : public ArrowConverter { public: - explicit ArrowConverterStruct(ArrowType type) - : ArrowConverter(ARROW_CONVERTER_BINARY, type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { From 5c6acfe576f6d644f3cce28b0864d0d43e2530c8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 11:03:19 -0300 Subject: [PATCH 28/90] split + rename --- c/driver/postgresql/CMakeLists.txt | 2 +- c/driver/postgresql/postgres_copy_utils.h | 213 ++++++++++++++++++ .../{nanoarrow_pg.h => postgres_type.h} | 182 --------------- ...arrow_pg_test.cc => postgres_type_test.cc} | 2 +- 4 files changed, 215 insertions(+), 184 deletions(-) create mode 100644 c/driver/postgresql/postgres_copy_utils.h rename c/driver/postgresql/{nanoarrow_pg.h => 
postgres_type.h} (75%) rename c/driver/postgresql/{nanoarrow_pg_test.cc => postgres_type_test.cc} (99%) diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 061c9e22e2..628bd2b546 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -77,7 +77,7 @@ if(ADBC_BUILD_TESTS) PREFIX adbc SOURCES - nanoarrow_pg_test.cc + postgres_type_test.cc postgresql_test.cc ../../validation/adbc_validation.cc ../../validation/adbc_validation_util.cc diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h new file mode 100644 index 0000000000..f3c59dec58 --- /dev/null +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "util.h" + +namespace adbcpq { + +template +T ReadUnsafe(ArrowBufferView* data) {} + +class ArrowConverter { + public: + ArrowConverter() : offsets_(nullptr), data_(nullptr) { + memset(&schema_view_, 0, sizeof(ArrowSchemaView)); + } + + void AppendChild(ArrowConverter& child) { children_.push_back(std::move(child)); } + + virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); + return NANOARROW_OK; + } + + virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); + + // Cache some buffer pointers + for (int32_t i = 0; i < 3; i++) { + switch (schema_view_.layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + if (schema_view_.layout.element_size_bits[i] == 32) { + offsets_ = ArrowArrayBuffer(array, i); + } + break; + case NANOARROW_BUFFER_TYPE_DATA: + data_ = ArrowArrayBuffer(array, i); + break; + default: + break; + } + } + + return NANOARROW_OK; + } + + virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) { + return ENOTSUP; + } + + virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { + return NANOARROW_OK; + } + + protected: + ArrowType type_; + ArrowSchemaView schema_view_; + ArrowBuffer* offsets_; + ArrowBuffer* data_; + std::vector children_; +}; + +// Converter for a Postgres boolean (one byte -> bitmap) +class ArrowConverterBool : public ArrowConverter { + public: + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + int64_t bytes_required = _ArrowBytesForBits(array->length + 1); + if (bytes_required > data_->size_bytes) { + 
NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); + } + + if (data.data.as_uint8[0]) { + ArrowBitSet(data_->data, array->length); + } else { + ArrowBitClear(data_->data, array->length); + } + + array->length++; + return NANOARROW_OK; + } +}; + +// Converter for Pg->Arrow conversions whose representations are identical (minus +// the bswap from network endian). This includes all integral and float types. +template +class ArrowConverterNetworkEndian : public ArrowConverter { + public: + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + uint_type value_uint; + memcpy(&value_uint, data.data.data, sizeof(uint_type)); + value_uint = SwapNetworkToHost(value_uint); + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&value_uint, sizeof(value_uint))); + array->length++; + return NANOARROW_OK; + } +}; + +// Converter for Pg->Arrow conversions whose Arrow representation is simply the +// bytes of the field representation. This can be used with binary and string +// Arrow types and any postgres type. 
+class ArrowConverterBinary : public ArrowConverter { + public: + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); + int32_t* offsets = reinterpret_cast(offsets_->data); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + offsets_, offsets[array->length] + static_cast(data_->size_bytes))); + + array->length++; + return NANOARROW_OK; + } +}; + +class ArrowConverterList : public ArrowConverter { + public: + ArrowConverterList(ArrowConverter& child) { AppendChild(child); } + + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + // int32_t ndim + // int32_t flags + // uint32_t element_type_oid + // (struct int32_t dim_size, int32_t dim_lower_bound)[ndim] + // (struct int32_t item_size_bytes, uint8_t[item_size_bytes])[nitems] + + return ENOTSUP; + } +}; + +class ArrowConverterStruct : public ArrowConverter { + public: + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowError* error) override { + if (data.size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + uint16_t n_fields = LoadNetworkInt16(data.data.as_char); + data.data.as_char += sizeof(uint16_t); + data.size_bytes -= sizeof(uint16_t); + + struct ArrowBufferView field_data; + for (uint16_t i = 0; i < n_fields; i++) { + field_data.size_bytes = LoadNetworkInt32(data.data.as_char) - sizeof(int32_t); + data.data.as_char += sizeof(int32_t); + data.size_bytes -= sizeof(int32_t); + field_data.data.as_char = data.data.as_char; + + int result = children_[i].Read(field_data, array->children[i], error); + if (result == EOVERFLOW) { + for (int16_t j = 0; j < i; i++) { + array->children[j]->length--; + } + + return result; + } + + data.data.as_char += field_data.size_bytes; + } + + 
array->length++; + return NANOARROW_OK; + } +}; + +} // namespace adbcpq \ No newline at end of file diff --git a/c/driver/postgresql/nanoarrow_pg.h b/c/driver/postgresql/postgres_type.h similarity index 75% rename from c/driver/postgresql/nanoarrow_pg.h rename to c/driver/postgresql/postgres_type.h index 61528b9563..f36c8c2e97 100644 --- a/c/driver/postgresql/nanoarrow_pg.h +++ b/c/driver/postgresql/postgres_type.h @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -27,8 +26,6 @@ #include -#include "util.h" - namespace adbcpq { class PostgresType { @@ -580,183 +577,4 @@ class PostgresTypeResolver { std::unordered_map base_; }; -class ArrowConverter { - public: - ArrowConverter() : offsets_(nullptr), data_(nullptr) { - memset(&schema_view_, 0, sizeof(ArrowSchemaView)); - } - - void AppendChild(ArrowConverter& child) { children_.push_back(std::move(child)); } - - virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); - return NANOARROW_OK; - } - - virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); - - // Cache some buffer pointers - for (int32_t i = 0; i < 3; i++) { - switch (schema_view_.layout.buffer_type[i]) { - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - if (schema_view_.layout.element_size_bits[i] == 32) { - offsets_ = ArrowArrayBuffer(array, i); - } - break; - case NANOARROW_BUFFER_TYPE_DATA: - data_ = ArrowArrayBuffer(array, i); - break; - default: - break; - } - } - - return NANOARROW_OK; - } - - virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) { - return ENOTSUP; - } - - virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { - return NANOARROW_OK; - } - - protected: - ArrowType type_; - ArrowSchemaView schema_view_; - ArrowBuffer* 
offsets_; - ArrowBuffer* data_; - std::vector children_; -}; - -// Converter for a Postgres boolean (one byte -> bitmap) -class ArrowConverterBool : public ArrowConverter { - public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - if (data.size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - int64_t bytes_required = _ArrowBytesForBits(array->length + 1); - if (bytes_required > data_->size_bytes) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); - } - - if (data.data.as_uint8[0]) { - ArrowBitSet(data_->data, array->length); - } else { - ArrowBitClear(data_->data, array->length); - } - - array->length++; - return NANOARROW_OK; - } -}; - -// Converter for Pg->Arrow conversions whose representations are identical (minus -// the bswap from network endian). This includes all integral and float types. -template -class ArrowConverterNetworkEndian : public ArrowConverter { - public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - if (data.size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - uint_type value_uint; - memcpy(&value_uint, data.data.data, sizeof(uint_type)); - value_uint = SwapNetworkToHost(value_uint); - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&value_uint, sizeof(value_uint))); - array->length++; - return NANOARROW_OK; - } -}; - -// Converter for Pg->Arrow conversions whose Arrow representation is simply the -// bytes of the field representation. This can be used with binary and string -// Arrow types and any postgres type. 
-class ArrowConverterBinary : public ArrowConverter { - public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - if (data.size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); - int32_t* offsets = reinterpret_cast(offsets_->data); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - offsets_, offsets[array->length] + static_cast(data_->size_bytes))); - - array->length++; - return NANOARROW_OK; - } -}; - -class ArrowConverterList : public ArrowConverter { - public: - ArrowConverterList(ArrowConverter& child) { - AppendChild(child); - } - - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - if (data.size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - // int32_t ndim - // int32_t flags - // uint32_t element_type_oid - // (struct int32_t dim_size, int32_t dim_lower_bound)[ndim] - // (struct int32_t item_size_bytes, uint8_t[item_size_bytes])[nitems] - - return ENOTSUP; - } -}; - -class ArrowConverterStruct : public ArrowConverter { - public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) override { - if (data.size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - uint16_t n_fields = LoadNetworkInt16(data.data.as_char); - data.data.as_char += sizeof(uint16_t); - data.size_bytes -= sizeof(uint16_t); - - struct ArrowBufferView field_data; - for (uint16_t i = 0; i < n_fields; i++) { - field_data.size_bytes = LoadNetworkInt32(data.data.as_char) - sizeof(int32_t); - data.data.as_char += sizeof(int32_t); - data.size_bytes -= sizeof(int32_t); - field_data.data.as_char = data.data.as_char; - - int result = children_[i].Read(field_data, array->children[i], error); - if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; i++) { - array->children[j]->length--; - } - - return result; - } - - data.data.as_char += field_data.size_bytes; - } - - 
array->length++; - return NANOARROW_OK; - } -}; - } // namespace adbcpq diff --git a/c/driver/postgresql/nanoarrow_pg_test.cc b/c/driver/postgresql/postgres_type_test.cc similarity index 99% rename from c/driver/postgresql/nanoarrow_pg_test.cc rename to c/driver/postgresql/postgres_type_test.cc index 0f26afd85d..f5b7b2e4fc 100644 --- a/c/driver/postgresql/nanoarrow_pg_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -20,7 +20,7 @@ #include #include -#include "nanoarrow_pg.h" +#include "postgres_type.h" using adbcpq::PostgresType; using adbcpq::PostgresTypeResolver; From 6367ca29c9b4d99daf57fd276c29149277656e42 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 11:06:57 -0300 Subject: [PATCH 29/90] wire up another test --- c/driver/postgresql/CMakeLists.txt | 1 + c/driver/postgresql/postgres_copy_utils.h | 2 -- .../postgresql/postgres_copy_utils_test.cc | 27 +++++++++++++++++++ c/driver/postgresql/postgres_type_test.cc | 10 +++---- 4 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 c/driver/postgresql/postgres_copy_utils_test.cc diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 628bd2b546..c3deb6ab00 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -77,6 +77,7 @@ if(ADBC_BUILD_TESTS) PREFIX adbc SOURCES + postgres_copy_utils_test.cc postgres_type_test.cc postgresql_test.cc ../../validation/adbc_validation.cc diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index f3c59dec58..6fd081618c 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -20,8 +20,6 @@ #include #include #include -#include -#include #include #include diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc new file mode 100644 index 0000000000..a8c76e53b9 --- /dev/null +++ 
b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +#include "postgres_copy_utils.h" + +TEST(PostgresCopyUtilsTest, PostgresTypeBasic) { + EXPECT_EQ(4, 4); +} diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index f5b7b2e4fc..704fe2c3b2 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -102,7 +102,7 @@ class MockTypeResolver : public PostgresTypeResolver { std::unordered_map>> classes_; }; -TEST(PostgresNanoarrowTest, PostgresTypeBasic) { +TEST(PostgresTypeTest, PostgresTypeBasic) { PostgresType type(PostgresType::PG_RECV_BOOL); EXPECT_EQ(type.field_name(), ""); EXPECT_EQ(type.typname(), ""); @@ -147,7 +147,7 @@ TEST(PostgresNanoarrowTest, PostgresTypeBasic) { EXPECT_EQ(record.child(0)->field_name(), "col1"); } -TEST(PostgresNanoarrowTest, PostgresTypeSetSchema) { +TEST(PostgresTypeTest, PostgresTypeSetSchema) { ArrowSchema schema; ArrowSchemaInit(&schema); @@ -216,14 +216,14 @@ TEST(PostgresNanoarrowTest, PostgresTypeSetSchema) { schema.release(&schema); } -TEST(PostgresNanoarrowTest, PostgresTypeAllBase) { 
+TEST(PostgresTypeTest, PostgresTypeAllBase) { auto base_types = PostgresType::AllBase(); EXPECT_EQ(base_types["array_recv"].recv(), PostgresType::PG_RECV_ARRAY); EXPECT_EQ(base_types["array_recv"].typname(), "array"); EXPECT_EQ(base_types.size(), PostgresType::PgRecvAllBase().size()); } -TEST(PostgresNanoarrowTest, PostgresTypeResolver) { +TEST(PostgresTypeTest, PostgresTypeResolver) { PostgresTypeResolver resolver; ArrowError error; PostgresType type; @@ -318,7 +318,7 @@ TEST(PostgresNanoarrowTest, PostgresTypeResolver) { EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); } -TEST(PostgresNanoarrowTest, PostgresTypeResolveRecord) { +TEST(PostgresTypeTest, PostgresTypeResolveRecord) { // Use the mock resolver for the record test since it already has one MockTypeResolver resolver; ASSERT_EQ(resolver.Init(), NANOARROW_OK); From e1009cf8d09af38a47abd26534ff09d4b3ac983d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 12:14:27 -0300 Subject: [PATCH 30/90] more utils --- c/driver/postgresql/postgres_copy_utils.h | 171 ++++++++++++++---- .../postgresql/postgres_copy_utils_test.cc | 2 - c/driver/postgresql/util.h | 32 ---- 3 files changed, 136 insertions(+), 69 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 6fd081618c..875db37d76 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -28,23 +28,72 @@ namespace adbcpq { +static int8_t kPgCopyBinarySignature[] = {'P', 'G', 'C', 'O', 'P', 'Y', + '\n', '\377', '\r', '\n', '\0'}; + +// Read a value from the buffer without checking the buffer size. Advances +// the cursor of data and reduces its size by sizeof(T). 
+template +inline T ReadUnsafe(ArrowBufferView* data) { + T out; + memcpy(&out, data->data.data, sizeof(T)); + out = SwapNetworkToHost(out); + data->data.as_uint8 += sizeof(T); + data->size_bytes -= sizeof(T); + return out; +} + +// Define some explicit specializations for types that don't have a SwapNetworkToHost +// overload. +template <> +inline int8_t ReadUnsafe(ArrowBufferView* data) { + int8_t out = data->data.as_int8[0]; + data->data.as_uint8 += sizeof(int8_t); + data->size_bytes -= sizeof(int8_t); + return out; +} + +template <> +inline int16_t ReadUnsafe(ArrowBufferView* data) { + return static_cast(ReadUnsafe(data)); +} + +template <> +inline int32_t ReadUnsafe(ArrowBufferView* data) { + return static_cast(ReadUnsafe(data)); +} + +template <> +inline int64_t ReadUnsafe(ArrowBufferView* data) { + return static_cast(ReadUnsafe(data)); +} + template -T ReadUnsafe(ArrowBufferView* data) {} +ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { + if (data->size_bytes < sizeof(T)) { + ArrowErrorSet(error, "Unexpected end of input (expected %d bytes but found %ld)", + (int)sizeof(T), (long)data->size_bytes); + return EINVAL; + } -class ArrowConverter { + *out = ReadUnsafe(data); + return NANOARROW_OK; +} + +class PostgresCopyReader { public: - ArrowConverter() : offsets_(nullptr), data_(nullptr) { + PostgresCopyReader() : offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - void AppendChild(ArrowConverter& child) { children_.push_back(std::move(child)); } + void AppendChild(PostgresCopyReader& child) { children_.push_back(std::move(child)); } - virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { + ArrowErrorCode InitSchema(ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); return NANOARROW_OK; } - virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { + ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { 
NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); @@ -72,7 +121,7 @@ class ArrowConverter { return ENOTSUP; } - virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { + ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { return NANOARROW_OK; } @@ -81,11 +130,11 @@ class ArrowConverter { ArrowSchemaView schema_view_; ArrowBuffer* offsets_; ArrowBuffer* data_; - std::vector children_; + std::vector children_; }; // Converter for a Postgres boolean (one byte -> bitmap) -class ArrowConverterBool : public ArrowConverter { +class PostgresCopyReaderBool : public PostgresCopyReader { public: ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { @@ -99,7 +148,10 @@ class ArrowConverterBool : public ArrowConverter { ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); } - if (data.data.as_uint8[0]) { + int8_t value; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &value, error)); + + if (value) { ArrowBitSet(data_->data, array->length); } else { ArrowBitClear(data_->data, array->length); @@ -110,10 +162,10 @@ class ArrowConverterBool : public ArrowConverter { } }; -// Converter for Pg->Arrow conversions whose representations are identical (minus -// the bswap from network endian). This includes all integral and float types. -template -class ArrowConverterNetworkEndian : public ArrowConverter { +// Converter for Pg->Arrow conversions whose representations are identical minus +// the bswap from network endian. This includes all integral and float types. 
+template +class PostgresCopyReaderNetworkEndian : public PostgresCopyReader { public: ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { @@ -121,20 +173,22 @@ class ArrowConverterNetworkEndian : public ArrowConverter { return ArrowArrayAppendNull(array, 1); } - uint_type value_uint; - memcpy(&value_uint, data.data.data, sizeof(uint_type)); - value_uint = SwapNetworkToHost(value_uint); - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&value_uint, sizeof(value_uint))); + T value_uint; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &value_uint, error)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&value_uint, sizeof(T))); array->length++; return NANOARROW_OK; } }; +using PostgresCopyReaderNetworkEndian16 = PostgresCopyReaderNetworkEndian; +using PostgresCopyReaderNetworkEndian32 = PostgresCopyReaderNetworkEndian; +using PostgresCopyReaderNetworkEndian64 = PostgresCopyReaderNetworkEndian; + // Converter for Pg->Arrow conversions whose Arrow representation is simply the // bytes of the field representation. This can be used with binary and string // Arrow types and any postgres type. 
-class ArrowConverterBinary : public ArrowConverter { +class PostgresCopyReaderBinary : public PostgresCopyReader { public: ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { @@ -152,9 +206,9 @@ class ArrowConverterBinary : public ArrowConverter { } }; -class ArrowConverterList : public ArrowConverter { +class PostgresCopyReaderList : public PostgresCopyReader { public: - ArrowConverterList(ArrowConverter& child) { AppendChild(child); } + PostgresCopyReaderList(PostgresCopyReader& child) { AppendChild(child); } ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { @@ -162,17 +216,58 @@ class ArrowConverterList : public ArrowConverter { return ArrowArrayAppendNull(array, 1); } - // int32_t ndim - // int32_t flags - // uint32_t element_type_oid - // (struct int32_t dim_size, int32_t dim_lower_bound)[ndim] + int32_t n_dim; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &n_dim, error)); + int32_t flags; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &flags, error)); + uint32_t element_type_oid; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &element_type_oid, error)); + + if (n_dim <= 0) { + ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", + static_cast(n_dim)); + return EINVAL; + } + + int64_t n_items = 1; + for (int32_t i = 0; i < n_dim; i++) { + int32_t dim_size; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &dim_size, error)); + n_items *= dim_size; + + int32_t lower_bound; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &lower_bound, error)); + } + + ArrowBufferView field_data; + for (int64_t i = 0; i < n_items; i++) { + int32_t field_length; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &field_length, error)); + field_data.data.as_uint8 = data.data.as_uint8; + field_data.size_bytes = field_length; + + // Note: Read() here is a virtual method call + int result = children_[0].Read(field_data, array->children[i], error); + if (result == EOVERFLOW) { + for (int16_t j = 0; j < i; i++) { + 
array->children[j]->length--; + } + + return result; + } + + if (field_length > 0) { + data.data.as_uint8 += field_length; + data.size_bytes -= field_length; + } + } // (struct int32_t item_size_bytes, uint8_t[item_size_bytes])[nitems] return ENOTSUP; } }; -class ArrowConverterStruct : public ArrowConverter { +class PostgresCopyReaderStruct : public PostgresCopyReader { public: ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { @@ -180,17 +275,20 @@ class ArrowConverterStruct : public ArrowConverter { return ArrowArrayAppendNull(array, 1); } - uint16_t n_fields = LoadNetworkInt16(data.data.as_char); - data.data.as_char += sizeof(uint16_t); - data.size_bytes -= sizeof(uint16_t); + int16_t n_fields; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &n_fields, error)); + if (n_fields < 0) { + return ENODATA; + } struct ArrowBufferView field_data; for (uint16_t i = 0; i < n_fields; i++) { - field_data.size_bytes = LoadNetworkInt32(data.data.as_char) - sizeof(int32_t); - data.data.as_char += sizeof(int32_t); - data.size_bytes -= sizeof(int32_t); - field_data.data.as_char = data.data.as_char; + int32_t field_length; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &field_length, error)); + field_data.data.as_uint8 = data.data.as_uint8; + field_data.size_bytes = field_length; + // Note: Read() here is a virtual method call int result = children_[i].Read(field_data, array->children[i], error); if (result == EOVERFLOW) { for (int16_t j = 0; j < i; i++) { @@ -200,7 +298,10 @@ class ArrowConverterStruct : public ArrowConverter { return result; } - data.data.as_char += field_data.size_bytes; + if (field_length > 0) { + data.data.as_uint8 += field_length; + data.size_bytes -= field_length; + } } array->length++; diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc index a8c76e53b9..c25a12af7b 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ 
b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include - #include #include diff --git a/c/driver/postgresql/util.h b/c/driver/postgresql/util.h index 118114e749..c5982f0a8f 100644 --- a/c/driver/postgresql/util.h +++ b/c/driver/postgresql/util.h @@ -76,38 +76,6 @@ static inline uint64_t SwapNetworkToHost(uint64_t x) { return be64toh(x); } static inline uint64_t SwapHostToNetwork(uint64_t x) { return htobe64(x); } #endif -static inline void BufferToHostEndian(uint8_t* data, int64_t size_bytes, - int32_t bitwidth) { - switch (bitwidth) { - case 1: - case 8: - break; - case 16: { - uint16_t* data_uint = reinterpret_cast(data); - for (int64_t i = 0; i < size_bytes / 2; i++) { - data_uint[i] = SwapNetworkToHost(data_uint[i]); - } - break; - } - case 32: { - uint32_t* data_uint = reinterpret_cast(data); - for (int64_t i = 0; i < size_bytes / 4; i++) { - data_uint[i] = SwapNetworkToHost(data_uint[i]); - } - break; - } - case 64: { - uint64_t* data_uint = reinterpret_cast(data); - for (int64_t i = 0; i < size_bytes / 8; i++) { - data_uint[i] = SwapNetworkToHost(data_uint[i]); - } - break; - } - default: - break; - } -} - // see arrow/util/string_builder.h template From fcbfa021f1f285fbeb5681712c5660bf1b0f4675 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 12:43:49 -0300 Subject: [PATCH 31/90] fiddling with nested types --- c/driver/postgresql/postgres_copy_utils.h | 43 +++++++++++++++++++---- c/driver/postgresql/postgres_type.h | 8 ++--- c/driver/postgresql/postgres_type_test.cc | 4 +-- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 875db37d76..6bd84f50a9 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -24,6 +24,7 @@ #include +#include "postgres_type.h" #include "util.h" namespace 
adbcpq { @@ -82,11 +83,17 @@ ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { class PostgresCopyReader { public: - PostgresCopyReader() : offsets_(nullptr), data_(nullptr) { + PostgresCopyReader(const PostgresType& pg_type) + : pg_type_(std::move(pg_type)), offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - void AppendChild(PostgresCopyReader& child) { children_.push_back(std::move(child)); } + void AppendChild(PostgresCopyReader& child) { + pg_type_.AppendChild(child.pg_type_.field_name(), child.pg_type_); + children_.push_back(std::move(child)); + } + + const PostgresType& InputType() const { return pg_type_; } ArrowErrorCode InitSchema(ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); @@ -126,7 +133,7 @@ class PostgresCopyReader { } protected: - ArrowType type_; + PostgresType pg_type_; ArrowSchemaView schema_view_; ArrowBuffer* offsets_; ArrowBuffer* data_; @@ -136,6 +143,8 @@ class PostgresCopyReader { // Converter for a Postgres boolean (one byte -> bitmap) class PostgresCopyReaderBool : public PostgresCopyReader { public: + PostgresCopyReaderBool(const PostgresType& pg_type) : PostgresCopyReader(pg_type) {} + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -167,6 +176,9 @@ class PostgresCopyReaderBool : public PostgresCopyReader { template class PostgresCopyReaderNetworkEndian : public PostgresCopyReader { public: + PostgresCopyReaderNetworkEndian(const PostgresType& pg_type) + : PostgresCopyReader(pg_type) {} + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -190,6 +202,8 @@ using PostgresCopyReaderNetworkEndian64 = PostgresCopyReaderNetworkEndian(&data, &element_type_oid, error)); + if (element_type_oid != children_[0].InputType().oid()) { + ArrowErrorSet(error, + "Expected array child value 
with oid %ld but got array child value " + "with oid %ld", + static_cast(children_[0].InputType().oid()), + static_cast(element_type_oid)); + return EINVAL; + } + if (n_dim <= 0) { ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", static_cast(n_dim)); @@ -237,6 +260,10 @@ class PostgresCopyReaderList : public PostgresCopyReader { int32_t lower_bound; NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &lower_bound, error)); + if (lower_bound != 0) { + ArrowErrorSet(error, "Array value with lower bound != 0 is not supported"); + return EINVAL; + } } ArrowBufferView field_data; @@ -249,7 +276,7 @@ class PostgresCopyReaderList : public PostgresCopyReader { // Note: Read() here is a virtual method call int result = children_[0].Read(field_data, array->children[i], error); if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; i++) { + for (int16_t j = 0; j < i; j++) { array->children[j]->length--; } @@ -261,14 +288,16 @@ class PostgresCopyReaderList : public PostgresCopyReader { data.size_bytes -= field_length; } } - // (struct int32_t item_size_bytes, uint8_t[item_size_bytes])[nitems] - return ENOTSUP; + NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); + return NANOARROW_OK; } }; class PostgresCopyReaderStruct : public PostgresCopyReader { public: + PostgresCopyReaderStruct(const PostgresType& pg_type) : PostgresCopyReader(pg_type) {} + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index f36c8c2e97..6596c9d808 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -359,7 +359,7 @@ class PostgresType { PostgresType() : PostgresType(PG_RECV_UNINITIALIZED) {} - void AddRecordChild(const std::string& field_name, const PostgresType& type) { + void AppendChild(const std::string& field_name, const PostgresType& type) { PostgresType child(type); 
children_.push_back(child.WithFieldName(field_name)); } @@ -379,7 +379,7 @@ class PostgresType { PostgresType Array(uint32_t oid = 0, const std::string& typname = "") const { PostgresType out(PG_RECV_ARRAY); - out.children_.push_back(WithFieldName("item")); + out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; return out; @@ -391,7 +391,7 @@ class PostgresType { PostgresType Range(uint32_t oid = 0, const std::string& typname = "") const { PostgresType out(PG_RECV_RANGE); - out.children_.push_back(WithFieldName("item")); + out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; return out; @@ -535,7 +535,7 @@ class PostgresTypeResolver { for (const auto& child_item : child_desc) { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(child_item.first, &child, error)); - out.AddRecordChild(child_item.second, child); + out.AppendChild(child_item.second, child); } mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 704fe2c3b2..d55aee98cb 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -140,7 +140,7 @@ TEST(PostgresTypeTest, PostgresTypeBasic) { EXPECT_EQ(domain.recv(), type.recv()); PostgresType record(PostgresType::PG_RECV_RECORD); - record.AddRecordChild("col1", type); + record.AppendChild("col1", type); EXPECT_EQ(record.recv(), PostgresType::PG_RECV_RECORD); EXPECT_EQ(record.n_children(), 1); EXPECT_EQ(record.child(0)->recv(), type.recv()); @@ -199,7 +199,7 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { ArrowSchemaInit(&schema); PostgresType record(PostgresType::PG_RECV_RECORD); - record.AddRecordChild("col1", PostgresType(PostgresType::PG_RECV_BOOL)); + record.AppendChild("col1", PostgresType(PostgresType::PG_RECV_BOOL)); EXPECT_EQ(record.SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+s"); EXPECT_STREQ(schema.children[0]->format, 
"b"); From 547f4912a67fec65e25596520790ae11d8ae632d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 13:55:05 -0300 Subject: [PATCH 32/90] some more tweaks, add test data --- c/driver/postgresql/postgres_copy_utils.h | 1 - .../postgresql/postgres_copy_utils_test.cc | 92 ++++++++++++++++++- 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 6bd84f50a9..295cba21a1 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -89,7 +89,6 @@ class PostgresCopyReader { } void AppendChild(PostgresCopyReader& child) { - pg_type_.AppendChild(child.pg_type_.field_name(), child.pg_type_); children_.push_back(std::move(child)); } diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc index c25a12af7b..560d583396 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -20,6 +20,92 @@ #include "postgres_copy_utils.h" -TEST(PostgresCopyUtilsTest, PostgresTypeBasic) { - EXPECT_EQ(4, 4); -} +// COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS +// drvd("col")) TO STDOUT; +static uint8_t kTestPgCopyBoolean[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopySmallInt[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xff, 0xff, 
0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyInteger[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, + 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyBigInt[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), +// (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyReal[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xc2, 0xf6, 0xe9, + 0x79, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xbf, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, + 
0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), +// (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyDoublePrecision[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xc0, 0x5e, 0xdd, + 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xbf, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x3f, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, + 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), +// (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyDoublePrecision[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, + 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, +// 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyIntegerArray[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0x85, 0x00, 0x00, 0x00, + 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x03, 
0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, + 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); +// COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), +// ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyCustomRecord[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x00, 0x7b, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x7c, 0x8c, + 0x9f, 0xbe, 0x76, 0xc8, 0xb4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x75, 0x9a, 0xd9, + 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresTypeBasic) { EXPECT_EQ(4, 4); } From 45f52f7df2d5914b954d23b02293c5937a902906 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 13:56:32 -0300 Subject: [PATCH 33/90] tidy --- c/driver/postgresql/postgres_copy_utils.h | 20 ++++++++++--------- .../postgresql/postgres_copy_utils_test.cc | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 295cba21a1..636d50c732 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -83,14 +83,12 @@ ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { class PostgresCopyReader { public: - PostgresCopyReader(const PostgresType& pg_type) + explicit 
PostgresCopyReader(const PostgresType& pg_type) : pg_type_(std::move(pg_type)), offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - void AppendChild(PostgresCopyReader& child) { - children_.push_back(std::move(child)); - } + void AppendChild(PostgresCopyReader& child) { children_.push_back(std::move(child)); } const PostgresType& InputType() const { return pg_type_; } @@ -142,7 +140,8 @@ class PostgresCopyReader { // Converter for a Postgres boolean (one byte -> bitmap) class PostgresCopyReaderBool : public PostgresCopyReader { public: - PostgresCopyReaderBool(const PostgresType& pg_type) : PostgresCopyReader(pg_type) {} + explicit PostgresCopyReaderBool(const PostgresType& pg_type) + : PostgresCopyReader(pg_type) {} ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { @@ -175,7 +174,7 @@ class PostgresCopyReaderBool : public PostgresCopyReader { template class PostgresCopyReaderNetworkEndian : public PostgresCopyReader { public: - PostgresCopyReaderNetworkEndian(const PostgresType& pg_type) + explicit PostgresCopyReaderNetworkEndian(const PostgresType& pg_type) : PostgresCopyReader(pg_type) {} ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, @@ -201,7 +200,8 @@ using PostgresCopyReaderNetworkEndian64 = PostgresCopyReaderNetworkEndian Date: Tue, 4 Apr 2023 14:27:23 -0300 Subject: [PATCH 34/90] fix sign compare --- c/driver/postgresql/postgres_copy_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 636d50c732..f171a3648b 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -71,7 +71,7 @@ inline int64_t ReadUnsafe(ArrowBufferView* data) { template ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { - if (data->size_bytes < sizeof(T)) { + if (data->size_bytes < static_cast(sizeof(T))) { 
ArrowErrorSet(error, "Unexpected end of input (expected %d bytes but found %ld)", (int)sizeof(T), (long)data->size_bytes); return EINVAL; From 4baafe389e3c25f903f21e17b0b780e26749eb3b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 15:03:35 -0300 Subject: [PATCH 35/90] maybe fix cpptidy --- c/driver/postgresql/postgres_copy_utils.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index f171a3648b..d9ab1d010b 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -73,7 +74,8 @@ template ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { if (data->size_bytes < static_cast(sizeof(T))) { ArrowErrorSet(error, "Unexpected end of input (expected %d bytes but found %ld)", - (int)sizeof(T), (long)data->size_bytes); + static_cast(sizeof(T)), + static_cast(data->size_bytes)); // NOLINT(runtime/int) return EINVAL; } @@ -88,7 +90,9 @@ class PostgresCopyReader { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - void AppendChild(PostgresCopyReader& child) { children_.push_back(std::move(child)); } + void AppendChild(const PostgresCopyReader& child) { + children_.push_back(std::move(child)); + } const PostgresType& InputType() const { return pg_type_; } @@ -242,7 +246,7 @@ class PostgresCopyReaderList : public PostgresCopyReader { "Expected array child value with oid %ld but got array child value " "with oid %ld", static_cast(children_[0].InputType().oid()), - static_cast(element_type_oid)); + static_cast(element_type_oid)); // NOLINT(runtime/int) return EINVAL; } From 65f626629b57ea5931c760ed8ef55b19dfd4aba7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 4 Apr 2023 16:48:02 -0300 Subject: [PATCH 36/90] sketching --- c/driver/postgresql/postgres_copy_utils.h | 181 
++++++++++++++++++++-- c/driver/postgresql/postgres_type.h | 2 +- 2 files changed, 171 insertions(+), 12 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index d9ab1d010b..a36f14e885 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -90,7 +90,9 @@ class PostgresCopyReader { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - void AppendChild(const PostgresCopyReader& child) { + virtual ~PostgresCopyReader() {} + + void AppendChild(std::unique_ptr child) { children_.push_back(std::move(child)); } @@ -138,7 +140,7 @@ class PostgresCopyReader { ArrowSchemaView schema_view_; ArrowBuffer* offsets_; ArrowBuffer* data_; - std::vector children_; + std::vector> children_; }; // Converter for a Postgres boolean (one byte -> bitmap) @@ -189,16 +191,12 @@ class PostgresCopyReaderNetworkEndian : public PostgresCopyReader { T value_uint; NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &value_uint, error)); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&value_uint, sizeof(T))); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value_uint, sizeof(T))); array->length++; return NANOARROW_OK; } }; -using PostgresCopyReaderNetworkEndian16 = PostgresCopyReaderNetworkEndian; -using PostgresCopyReaderNetworkEndian32 = PostgresCopyReaderNetworkEndian; -using PostgresCopyReaderNetworkEndian64 = PostgresCopyReaderNetworkEndian; - // Converter for Pg->Arrow conversions whose Arrow representation is simply the // bytes of the field representation. This can be used with binary and string // Arrow types and any postgres type. 
@@ -241,11 +239,11 @@ class PostgresCopyReaderList : public PostgresCopyReader { uint32_t element_type_oid; NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &element_type_oid, error)); - if (element_type_oid != children_[0].InputType().oid()) { + if (element_type_oid != children_[0]->InputType().oid()) { ArrowErrorSet(error, "Expected array child value with oid %ld but got array child value " "with oid %ld", - static_cast(children_[0].InputType().oid()), + static_cast(children_[0]->InputType().oid()), static_cast(element_type_oid)); // NOLINT(runtime/int) return EINVAL; } @@ -278,7 +276,7 @@ class PostgresCopyReaderList : public PostgresCopyReader { field_data.size_bytes = field_length; // Note: Read() here is a virtual method call - int result = children_[0].Read(field_data, array->children[i], error); + int result = children_[0]->Read(field_data, array->children[i], error); if (result == EOVERFLOW) { for (int16_t j = 0; j < i; j++) { array->children[j]->length--; @@ -323,7 +321,7 @@ class PostgresCopyReaderStruct : public PostgresCopyReader { field_data.size_bytes = field_length; // Note: Read() here is a virtual method call - int result = children_[i].Read(field_data, array->children[i], error); + int result = children_[i]->Read(field_data, array->children[i], error); if (result == EOVERFLOW) { for (int16_t j = 0; j < i; i++) { array->children[j]->length--; @@ -343,4 +341,165 @@ class PostgresCopyReaderStruct : public PostgresCopyReader { } }; +// Factor for a PostgresCopyReader that checks the logical types. Curently the only +// way to generate schema is to use PostgresType::SetSchema() and so it's unlikely +// that these errors will fire. 
+ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* schema, + PostgresCopyReader** out) { + ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_BOOL: + *out = new PostgresCopyReaderNetworkEndian(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_INT16: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_INT2: + *out = new PostgresCopyReaderNetworkEndian(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_INT32: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_INT4: + *out = new PostgresCopyReaderNetworkEndian(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_FLOAT: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_FLOAT4: + *out = new PostgresCopyReaderNetworkEndian(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_INT64: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_INT8: + *out = new PostgresCopyReaderNetworkEndian(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_DOUBLE: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_FLOAT8: + *out = new PostgresCopyReaderNetworkEndian(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_STRING: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_CHAR: + case PostgresType::PG_RECV_VARCHAR: + case PostgresType::PG_RECV_TEXT: + *out = new PostgresCopyReaderList(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_BINARY: + // No need to check pg_type here: we can return the bytes of any + // Postgres type as binary. 
+ *out = new PostgresCopyReaderBinary(pg_type); + return NANOARROW_OK; + + case NANOARROW_TYPE_LIST: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_ARRAY: + *out = new PostgresCopyReaderList(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + + case NANOARROW_TYPE_STRUCT: + switch (pg_type.recv()) { + case PostgresType::PG_RECV_RECORD: + *out = new PostgresCopyReaderStruct(pg_type); + return NANOARROW_OK; + default: + return ENOTSUP; + } + default: + return ENOTSUP; + } +} + +class PostgresCopyStreamReader { + public: + PostgresCopyStreamReader(const PostgresType& pg_type) : pg_type_(pg_type) {} + + ArrowErrorCode InferSchema(ArrowError* error) { + if (pg_type_.recv() != PostgresType::PG_RECV_RECORD) { + return EINVAL; + } + + schema_.reset(); + ArrowSchemaInit(schema_.get()); + NANOARROW_RETURN_NOT_OK(pg_type_.SetSchema(schema_.get())); + return NANOARROW_OK; + } + + ArrowErrorCode InitFieldReaders(ArrowError* error) { + if (schema_->release == nullptr) { + return EINVAL; + } + + if (schema_->n_children != pg_type_.n_children()) { + return EINVAL; + } + + root_reader_.reset(new PostgresCopyReaderStruct(pg_type_)); + for (int64_t i = 0; i < pg_type_.n_children(); i++) { + PostgresCopyReader* child_reader = nullptr; + int result = + MakeCopyFieldReader(*pg_type_.child(i), schema_->children[i], &child_reader); + if (result != NANOARROW_OK) { + ArrowErrorSet(error, "Can't resolve converter for Postgres type '%s'", + pg_type_.child(i)->typname().c_str()); + return result; + } + + root_reader_->AppendChild(std::unique_ptr(child_reader)); + } + + NANOARROW_RETURN_NOT_OK(root_reader_->InitSchema(schema_.get())); + return NANOARROW_OK; + } + + ArrowErrorCode GetSchema(ArrowSchema* out) { + return ArrowSchemaDeepCopy(schema_.get(), out); + } + + ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { return ENOTSUP; } + + ArrowErrorCode ReadRecord(ArrowBufferView data, ArrowError* error) { return ENOTSUP; } + + void 
GetArray(ArrowArray* out) { ArrowArrayMove(array_.get(), out); } + + private: + PostgresType pg_type_; + std::unique_ptr root_reader_; + nanoarrow::UniqueSchema schema_; + nanoarrow::UniqueArray array_; +}; + } // namespace adbcpq \ No newline at end of file diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 6596c9d808..df10e3878e 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -399,7 +399,7 @@ class PostgresType { uint32_t oid() const { return oid_; } PgRecv recv() const { return recv_; } - const std::string& typname() { return typname_; } + const std::string& typname() const { return typname_; } const std::string& field_name() const { return field_name_; } int64_t n_children() const { return static_cast(children_.size()); } const PostgresType* child(int64_t i) const { return &children_[i]; } From ee56a2b31533c6deecbf1621d8d7cf8bc5916220 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 14:58:56 -0300 Subject: [PATCH 37/90] some rethinking of the copy reader class def --- c/driver/postgresql/postgres_copy_utils.h | 161 +++++++++++----------- 1 file changed, 79 insertions(+), 82 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index a36f14e885..555a1a02df 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -83,27 +83,24 @@ ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { return NANOARROW_OK; } -class PostgresCopyReader { +class PostgresCopyFieldReader { public: - explicit PostgresCopyReader(const PostgresType& pg_type) - : pg_type_(std::move(pg_type)), offsets_(nullptr), data_(nullptr) { + explicit PostgresCopyFieldReader() : offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } - virtual ~PostgresCopyReader() {} + virtual ~PostgresCopyFieldReader() {} - void 
AppendChild(std::unique_ptr child) { - children_.push_back(std::move(child)); - } + void Init(const PostgresType& pg_type) { pg_type_ = pg_type; } const PostgresType& InputType() const { return pg_type_; } - ArrowErrorCode InitSchema(ArrowSchema* schema) { + virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); return NANOARROW_OK; } - ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { + virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); @@ -131,7 +128,7 @@ class PostgresCopyReader { return ENOTSUP; } - ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { + virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { return NANOARROW_OK; } @@ -140,15 +137,12 @@ class PostgresCopyReader { ArrowSchemaView schema_view_; ArrowBuffer* offsets_; ArrowBuffer* data_; - std::vector> children_; + std::vector> children_; }; -// Converter for a Postgres boolean (one byte -> bitmap) -class PostgresCopyReaderBool : public PostgresCopyReader { +// Reader for a Postgres boolean (one byte -> bitmap) +class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { public: - explicit PostgresCopyReaderBool(const PostgresType& pg_type) - : PostgresCopyReader(pg_type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -175,14 +169,11 @@ class PostgresCopyReaderBool : public PostgresCopyReader { } }; -// Converter for Pg->Arrow conversions whose representations are identical minus +// Reader for Pg->Arrow conversions whose representations are identical minus // the bswap from network endian. This includes all integral and float types. 
template -class PostgresCopyReaderNetworkEndian : public PostgresCopyReader { +class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { public: - explicit PostgresCopyReaderNetworkEndian(const PostgresType& pg_type) - : PostgresCopyReader(pg_type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -197,14 +188,11 @@ class PostgresCopyReaderNetworkEndian : public PostgresCopyReader { } }; -// Converter for Pg->Arrow conversions whose Arrow representation is simply the +// Reader for Pg->Arrow conversions whose Arrow representation is simply the // bytes of the field representation. This can be used with binary and string -// Arrow types and any postgres type. -class PostgresCopyReaderBinary : public PostgresCopyReader { +// Arrow types and any Postgres type. +class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { public: - explicit PostgresCopyReaderBinary(const PostgresType& pg_type) - : PostgresCopyReader(pg_type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -221,11 +209,9 @@ class PostgresCopyReaderBinary : public PostgresCopyReader { } }; -class PostgresCopyReaderList : public PostgresCopyReader { +// +class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { public: - explicit PostgresCopyReaderList(const PostgresType& pg_type) - : PostgresCopyReader(pg_type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -296,11 +282,8 @@ class PostgresCopyReaderList : public PostgresCopyReader { } }; -class PostgresCopyReaderStruct : public PostgresCopyReader { +class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { public: - explicit PostgresCopyReaderStruct(const PostgresType& pg_type) - : PostgresCopyReader(pg_type) {} - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* 
array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -341,11 +324,18 @@ class PostgresCopyReaderStruct : public PostgresCopyReader { } }; -// Factor for a PostgresCopyReader that checks the logical types. Curently the only -// way to generate schema is to use PostgresType::SetSchema() and so it's unlikely -// that these errors will fire. +// Factory for a PostgresCopyFieldReader that instantiates the proper subclass +// and gives a nice error for Postgres type -> Arrow type conversions that aren't +// supported. +ArrowErrorCode ErrorCantConvert(ArrowError* error, const PostgresType& pg_type, + const ArrowSchemaView& schema_view) { + ArrowErrorSet(error, "Can't convert Postgres type '%s' to Arrow type '%s'", + pg_type.typname().c_str(), ArrowTypeString(schema_view.type)); + return EINVAL; +} + ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* schema, - PostgresCopyReader** out) { + PostgresCopyFieldReader** out, ArrowError* error) { ArrowSchemaView schema_view; NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); @@ -353,55 +343,55 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch case NANOARROW_TYPE_BOOL: switch (pg_type.recv()) { case PostgresType::PG_RECV_BOOL: - *out = new PostgresCopyReaderNetworkEndian(pg_type); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_INT16: switch (pg_type.recv()) { case PostgresType::PG_RECV_INT2: - *out = new PostgresCopyReaderNetworkEndian(pg_type); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_INT32: switch (pg_type.recv()) { case PostgresType::PG_RECV_INT4: - *out = new PostgresCopyReaderNetworkEndian(pg_type); + *out = new PostgresCopyNetworkEndianFieldReader(); return 
NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_FLOAT: switch (pg_type.recv()) { case PostgresType::PG_RECV_FLOAT4: - *out = new PostgresCopyReaderNetworkEndian(pg_type); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_INT64: switch (pg_type.recv()) { case PostgresType::PG_RECV_INT8: - *out = new PostgresCopyReaderNetworkEndian(pg_type); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_DOUBLE: switch (pg_type.recv()) { case PostgresType::PG_RECV_FLOAT8: - *out = new PostgresCopyReaderNetworkEndian(pg_type); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_STRING: @@ -409,52 +399,69 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch case PostgresType::PG_RECV_CHAR: case PostgresType::PG_RECV_VARCHAR: case PostgresType::PG_RECV_TEXT: - *out = new PostgresCopyReaderList(pg_type); + *out = new PostgresCopyBinaryFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_BINARY: // No need to check pg_type here: we can return the bytes of any // Postgres type as binary. 
- *out = new PostgresCopyReaderBinary(pg_type); + *out = new PostgresCopyBinaryFieldReader(); return NANOARROW_OK; case NANOARROW_TYPE_LIST: switch (pg_type.recv()) { case PostgresType::PG_RECV_ARRAY: - *out = new PostgresCopyReaderList(pg_type); + *out = new PostgresCopyArrayFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_STRUCT: switch (pg_type.recv()) { case PostgresType::PG_RECV_RECORD: - *out = new PostgresCopyReaderStruct(pg_type); + *out = new PostgresCopyRecordFieldReader(); return NANOARROW_OK; default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } default: - return ENOTSUP; + return ErrorCantConvert(error, pg_type, schema_view); } } class PostgresCopyStreamReader { public: - PostgresCopyStreamReader(const PostgresType& pg_type) : pg_type_(pg_type) {} + ArrowErrorCode Init(const PostgresType& pg_type) { + if (pg_type.recv() != PostgresType::PG_RECV_RECORD) { + return EINVAL; + } - ArrowErrorCode InferSchema(ArrowError* error) { - if (pg_type_.recv() != PostgresType::PG_RECV_RECORD) { + root_reader_.Init(pg_type); + return NANOARROW_OK; + } + + ArrowErrorCode SetOutputSchema(ArrowSchema* schema, ArrowError* error) { + if (schema_->n_children != root_reader_.InputType().n_children()) { + ArrowErrorSet(error, + "Expected output schema with %ld columns to match Postgres input but " + "got schema with %ld columns", + static_cast(root_reader_.InputType().n_children()), + static_cast(schema->n_children)); return EINVAL; } + schema_.reset(schema); + return NANOARROW_OK; + } + + ArrowErrorCode InferOutputSchema(ArrowError* error) { schema_.reset(); ArrowSchemaInit(schema_.get()); - NANOARROW_RETURN_NOT_OK(pg_type_.SetSchema(schema_.get())); + NANOARROW_RETURN_NOT_OK(root_reader_.InputType().SetSchema(schema_.get())); return NANOARROW_OK; } @@ -463,25 +470,16 @@ class PostgresCopyStreamReader { return EINVAL; } - if (schema_->n_children != 
pg_type_.n_children()) { - return EINVAL; - } - - root_reader_.reset(new PostgresCopyReaderStruct(pg_type_)); - for (int64_t i = 0; i < pg_type_.n_children(); i++) { - PostgresCopyReader* child_reader = nullptr; - int result = - MakeCopyFieldReader(*pg_type_.child(i), schema_->children[i], &child_reader); - if (result != NANOARROW_OK) { - ArrowErrorSet(error, "Can't resolve converter for Postgres type '%s'", - pg_type_.child(i)->typname().c_str()); - return result; - } + const PostgresType& root_type = root_reader_.InputType(); - root_reader_->AppendChild(std::unique_ptr(child_reader)); + for (int64_t i = 0; i < root_type.n_children(); i++) { + const PostgresType& child_type = *root_type.child(i); + PostgresCopyFieldReader* child_reader; + NANOARROW_RETURN_NOT_OK( + MakeCopyFieldReader(child_type, schema_->children[i], &child_reader, error)); } - NANOARROW_RETURN_NOT_OK(root_reader_->InitSchema(schema_.get())); + NANOARROW_RETURN_NOT_OK(root_reader_.InitSchema(schema_.get())); return NANOARROW_OK; } @@ -496,10 +494,9 @@ class PostgresCopyStreamReader { void GetArray(ArrowArray* out) { ArrowArrayMove(array_.get(), out); } private: - PostgresType pg_type_; - std::unique_ptr root_reader_; + PostgresCopyRecordFieldReader root_reader_; nanoarrow::UniqueSchema schema_; nanoarrow::UniqueArray array_; }; -} // namespace adbcpq \ No newline at end of file +} // namespace adbcpq From 8f9adce89283f30ffd8cef4109c50f80f295138a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 15:35:50 -0300 Subject: [PATCH 38/90] maybe get recursion to work for instantiating converters --- c/driver/postgresql/postgres_copy_utils.h | 100 +++++++++++++++++----- 1 file changed, 77 insertions(+), 23 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 555a1a02df..ae749a04d6 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -212,6 +212,11 @@ class 
PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { // class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { public: + void InitChild(std::unique_ptr child) { + child_ = std::move(child); + child_->Init(*pg_type_.child(0)); + } + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -225,21 +230,21 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { uint32_t element_type_oid; NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &element_type_oid, error)); - if (element_type_oid != children_[0]->InputType().oid()) { - ArrowErrorSet(error, - "Expected array child value with oid %ld but got array child value " - "with oid %ld", - static_cast(children_[0]->InputType().oid()), - static_cast(element_type_oid)); // NOLINT(runtime/int) - return EINVAL; - } + // We could validate the OID here, but this is a poor fit for all cases + // (e.g. testing) since the OID can be specific to each database - if (n_dim <= 0) { + if (n_dim < 0) { ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", static_cast(n_dim)); return EINVAL; } + // This is apparently allowed + if (n_dim == 0) { + NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); + return NANOARROW_OK; + } + int64_t n_items = 1; for (int32_t i = 0; i < n_dim; i++) { int32_t dim_size; @@ -262,14 +267,7 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { field_data.size_bytes = field_length; // Note: Read() here is a virtual method call - int result = children_[0]->Read(field_data, array->children[i], error); - if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; j++) { - array->children[j]->length--; - } - - return result; - } + NANOARROW_RETURN_NOT_OK(child_->Read(field_data, array->children[0], error)); if (field_length > 0) { data.data.as_uint8 += field_length; @@ -280,10 +278,19 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { 
NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); return NANOARROW_OK; } + + private: + std::unique_ptr child_; }; class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { public: + void AppendChild(std::unique_ptr child) { + int64_t child_i = static_cast(children_.size()); + children_.push_back(std::move(child)); + children_[child_i]->Init(*pg_type_.child(child_i)); + } + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -305,11 +312,18 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { // Note: Read() here is a virtual method call int result = children_[i]->Read(field_data, array->children[i], error); + + // On overflow, pretend all previous children for this struct were never + // appended to. This leaves array in a valid state in the specific case + // where EOVERFLOW was returned so that a higher level caller can attempt + // to try again after creating a new array. if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; i++) { + for (int16_t j = 0; j < i; j++) { array->children[j]->length--; } + return result; + } else if (result != NANOARROW_OK) { return result; } @@ -322,6 +336,9 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { array->length++; return NANOARROW_OK; } + + private: + std::vector> children_; }; // Factory for a PostgresCopyFieldReader that instantiates the proper subclass @@ -343,7 +360,7 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch case NANOARROW_TYPE_BOOL: switch (pg_type.recv()) { case PostgresType::PG_RECV_BOOL: - *out = new PostgresCopyNetworkEndianFieldReader(); + *out = new PostgresCopyBooleanFieldReader(); return NANOARROW_OK; default: return ErrorCantConvert(error, pg_type, schema_view); @@ -413,18 +430,55 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch case NANOARROW_TYPE_LIST: switch (pg_type.recv()) { - case 
PostgresType::PG_RECV_ARRAY: - *out = new PostgresCopyArrayFieldReader(); + case PostgresType::PG_RECV_ARRAY: { + if (pg_type.n_children() != 1) { + ArrowErrorSet(error, + "Expected Postgres array type to have one child but found %ld", + static_cast(pg_type.n_children())); + return EINVAL; + } + + auto array_reader = std::unique_ptr( + new PostgresCopyArrayFieldReader()); + + PostgresCopyFieldReader* child_reader; + NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( + *pg_type.child(0), schema->children[0], &child_reader, error)); + array_reader->InitChild(std::unique_ptr(child_reader)); + + *out = array_reader.release(); return NANOARROW_OK; + } default: return ErrorCantConvert(error, pg_type, schema_view); } case NANOARROW_TYPE_STRUCT: switch (pg_type.recv()) { - case PostgresType::PG_RECV_RECORD: - *out = new PostgresCopyRecordFieldReader(); + case PostgresType::PG_RECV_RECORD: { + if (pg_type.n_children() != schema->n_children) { + ArrowErrorSet(error, + "Can't convert Postgres record type with %ld chlidren to Arrow " + "struct type with %ld children", + static_cast(pg_type.n_children()), + static_cast(schema->n_children)); + return EINVAL; + } + + auto record_reader = std::unique_ptr( + new PostgresCopyRecordFieldReader()); + + for (int64_t i = 0; i < pg_type.n_children(); i++) { + PostgresCopyFieldReader* child_reader; + NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( + *pg_type.child(i), schema->children[i], &child_reader, error)); + record_reader->AppendChild( + std::unique_ptr(child_reader)); + } + + *out = record_reader.release(); return NANOARROW_OK; + } default: return ErrorCantConvert(error, pg_type, schema_view); } From 4b143d1edcb7098a8310b27314c016636fb70fec Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 16:07:42 -0300 Subject: [PATCH 39/90] maybe full lifecycle --- c/driver/postgresql/postgres_copy_utils.h | 59 ++++++++++++++++++++--- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git 
a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index ae749a04d6..384c0d4d67 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -100,10 +100,7 @@ class PostgresCopyFieldReader { return NANOARROW_OK; } - virtual ArrowErrorCode InitArray(ArrowArray* array, ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, nullptr)); - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array)); - + virtual ArrowErrorCode InitArray(ArrowArray* array) { // Cache some buffer pointers for (int32_t i = 0; i < 3; i++) { switch (schema_view_.layout.buffer_type[i]) { @@ -217,6 +214,18 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { child_->Init(*pg_type_.child(0)); } + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); + NANOARROW_RETURN_NOT_OK(child_->InitSchema(schema->children[0])); + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); + NANOARROW_RETURN_NOT_OK(child_->InitArray(array->children[0])); + return NANOARROW_OK; + } + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -291,6 +300,24 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { children_[child_i]->Init(*pg_type_.child(child_i)); } + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); + for (int64_t i = 0; i < schema->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); + for (int64_t i = 0; i < array->n_children; i++) { + 
NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); + } + + return NANOARROW_OK; + } + ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, ArrowError* error) override { if (data.size_bytes <= 0) { @@ -499,6 +526,14 @@ class PostgresCopyStreamReader { } ArrowErrorCode SetOutputSchema(ArrowSchema* schema, ArrowError* error) { + if (std::string(schema_->format) != "+s") { + ArrowErrorSet( + error, + "Expected output schema of type struct but got output schema with format '%s'", + schema_->format); + return EINVAL; + } + if (schema_->n_children != root_reader_.InputType().n_children()) { ArrowErrorSet(error, "Expected output schema with %ld columns to match Postgres input but " @@ -537,14 +572,22 @@ class PostgresCopyStreamReader { return NANOARROW_OK; } + ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { return ENOTSUP; } + + ArrowErrorCode ReadRecord(ArrowBufferView data, ArrowError* error) { + if (array_->release == nullptr) { + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); + NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); + } + + NANOARROW_RETURN_NOT_OK(root_reader_.Read(data, array_.get(), error)); + return NANOARROW_OK; + } + ArrowErrorCode GetSchema(ArrowSchema* out) { return ArrowSchemaDeepCopy(schema_.get(), out); } - ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { return ENOTSUP; } - - ArrowErrorCode ReadRecord(ArrowBufferView data, ArrowError* error) { return ENOTSUP; } - void GetArray(ArrowArray* out) { ArrowArrayMove(array_.get(), out); } private: From f62e62a94c2612cff30704c9a42351704d342a55 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 16:20:14 -0300 Subject: [PATCH 40/90] totally theoretical header reader --- c/driver/postgresql/postgres_copy_utils.h | 38 +++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h 
b/c/driver/postgresql/postgres_copy_utils.h index 384c0d4d67..971e31e50a 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -572,11 +572,45 @@ class PostgresCopyStreamReader { return NANOARROW_OK; } - ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { return ENOTSUP; } + ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { + if (data.size_bytes < sizeof(kPgCopyBinarySignature)) { + ArrowErrorSet(error, + "Expected PGCOPY signature of %ld bytes at beginning of stream but " + "found %ld bytes of input", + static_cast(sizeof(kPgCopyBinarySignature)), + static_cast(data.size_bytes)); + return EINVAL; + } + + if (memcmp(data.data.data, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature)) != + 0) { + ArrowErrorSet(error, "Invalid PGCOPY signature at beginning of stream"); + return EINVAL; + } + + uint32_t flags; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &flags, error)); + uint32_t extension_length; + NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &extension_length, error)); + + if (data.size_bytes < static_cast(extension_length)) { + ArrowErrorSet(error, + "Expected %ld bytes of extension metadata at start of stream but " + "found %ld bytes of input", + static_cast(extension_length), + static_cast(data.size_bytes)); + return EINVAL; + } + + data.data.as_uint8 += extension_length; + data.size_bytes -= extension_length; + return NANOARROW_OK; + } ArrowErrorCode ReadRecord(ArrowBufferView data, ArrowError* error) { if (array_->release == nullptr) { - NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); + NANOARROW_RETURN_NOT_OK( + ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); } From 7abb8712244be380eea967b0ed65b197239d4824 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 21:59:32 -0300 Subject: [PATCH 41/90] tidy + build errors --- 
c/driver/postgresql/postgres_copy_utils.h | 19 +++++----- .../postgresql/postgres_copy_utils_test.cc | 36 ++++++++++++++++++- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 971e31e50a..d66640344a 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -85,7 +86,7 @@ ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { class PostgresCopyFieldReader { public: - explicit PostgresCopyFieldReader() : offsets_(nullptr), data_(nullptr) { + PostgresCopyFieldReader() : offsets_(nullptr), data_(nullptr) { memset(&schema_view_, 0, sizeof(ArrowSchemaView)); } @@ -459,9 +460,9 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch switch (pg_type.recv()) { case PostgresType::PG_RECV_ARRAY: { if (pg_type.n_children() != 1) { - ArrowErrorSet(error, - "Expected Postgres array type to have one child but found %ld", - static_cast(pg_type.n_children())); + ArrowErrorSet( + error, "Expected Postgres array type to have one child but found %ld", + static_cast(pg_type.n_children())); // NOLINT(runtime/int) return EINVAL; } @@ -488,7 +489,7 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch "Can't convert Postgres record type with %ld chlidren to Arrow " "struct type with %ld children", static_cast(pg_type.n_children()), - static_cast(schema->n_children)); + static_cast(schema->n_children)); // NOLINT(runtime/int) return EINVAL; } @@ -539,7 +540,7 @@ class PostgresCopyStreamReader { "Expected output schema with %ld columns to match Postgres input but " "got schema with %ld columns", static_cast(root_reader_.InputType().n_children()), - static_cast(schema->n_children)); + static_cast(schema->n_children)); // NOLINT(runtime/int) return EINVAL; } @@ -573,12 +574,12 @@ class 
PostgresCopyStreamReader { } ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { - if (data.size_bytes < sizeof(kPgCopyBinarySignature)) { + if (data.size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { ArrowErrorSet(error, "Expected PGCOPY signature of %ld bytes at beginning of stream but " "found %ld bytes of input", static_cast(sizeof(kPgCopyBinarySignature)), - static_cast(data.size_bytes)); + static_cast(data.size_bytes)); // NOLINT(runtime/int) return EINVAL; } @@ -598,7 +599,7 @@ class PostgresCopyStreamReader { "Expected %ld bytes of extension metadata at start of stream but " "found %ld bytes of input", static_cast(extension_length), - static_cast(data.size_bytes)); + static_cast(data.size_bytes)); // NOLINT(runtime/int) return EINVAL; } diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc index 4694057362..7829e7de43 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -27,6 +27,10 @@ static uint8_t kTestPgCopyBoolean[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { + EXPECT_EQ(sizeof(kTestPgCopyBoolean), sizeof(kTestPgCopyBoolean)); +} + // COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), // (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopySmallInt[] = { @@ -36,6 +40,10 @@ static uint8_t kTestPgCopySmallInt[] = { 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { + EXPECT_EQ(sizeof(kTestPgCopySmallInt), sizeof(kTestPgCopySmallInt)); +} + // COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), // 
(NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyInteger[] = { @@ -45,6 +53,10 @@ static uint8_t kTestPgCopyInteger[] = { 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadInteger) { + EXPECT_EQ(sizeof(kTestPgCopyInteger), sizeof(kTestPgCopyInteger)); +} + // COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), // (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyBigInt[] = { @@ -55,6 +67,10 @@ static uint8_t kTestPgCopyBigInt[] = { 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { + EXPECT_EQ(sizeof(kTestPgCopyBigInt), sizeof(kTestPgCopyBigInt)); +} + // COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), // (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyReal[] = { @@ -64,6 +80,10 @@ static uint8_t kTestPgCopyReal[] = { 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { + EXPECT_EQ(sizeof(kTestPgCopyReal), sizeof(kTestPgCopyReal)); +} + // COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), // (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyDoublePrecision[] = { @@ -74,6 +94,10 @@ static uint8_t kTestPgCopyDoublePrecision[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadDoublePrecision) { + 
EXPECT_EQ(sizeof(kTestPgCopyDoublePrecision), sizeof(kTestPgCopyDoublePrecision)); +} + // COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), // (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyText[] = { @@ -82,6 +106,10 @@ static uint8_t kTestPgCopyText[] = { 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { + EXPECT_EQ(sizeof(kTestPgCopyText), sizeof(kTestPgCopyText)); +} + // COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, // 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyIntegerArray[] = { @@ -95,6 +123,10 @@ static uint8_t kTestPgCopyIntegerArray[] = { 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { + EXPECT_EQ(sizeof(kTestPgCopyIntegerArray), sizeof(kTestPgCopyIntegerArray)); +} + // CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); // COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), // ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); @@ -108,4 +140,6 @@ static uint8_t kTestPgCopyCustomRecord[] = { 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x75, 0x9a, 0xd9, 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; -TEST(PostgresCopyUtilsTest, PostgresTypeBasic) { EXPECT_EQ(4, 4); } +TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { + EXPECT_EQ(sizeof(kTestPgCopyCustomRecord), sizeof(kTestPgCopyCustomRecord)); +} From da0afb3d4a51314a818795cf382d6015e39a5cb6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 22:45:24 -0300 Subject: [PATCH 42/90] data is a pointer --- 
c/driver/postgresql/postgres_copy_utils.h | 153 +++++++++++++--------- 1 file changed, 91 insertions(+), 62 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index d66640344a..7b5eedfdd1 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -121,8 +121,8 @@ class PostgresCopyFieldReader { return NANOARROW_OK; } - virtual ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, - ArrowError* error) { + virtual ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, + ArrowArray* array, ArrowError* error) { return ENOTSUP; } @@ -141,22 +141,25 @@ class PostgresCopyFieldReader { // Reader for a Postgres boolean (one byte -> bitmap) class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data.size_bytes <= 0) { + if (field_size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } + if (field_size_bytes != 1) { + ArrowErrorSet(error, "Expected field with one byte but found field with %d bytes", + static_cast(field_size_bytes)); + return EINVAL; + } + int64_t bytes_required = _ArrowBytesForBits(array->length + 1); if (bytes_required > data_->size_bytes) { NANOARROW_RETURN_NOT_OK( ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); } - int8_t value; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &value, error)); - - if (value) { + if (ReadUnsafe(data)) { ArrowBitSet(data_->data, array->length); } else { ArrowBitClear(data_->data, array->length); @@ -172,14 +175,19 @@ class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { template class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowErrorCode 
Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data.size_bytes <= 0) { + if (field_size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } - T value_uint; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &value_uint, error)); + if (field_size_bytes != static_cast(sizeof(T))) { + ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", + static_cast(sizeof(T)), static_cast(field_size_bytes)); + return EINVAL; + } + + T value_uint = ReadUnsafe(data); NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value_uint, sizeof(T))); array->length++; return NANOARROW_OK; @@ -191,13 +199,20 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { // Arrow types and any Postgres type. class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { public: - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data.size_bytes <= 0) { + if (data->size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendBufferView(data_, data)); + if (field_size_bytes > data->size_bytes) { + ArrowErrorSet(error, "Expected %d bytes of field data but got %d bytes of input", + static_cast(field_size_bytes), + static_cast(data->size_bytes)); + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes)); int32_t* offsets = reinterpret_cast(offsets_->data); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( offsets_, offsets[array->length] + static_cast(data_->size_bytes))); @@ -227,18 +242,22 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { return NANOARROW_OK; } - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { 
- if (data.size_bytes <= 0) { + if (data->size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } + // Keep the cursor where we start to parse the array so we can check + // the number of bytes read against the field size when finished + const uint8_t* data0 = data->data.as_uint8; + int32_t n_dim; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &n_dim, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_dim, error)); int32_t flags; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &flags, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); uint32_t element_type_oid; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &element_type_oid, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &element_type_oid, error)); // We could validate the OID here, but this is a poor fit for all cases // (e.g. testing) since the OID can be specific to each database @@ -258,11 +277,11 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { int64_t n_items = 1; for (int32_t i = 0; i < n_dim; i++) { int32_t dim_size; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &dim_size, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &dim_size, error)); n_items *= dim_size; int32_t lower_bound; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &lower_bound, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &lower_bound, error)); if (lower_bound != 0) { ArrowErrorSet(error, "Array value with lower bound != 0 is not supported"); return EINVAL; @@ -271,18 +290,17 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { ArrowBufferView field_data; for (int64_t i = 0; i < n_items; i++) { - int32_t field_length; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &field_length, error)); - field_data.data.as_uint8 = data.data.as_uint8; - field_data.size_bytes = field_length; - - // Note: Read() here is a virtual method call - NANOARROW_RETURN_NOT_OK(child_->Read(field_data, array->children[0], error)); + int32_t child_field_size_bytes; + 
NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); + NANOARROW_RETURN_NOT_OK( + child_->Read(data, child_field_size_bytes, array->children[0], error)); + } - if (field_length > 0) { - data.data.as_uint8 += field_length; - data.size_bytes -= field_length; - } + int64_t bytes_read = data->data.as_uint8 - data0; + if (bytes_read != field_size_bytes) { + ArrowErrorSet(error, "Expected to read %d bytes from array field but read %d bytes", + static_cast(field_size_bytes), static_cast(bytes_read)); + return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); @@ -319,27 +337,34 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { return NANOARROW_OK; } - ArrowErrorCode Read(ArrowBufferView data, ArrowArray* array, + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data.size_bytes <= 0) { + if (data->size_bytes == 0) { return ArrowArrayAppendNull(array, 1); } + // Keep the cursor where we start to parse the field so we can check + // the number of bytes read against the field size when finished + const uint8_t* data0 = data->data.as_uint8; + int16_t n_fields; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &n_fields, error)); - if (n_fields < 0) { + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); + if (n_fields == -1) { return ENODATA; + } else if (n_fields != array->n_children) { + ArrowErrorSet(error, + "Expected -1 for end-of-stream or number of fields in output array " + "(%ld) but got %d", + static_cast(array->n_children), static_cast(n_fields)); + return EINVAL; } struct ArrowBufferView field_data; for (uint16_t i = 0; i < n_fields; i++) { - int32_t field_length; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &field_length, error)); - field_data.data.as_uint8 = data.data.as_uint8; - field_data.size_bytes = field_length; - - // Note: Read() here is a virtual method call - int result = children_[i]->Read(field_data, 
array->children[i], error); + int32_t child_field_size_bytes; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); + int result = + children_[i]->Read(data, child_field_size_bytes, array->children[i], error); // On overflow, pretend all previous children for this struct were never // appended to. This leaves array in a valid state in the specific case @@ -349,16 +374,20 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { for (int16_t j = 0; j < i; j++) { array->children[j]->length--; } + } - return result; - } else if (result != NANOARROW_OK) { + if (result != NANOARROW_OK) { return result; } + } - if (field_length > 0) { - data.data.as_uint8 += field_length; - data.size_bytes -= field_length; - } + // field size == -1 means don't check (e.g., for a top-level row tuple) + int64_t bytes_read = data->data.as_uint8 - data0; + if (field_size_bytes != -1 && bytes_read != field_size_bytes) { + ArrowErrorSet(error, + "Expected to read %d bytes from record field but read %d bytes", + static_cast(field_size_bytes), static_cast(bytes_read)); + return EINVAL; } array->length++; @@ -573,49 +602,49 @@ class PostgresCopyStreamReader { return NANOARROW_OK; } - ArrowErrorCode ReadHeader(ArrowBufferView data, ArrowError* error) { - if (data.size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { + ArrowErrorCode ReadHeader(ArrowBufferView* data, ArrowError* error) { + if (data->size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { ArrowErrorSet(error, "Expected PGCOPY signature of %ld bytes at beginning of stream but " "found %ld bytes of input", static_cast(sizeof(kPgCopyBinarySignature)), - static_cast(data.size_bytes)); // NOLINT(runtime/int) + static_cast(data->size_bytes)); // NOLINT(runtime/int) return EINVAL; } - if (memcmp(data.data.data, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature)) != + if (memcmp(data->data.data, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature)) != 0) { ArrowErrorSet(error, "Invalid 
PGCOPY signature at beginning of stream"); return EINVAL; } uint32_t flags; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &flags, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); uint32_t extension_length; - NANOARROW_RETURN_NOT_OK(ReadChecked(&data, &extension_length, error)); + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &extension_length, error)); - if (data.size_bytes < static_cast(extension_length)) { + if (data->size_bytes < static_cast(extension_length)) { ArrowErrorSet(error, "Expected %ld bytes of extension metadata at start of stream but " "found %ld bytes of input", static_cast(extension_length), - static_cast(data.size_bytes)); // NOLINT(runtime/int) + static_cast(data->size_bytes)); // NOLINT(runtime/int) return EINVAL; } - data.data.as_uint8 += extension_length; - data.size_bytes -= extension_length; + data->data.as_uint8 += extension_length; + data->size_bytes -= extension_length; return NANOARROW_OK; } - ArrowErrorCode ReadRecord(ArrowBufferView data, ArrowError* error) { + ArrowErrorCode ReadRecord(ArrowBufferView* data, ArrowError* error) { if (array_->release == nullptr) { NANOARROW_RETURN_NOT_OK( ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); } - NANOARROW_RETURN_NOT_OK(root_reader_.Read(data, array_.get(), error)); + NANOARROW_RETURN_NOT_OK(root_reader_.Read(data, -1, array_.get(), error)); return NANOARROW_OK; } From a445999c24dc8ca94a931c887ed61cbe922a81b4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 22:47:52 -0300 Subject: [PATCH 43/90] maybe fix tidy + lint --- c/driver/postgresql/postgres_copy_utils.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 7b5eedfdd1..a9165eec54 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -149,7 
+149,7 @@ class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { if (field_size_bytes != 1) { ArrowErrorSet(error, "Expected field with one byte but found field with %d bytes", - static_cast(field_size_bytes)); + static_cast(field_size_bytes)); // NOLINT(runtime/int) return EINVAL; } @@ -183,7 +183,8 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { if (field_size_bytes != static_cast(sizeof(T))) { ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", - static_cast(sizeof(T)), static_cast(field_size_bytes)); + static_cast(sizeof(T)), + static_cast(field_size_bytes)); // NOLINT(runtime/int) return EINVAL; } @@ -208,7 +209,7 @@ class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { if (field_size_bytes > data->size_bytes) { ArrowErrorSet(error, "Expected %d bytes of field data but got %d bytes of input", static_cast(field_size_bytes), - static_cast(data->size_bytes)); + static_cast(data->size_bytes)); // NOLINT(runtime/int) return EINVAL; } @@ -264,7 +265,7 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { if (n_dim < 0) { ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", - static_cast(n_dim)); + static_cast(n_dim)); // NOLINT(runtime/int) return EINVAL; } @@ -299,7 +300,8 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { int64_t bytes_read = data->data.as_uint8 - data0; if (bytes_read != field_size_bytes) { ArrowErrorSet(error, "Expected to read %d bytes from array field but read %d bytes", - static_cast(field_size_bytes), static_cast(bytes_read)); + static_cast(field_size_bytes), + static_cast(bytes_read)); // NOLINT(runtime/int) return EINVAL; } @@ -355,7 +357,8 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { ArrowErrorSet(error, "Expected -1 for end-of-stream or number of fields in output array " "(%ld) but got %d", - static_cast(array->n_children), static_cast(n_fields)); + 
static_cast(array->n_children), + static_cast(n_fields)); // NOLINT(runtime/int) return EINVAL; } @@ -386,7 +389,8 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { if (field_size_bytes != -1 && bytes_read != field_size_bytes) { ArrowErrorSet(error, "Expected to read %d bytes from record field but read %d bytes", - static_cast(field_size_bytes), static_cast(bytes_read)); + static_cast(field_size_bytes), + static_cast(bytes_read)); // NOLINT(runtime/int) return EINVAL; } @@ -404,7 +408,8 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { ArrowErrorCode ErrorCantConvert(ArrowError* error, const PostgresType& pg_type, const ArrowSchemaView& schema_view) { ArrowErrorSet(error, "Can't convert Postgres type '%s' to Arrow type '%s'", - pg_type.typname().c_str(), ArrowTypeString(schema_view.type)); + pg_type.typname().c_str(), + ArrowTypeString(schema_view.type)); // NOLINT(runtime/int) return EINVAL; } @@ -560,7 +565,7 @@ class PostgresCopyStreamReader { ArrowErrorSet( error, "Expected output schema of type struct but got output schema with format '%s'", - schema_->format); + schema_->format); // NOLINT(runtime/int) return EINVAL; } From edd3f21131667e63daad246ea50634c2b96b4b08 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 5 Apr 2023 23:34:58 -0300 Subject: [PATCH 44/90] pass an actual read test! 
--- c/driver/postgresql/postgres_copy_utils.h | 14 ++++- .../postgresql/postgres_copy_utils_test.cc | 58 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index a9165eec54..03d18cb265 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -601,6 +601,7 @@ class PostgresCopyStreamReader { PostgresCopyFieldReader* child_reader; NANOARROW_RETURN_NOT_OK( MakeCopyFieldReader(child_type, schema_->children[i], &child_reader, error)); + root_reader_.AppendChild(std::unique_ptr(child_reader)); } NANOARROW_RETURN_NOT_OK(root_reader_.InitSchema(schema_.get())); @@ -623,6 +624,9 @@ class PostgresCopyStreamReader { return EINVAL; } + data->data.as_uint8 += sizeof(kPgCopyBinarySignature); + data->size_bytes -= sizeof(kPgCopyBinarySignature); + uint32_t flags; NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); uint32_t extension_length; @@ -657,7 +661,15 @@ class PostgresCopyStreamReader { return ArrowSchemaDeepCopy(schema_.get(), out); } - void GetArray(ArrowArray* out) { ArrowArrayMove(array_.get(), out); } + ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error) { + if (array_->release == nullptr) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuilding(array_.get(), error)); + ArrowArrayMove(array_.get(), out); + return NANOARROW_OK; + } private: PostgresCopyRecordFieldReader root_reader_; diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc index 7829e7de43..97d5fb6bab 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -20,6 +20,9 @@ #include "postgres_copy_utils.h" +using adbcpq::PostgresCopyStreamReader; +using adbcpq::PostgresType; + // COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS // drvd("col")) TO STDOUT; 
static uint8_t kTestPgCopyBoolean[] = { @@ -27,6 +30,61 @@ static uint8_t kTestPgCopyBoolean[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +TEST(PostgresCopyUtilsTest, PostgresCopyReadStreamBasic) { + auto col_type = PostgresType(PostgresType::PG_RECV_BOOL).WithPgTypeInfo(2, "bool"); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + ArrowSchema schema; + schema.release = nullptr; + ArrowError error; + + PostgresCopyStreamReader reader; + ASSERT_EQ(reader.Init(input_type), NANOARROW_OK); + + // Make sure we can guess a schema + ASSERT_EQ(reader.InferOutputSchema(&error), NANOARROW_OK); + ASSERT_EQ(reader.GetSchema(&schema), NANOARROW_OK); + ASSERT_NE(schema.release, nullptr); + ASSERT_STREQ(schema.format, "+s"); + ASSERT_EQ(schema.n_children, 1); + ASSERT_STREQ(schema.children[0]->format, "b"); + + // Make sure we can initialize the readers + ASSERT_EQ(reader.InitFieldReaders(&error), NANOARROW_OK); + + // Make sure we can read! 
+ ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBoolean; + data.size_bytes = sizeof(kTestPgCopyBoolean); + + ASSERT_EQ(reader.ReadHeader(&data, &error), NANOARROW_OK); + ASSERT_EQ(reader.ReadRecord(&data, &error), NANOARROW_OK); + ASSERT_EQ(reader.ReadRecord(&data, &error), NANOARROW_OK); + ASSERT_EQ(reader.ReadRecord(&data, &error), NANOARROW_OK); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean + 2, sizeof(kTestPgCopyBoolean)); + ASSERT_EQ(data.size_bytes, 2); + + ArrowArray array; + ASSERT_EQ(reader.GetArray(&array, &error), NANOARROW_OK); + + ASSERT_EQ(array.length, 3); + ASSERT_EQ(array.n_children, 1); + const uint8_t* validity = + reinterpret_cast(array.children[0]->buffers[0]); + const uint8_t* bool_data = + reinterpret_cast(array.children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(bool_data, nullptr); + + ASSERT_TRUE(ArrowBitGet(bool_data, 0)); + ASSERT_FALSE(ArrowBitGet(bool_data, 1)); + ASSERT_FALSE(ArrowBitGet(bool_data, 2)); + + array.release(&array); + schema.release(&schema); +} + TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { EXPECT_EQ(sizeof(kTestPgCopyBoolean), sizeof(kTestPgCopyBoolean)); } From 2a31194edc6d91fb52102953319620c0c3b6d607 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 12:10:16 -0300 Subject: [PATCH 45/90] text passing --- c/driver/postgresql/postgres_copy_utils.h | 33 +- .../postgresql/postgres_copy_utils_test.cc | 325 +++++++++++++++--- 2 files changed, 295 insertions(+), 63 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 03d18cb265..155748d225 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -188,8 +188,8 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { return EINVAL; } - T value_uint = ReadUnsafe(data); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value_uint, sizeof(T))); + T value = ReadUnsafe(data); + 
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value, sizeof(T))); array->length++; return NANOARROW_OK; } @@ -202,7 +202,8 @@ class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { public: ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data->size_bytes <= 0) { + // -1 for NULL (0 would be empty string) + if (field_size_bytes < 0) { return ArrowArrayAppendNull(array, 1); } @@ -214,9 +215,12 @@ class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { } NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes)); + data->data.as_uint8 += field_size_bytes; + data->size_bytes -= field_size_bytes; + int32_t* offsets = reinterpret_cast(offsets_->data); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - offsets_, offsets[array->length] + static_cast(data_->size_bytes))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(offsets_, offsets[array->length] + field_size_bytes)); array->length++; return NANOARROW_OK; @@ -289,7 +293,6 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { } } - ArrowBufferView field_data; for (int64_t i = 0; i < n_items; i++) { int32_t child_field_size_bytes; NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); @@ -362,7 +365,6 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { return EINVAL; } - struct ArrowBufferView field_data; for (uint16_t i = 0; i < n_fields; i++) { int32_t child_field_size_bytes; NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); @@ -431,7 +433,7 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch case NANOARROW_TYPE_INT16: switch (pg_type.recv()) { case PostgresType::PG_RECV_INT2: - *out = new PostgresCopyNetworkEndianFieldReader(); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: return ErrorCantConvert(error, pg_type, schema_view); 
@@ -440,25 +442,25 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch case NANOARROW_TYPE_INT32: switch (pg_type.recv()) { case PostgresType::PG_RECV_INT4: - *out = new PostgresCopyNetworkEndianFieldReader(); + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: return ErrorCantConvert(error, pg_type, schema_view); } - case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_INT64: switch (pg_type.recv()) { - case PostgresType::PG_RECV_FLOAT4: - *out = new PostgresCopyNetworkEndianFieldReader(); + case PostgresType::PG_RECV_INT8: + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: return ErrorCantConvert(error, pg_type, schema_view); } - case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_FLOAT: switch (pg_type.recv()) { - case PostgresType::PG_RECV_INT8: - *out = new PostgresCopyNetworkEndianFieldReader(); + case PostgresType::PG_RECV_FLOAT4: + *out = new PostgresCopyNetworkEndianFieldReader(); return NANOARROW_OK; default: return ErrorCantConvert(error, pg_type, schema_view); @@ -650,6 +652,7 @@ class PostgresCopyStreamReader { if (array_->release == nullptr) { NANOARROW_RETURN_NOT_OK( ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array_.get())); NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); } diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc index 97d5fb6bab..89e2c499e3 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -23,6 +23,36 @@ using adbcpq::PostgresCopyStreamReader; using adbcpq::PostgresType; +class PostgresCopyStreamTester { + public: + ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(reader_.Init(root_type)); + NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error)); + 
NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error)); + return NANOARROW_OK; + } + + ArrowErrorCode ReadAll(ArrowBufferView* data, ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(reader_.ReadHeader(data, error)); + + int result; + do { + result = reader_.ReadRecord(data, error); + } while (result == NANOARROW_OK); + + return result; + } + + void GetSchema(ArrowSchema* out) { reader_.GetSchema(out); } + + ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error = nullptr) { + return reader_.GetArray(out, error); + } + + private: + PostgresCopyStreamReader reader_; +}; + // COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS // drvd("col")) TO STDOUT; static uint8_t kTestPgCopyBoolean[] = { @@ -30,63 +60,44 @@ static uint8_t kTestPgCopyBoolean[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; -TEST(PostgresCopyUtilsTest, PostgresCopyReadStreamBasic) { - auto col_type = PostgresType(PostgresType::PG_RECV_BOOL).WithPgTypeInfo(2, "bool"); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - ArrowSchema schema; - schema.release = nullptr; - ArrowError error; - - PostgresCopyStreamReader reader; - ASSERT_EQ(reader.Init(input_type), NANOARROW_OK); - - // Make sure we can guess a schema - ASSERT_EQ(reader.InferOutputSchema(&error), NANOARROW_OK); - ASSERT_EQ(reader.GetSchema(&schema), NANOARROW_OK); - ASSERT_NE(schema.release, nullptr); - ASSERT_STREQ(schema.format, "+s"); - ASSERT_EQ(schema.n_children, 1); - ASSERT_STREQ(schema.children[0]->format, "b"); - - // Make sure we can initialize the readers - ASSERT_EQ(reader.InitFieldReaders(&error), NANOARROW_OK); - - // Make sure we can read! 
+TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { ArrowBufferView data; data.data.as_uint8 = kTestPgCopyBoolean; data.size_bytes = sizeof(kTestPgCopyBoolean); - ASSERT_EQ(reader.ReadHeader(&data, &error), NANOARROW_OK); - ASSERT_EQ(reader.ReadRecord(&data, &error), NANOARROW_OK); - ASSERT_EQ(reader.ReadRecord(&data, &error), NANOARROW_OK); - ASSERT_EQ(reader.ReadRecord(&data, &error), NANOARROW_OK); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean + 2, sizeof(kTestPgCopyBoolean)); - ASSERT_EQ(data.size_bytes, 2); + auto col_type = PostgresType(PostgresType::PG_RECV_BOOL); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); - ArrowArray array; - ASSERT_EQ(reader.GetArray(&array, &error), NANOARROW_OK); + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + // Apparently the output above contains an extra 0xff 0xff at the end + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean, sizeof(kTestPgCopyBoolean)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); ASSERT_EQ(array.length, 3); ASSERT_EQ(array.n_children, 1); + const uint8_t* validity = reinterpret_cast(array.children[0]->buffers[0]); - const uint8_t* bool_data = + const uint8_t* data_buffer = reinterpret_cast(array.children[0]->buffers[1]); ASSERT_NE(validity, nullptr); - ASSERT_NE(bool_data, nullptr); + ASSERT_NE(data_buffer, nullptr); - ASSERT_TRUE(ArrowBitGet(bool_data, 0)); - ASSERT_FALSE(ArrowBitGet(bool_data, 1)); - ASSERT_FALSE(ArrowBitGet(bool_data, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); - array.release(&array); - schema.release(&schema); -} + ASSERT_TRUE(ArrowBitGet(data_buffer, 0)); + ASSERT_FALSE(ArrowBitGet(data_buffer, 1)); + ASSERT_FALSE(ArrowBitGet(data_buffer, 2)); -TEST(PostgresCopyUtilsTest, 
PostgresCopyReadBoolean) { - EXPECT_EQ(sizeof(kTestPgCopyBoolean), sizeof(kTestPgCopyBoolean)); + array.release(&array); } // COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), @@ -99,7 +110,43 @@ static uint8_t kTestPgCopySmallInt[] = { 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { - EXPECT_EQ(sizeof(kTestPgCopySmallInt), sizeof(kTestPgCopySmallInt)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopySmallInt; + data.size_bytes = sizeof(kTestPgCopySmallInt); + + auto col_type = PostgresType(PostgresType::PG_RECV_INT2); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopySmallInt, sizeof(kTestPgCopySmallInt)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 5); + ASSERT_EQ(array.n_children, 1); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); + + array.release(&array); } // COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), @@ -112,7 +159,43 @@ static uint8_t kTestPgCopyInteger[] = { 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, 
PostgresCopyReadInteger) { - EXPECT_EQ(sizeof(kTestPgCopyInteger), sizeof(kTestPgCopyInteger)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyInteger; + data.size_bytes = sizeof(kTestPgCopyInteger); + + auto col_type = PostgresType(PostgresType::PG_RECV_INT4); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInteger, sizeof(kTestPgCopyInteger)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 5); + ASSERT_EQ(array.n_children, 1); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); + + array.release(&array); } // COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), @@ -126,7 +209,43 @@ static uint8_t kTestPgCopyBigInt[] = { 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { - EXPECT_EQ(sizeof(kTestPgCopyBigInt), sizeof(kTestPgCopyBigInt)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBigInt; + data.size_bytes = sizeof(kTestPgCopyBigInt); + + auto col_type = PostgresType(PostgresType::PG_RECV_INT8); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); 
+ + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBigInt, sizeof(kTestPgCopyBigInt)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 5); + ASSERT_EQ(array.n_children, 1); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); + + array.release(&array); } // COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), @@ -139,7 +258,43 @@ static uint8_t kTestPgCopyReal[] = { 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { - EXPECT_EQ(sizeof(kTestPgCopyReal), sizeof(kTestPgCopyReal)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyReal; + data.size_bytes = sizeof(kTestPgCopyReal); + + auto col_type = PostgresType(PostgresType::PG_RECV_FLOAT4); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyReal, sizeof(kTestPgCopyReal)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 5); + ASSERT_EQ(array.n_children, 1); + + auto 
validity = reinterpret_cast(array.children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_FLOAT_EQ(data_buffer[0], -123.456); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_FLOAT_EQ(data_buffer[3], 123.456); + ASSERT_EQ(data_buffer[4], 0); + + array.release(&array); } // COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), @@ -153,7 +308,44 @@ static uint8_t kTestPgCopyDoublePrecision[] = { 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadDoublePrecision) { - EXPECT_EQ(sizeof(kTestPgCopyDoublePrecision), sizeof(kTestPgCopyDoublePrecision)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyDoublePrecision; + data.size_bytes = sizeof(kTestPgCopyDoublePrecision); + + auto col_type = PostgresType(PostgresType::PG_RECV_FLOAT8); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDoublePrecision, + sizeof(kTestPgCopyDoublePrecision)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 5); + ASSERT_EQ(array.n_children, 1); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 
1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_DOUBLE_EQ(data_buffer[0], -123.456); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_DOUBLE_EQ(data_buffer[3], 123.456); + ASSERT_EQ(data_buffer[4], 0); + + array.release(&array); } // COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), @@ -165,7 +357,44 @@ static uint8_t kTestPgCopyText[] = { 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { - EXPECT_EQ(sizeof(kTestPgCopyText), sizeof(kTestPgCopyText)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyText; + data.size_bytes = sizeof(kTestPgCopyText); + + auto col_type = PostgresType(PostgresType::PG_RECV_TEXT); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyText, sizeof(kTestPgCopyText)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 3); + ASSERT_EQ(array.n_children, 1); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto offsets = reinterpret_cast(array.children[0]->buffers[1]); + auto data_buffer = reinterpret_cast(array.children[0]->buffers[2]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 3); + ASSERT_EQ(offsets[2], 7); + ASSERT_EQ(offsets[3], 7); + + ASSERT_EQ(std::string(data_buffer + 0, 3), "abc"); + ASSERT_EQ(std::string(data_buffer + 3, 4), "1234"); + + array.release(&array); } // COPY (SELECT CAST("col" AS 
INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, From 64afbe2228c29df932961dcee15a6b178114859f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 12:30:54 -0300 Subject: [PATCH 46/90] array support --- c/driver/postgresql/postgres_copy_utils.h | 8 ++-- .../postgresql/postgres_copy_utils_test.cc | 46 ++++++++++++++++++- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 155748d225..061210ff0a 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -249,7 +249,7 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data->size_bytes <= 0) { + if (field_size_bytes <= 0) { return ArrowArrayAppendNull(array, 1); } @@ -287,8 +287,8 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { int32_t lower_bound; NANOARROW_RETURN_NOT_OK(ReadChecked(data, &lower_bound, error)); - if (lower_bound != 0) { - ArrowErrorSet(error, "Array value with lower bound != 0 is not supported"); + if (lower_bound != 1) { + ArrowErrorSet(error, "Array value with lower bound != 1 is not supported"); return EINVAL; } } @@ -504,6 +504,7 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch auto array_reader = std::unique_ptr( new PostgresCopyArrayFieldReader()); + array_reader->Init(pg_type); PostgresCopyFieldReader* child_reader; NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( @@ -531,6 +532,7 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch auto record_reader = std::unique_ptr( new PostgresCopyRecordFieldReader()); + record_reader->Init(pg_type); for (int64_t i = 0; i < pg_type.n_children(); i++) { PostgresCopyFieldReader* child_reader; diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc 
b/c/driver/postgresql/postgres_copy_utils_test.cc index 89e2c499e3..9efeaa3e5a 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -411,7 +411,51 @@ static uint8_t kTestPgCopyIntegerArray[] = { 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { - EXPECT_EQ(sizeof(kTestPgCopyIntegerArray), sizeof(kTestPgCopyIntegerArray)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyIntegerArray; + data.size_bytes = sizeof(kTestPgCopyIntegerArray); + + auto col_type = PostgresType(PostgresType::PG_RECV_INT4).Array(); + PostgresType input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyIntegerArray, + sizeof(kTestPgCopyIntegerArray)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 3); + ASSERT_EQ(array.n_children, 1); + ASSERT_EQ(array.children[0]->n_children, 1); + ASSERT_EQ(array.children[0]->children[0]->length, 5); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto offsets = reinterpret_cast(array.children[0]->buffers[1]); + auto data_buffer = + reinterpret_cast(array.children[0]->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 2); + ASSERT_EQ(offsets[2], 5); + ASSERT_EQ(offsets[3], 5); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 0); + ASSERT_EQ(data_buffer[3], 1); + ASSERT_EQ(data_buffer[4], 123); + + array.release(&array); } // CREATE TYPE custom_record AS 
(nested1 integer, nested2 double precision); From d8d24064e1c546706286d8c557c2ed00d908b120 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 13:05:00 -0300 Subject: [PATCH 47/90] recor types! --- c/driver/postgresql/postgres_copy_utils.h | 94 ++++++++++++++++--- .../postgresql/postgres_copy_utils_test.cc | 45 ++++++++- 2 files changed, 126 insertions(+), 13 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index 061210ff0a..c6f97d5279 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -227,7 +227,6 @@ class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { } }; -// class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { public: void InitChild(std::unique_ptr child) { @@ -344,7 +343,7 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, ArrowError* error) override { - if (data->size_bytes == 0) { + if (field_size_bytes < 0) { return ArrowArrayAppendNull(array, 1); } @@ -352,20 +351,19 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { // the number of bytes read against the field size when finished const uint8_t* data0 = data->data.as_uint8; - int16_t n_fields; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); - if (n_fields == -1) { - return ENODATA; - } else if (n_fields != array->n_children) { - ArrowErrorSet(error, - "Expected -1 for end-of-stream or number of fields in output array " - "(%ld) but got %d", + int32_t n_fields; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); + if (n_fields != array->n_children) { + ArrowErrorSet(error, "Expected nested record type to have %ld fields but got %d", static_cast(array->n_children), static_cast(n_fields)); // NOLINT(runtime/int) return EINVAL; } - for (uint16_t i = 0; i < n_fields; i++) { + for 
(int32_t i = 0; i < n_fields; i++) { + uint32_t child_oid; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_oid, error)); + int32_t child_field_size_bytes; NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); int result = @@ -404,6 +402,78 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { std::vector> children_; }; +// Subtely different from a Record field item: field count is an int16_t +// instead of an int32_t and each field is not prefixed by its OID. +class PostgresCopyFieldTupleReader : public PostgresCopyFieldReader { + public: + void AppendChild(std::unique_ptr child) { + int64_t child_i = static_cast(children_.size()); + children_.push_back(std::move(child)); + children_[child_i]->Init(*pg_type_.child(child_i)); + } + + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); + for (int64_t i = 0; i < schema->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + int16_t n_fields; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); + if (n_fields == -1) { + return ENODATA; + } else if (n_fields != array->n_children) { + ArrowErrorSet(error, + "Expected -1 for end-of-stream or number of fields in output array " + "(%ld) but got %d", + static_cast(array->n_children), + static_cast(n_fields)); // NOLINT(runtime/int) + return EINVAL; + } + + for (int16_t i = 0; i < n_fields; i++) { + int32_t child_field_size_bytes; + 
NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); + int result = + children_[i]->Read(data, child_field_size_bytes, array->children[i], error); + + // On overflow, pretend all previous children for this struct were never + // appended to. This leaves array in a valid state in the specific case + // where EOVERFLOW was returned so that a higher level caller can attempt + // to try again after creating a new array. + if (result == EOVERFLOW) { + for (int16_t j = 0; j < i; j++) { + array->children[j]->length--; + } + } + + if (result != NANOARROW_OK) { + return result; + } + } + + array->length++; + return NANOARROW_OK; + } + + private: + std::vector> children_; +}; + // Factory for a PostgresCopyFieldReader that instantiates the proper subclass // and gives a nice error for Postgres type -> Arrow type conversions that aren't // supported. @@ -677,7 +747,7 @@ class PostgresCopyStreamReader { } private: - PostgresCopyRecordFieldReader root_reader_; + PostgresCopyFieldTupleReader root_reader_; nanoarrow::UniqueSchema schema_; nanoarrow::UniqueArray array_; }; diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_utils_test.cc index 9efeaa3e5a..34dce3feb6 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_utils_test.cc @@ -472,5 +472,48 @@ static uint8_t kTestPgCopyCustomRecord[] = { 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { - EXPECT_EQ(sizeof(kTestPgCopyCustomRecord), sizeof(kTestPgCopyCustomRecord)); + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyCustomRecord; + data.size_bytes = sizeof(kTestPgCopyCustomRecord); + + auto col_type = PostgresType(PostgresType::PG_RECV_RECORD); + col_type.AppendChild("nested1", PostgresType(PostgresType::PG_RECV_INT4)); + col_type.AppendChild("nested2", PostgresType(PostgresType::PG_RECV_FLOAT8)); + PostgresType 
input_type(PostgresType::PG_RECV_RECORD); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyCustomRecord, + sizeof(kTestPgCopyCustomRecord)); + ASSERT_EQ(data.size_bytes, 0); + + struct ArrowArray array; + ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); + ASSERT_EQ(array.length, 3); + ASSERT_EQ(array.n_children, 1); + ASSERT_EQ(array.children[0]->n_children, 2); + ASSERT_EQ(array.children[0]->children[0]->length, 3); + ASSERT_EQ(array.children[0]->children[1]->length, 3); + + auto validity = reinterpret_cast(array.children[0]->buffers[0]); + auto data_buffer1 = + reinterpret_cast(array.children[0]->children[0]->buffers[1]); + auto data_buffer2 = + reinterpret_cast(array.children[0]->children[1]->buffers[1]); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(data_buffer1[0], 123); + ASSERT_EQ(data_buffer1[1], 12); + ASSERT_EQ(data_buffer1[2], 0); + + ASSERT_DOUBLE_EQ(data_buffer2[0], 456.789); + ASSERT_DOUBLE_EQ(data_buffer2[1], 345.678); + ASSERT_DOUBLE_EQ(data_buffer2[2], 0); + + array.release(&array); } From f10b06cee5edd4761f4f9cb9fefae9a97912af99 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 13:09:35 -0300 Subject: [PATCH 48/90] maybe fix runtime/int --- c/driver/postgresql/postgres_copy_utils.h | 30 ++++++++++++----------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_utils.h index c6f97d5279..94362c4d53 100644 --- a/c/driver/postgresql/postgres_copy_utils.h +++ b/c/driver/postgresql/postgres_copy_utils.h @@ -355,8 +355,8 @@ class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); if (n_fields != 
array->n_children) { ArrowErrorSet(error, "Expected nested record type to have %ld fields but got %d", - static_cast(array->n_children), - static_cast(n_fields)); // NOLINT(runtime/int) + static_cast(array->n_children), // NOLINT(runtime/int) + static_cast(n_fields)); // NOLINT(runtime/int) return EINVAL; } @@ -440,8 +440,8 @@ class PostgresCopyFieldTupleReader : public PostgresCopyFieldReader { ArrowErrorSet(error, "Expected -1 for end-of-stream or number of fields in output array " "(%ld) but got %d", - static_cast(array->n_children), - static_cast(n_fields)); // NOLINT(runtime/int) + static_cast(array->n_children), // NOLINT(runtime/int) + static_cast(n_fields)); // NOLINT(runtime/int) return EINVAL; } @@ -595,8 +595,8 @@ ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* sch ArrowErrorSet(error, "Can't convert Postgres record type with %ld chlidren to Arrow " "struct type with %ld children", - static_cast(pg_type.n_children()), - static_cast(schema->n_children)); // NOLINT(runtime/int) + static_cast(pg_type.n_children()), // NOLINT(runtime/int) + static_cast(schema->n_children)); // NOLINT(runtime/int) return EINVAL; } @@ -647,8 +647,9 @@ class PostgresCopyStreamReader { ArrowErrorSet(error, "Expected output schema with %ld columns to match Postgres input but " "got schema with %ld columns", - static_cast(root_reader_.InputType().n_children()), - static_cast(schema->n_children)); // NOLINT(runtime/int) + static_cast( + root_reader_.InputType().n_children()), // NOLINT(runtime/int) + static_cast(schema->n_children)); // NOLINT(runtime/int) return EINVAL; } @@ -684,11 +685,12 @@ class PostgresCopyStreamReader { ArrowErrorCode ReadHeader(ArrowBufferView* data, ArrowError* error) { if (data->size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { - ArrowErrorSet(error, - "Expected PGCOPY signature of %ld bytes at beginning of stream but " - "found %ld bytes of input", - static_cast(sizeof(kPgCopyBinarySignature)), - 
static_cast(data->size_bytes)); // NOLINT(runtime/int) + ArrowErrorSet( + error, + "Expected PGCOPY signature of %ld bytes at beginning of stream but " + "found %ld bytes of input", + static_cast(sizeof(kPgCopyBinarySignature)), // NOLINT(runtime/int) + static_cast(data->size_bytes)); // NOLINT(runtime/int) return EINVAL; } @@ -710,7 +712,7 @@ class PostgresCopyStreamReader { ArrowErrorSet(error, "Expected %ld bytes of extension metadata at start of stream but " "found %ld bytes of input", - static_cast(extension_length), + static_cast(extension_length), // NOLINT(runtime/int) static_cast(data->size_bytes)); // NOLINT(runtime/int) return EINVAL; } From c8c7d5f68bd536a7df3a5965b766b5f4763008e9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 13:11:45 -0300 Subject: [PATCH 49/90] better name for file --- c/driver/postgresql/CMakeLists.txt | 2 +- .../{postgres_copy_utils.h => postgres_copy_reader.h} | 0 ...postgres_copy_utils_test.cc => postgres_copy_reader_test.cc} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename c/driver/postgresql/{postgres_copy_utils.h => postgres_copy_reader.h} (100%) rename c/driver/postgresql/{postgres_copy_utils_test.cc => postgres_copy_reader_test.cc} (99%) diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index c3deb6ab00..39f8d2e65e 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -77,7 +77,7 @@ if(ADBC_BUILD_TESTS) PREFIX adbc SOURCES - postgres_copy_utils_test.cc + postgres_copy_reader_test.cc postgres_type_test.cc postgresql_test.cc ../../validation/adbc_validation.cc diff --git a/c/driver/postgresql/postgres_copy_utils.h b/c/driver/postgresql/postgres_copy_reader.h similarity index 100% rename from c/driver/postgresql/postgres_copy_utils.h rename to c/driver/postgresql/postgres_copy_reader.h diff --git a/c/driver/postgresql/postgres_copy_utils_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc similarity index 99% 
rename from c/driver/postgresql/postgres_copy_utils_test.cc rename to c/driver/postgresql/postgres_copy_reader_test.cc index 34dce3feb6..3d989722b2 100644 --- a/c/driver/postgresql/postgres_copy_utils_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -18,7 +18,7 @@ #include #include -#include "postgres_copy_utils.h" +#include "postgres_copy_reader.h" using adbcpq::PostgresCopyStreamReader; using adbcpq::PostgresType; From 80f5b2c98879538681e271402382bfa944f94c6b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 14:40:07 -0300 Subject: [PATCH 50/90] wire up the postgres_type to the statement --- c/driver/postgresql/connection.cc | 1 + c/driver/postgresql/connection.h | 5 +++ c/driver/postgresql/database.cc | 14 +++++++- c/driver/postgresql/database.h | 6 ++++ c/driver/postgresql/postgres_type.h | 17 ++++----- c/driver/postgresql/statement.cc | 53 ++++++----------------------- c/driver/postgresql/statement.h | 2 ++ 7 files changed, 47 insertions(+), 51 deletions(-) diff --git a/c/driver/postgresql/connection.cc b/c/driver/postgresql/connection.cc index 38cba57f17..1abf28481d 100644 --- a/c/driver/postgresql/connection.cc +++ b/c/driver/postgresql/connection.cc @@ -59,6 +59,7 @@ AdbcStatusCode PostgresConnection::Init(struct AdbcDatabase* database, database_ = *reinterpret_cast*>(database->private_data); type_mapping_ = database_->type_mapping(); + type_resolver_ = database_->type_resolver(); return database_->Connect(&conn_, error); } diff --git a/c/driver/postgresql/connection.h b/c/driver/postgresql/connection.h index 9105c61e4b..343977dec6 100644 --- a/c/driver/postgresql/connection.h +++ b/c/driver/postgresql/connection.h @@ -23,6 +23,7 @@ #include #include +#include "postgres_type.h" #include "type.h" namespace adbcpq { @@ -42,10 +43,14 @@ class PostgresConnection { PGconn* conn() const { return conn_; } const std::shared_ptr& type_mapping() const { return type_mapping_; } + const std::shared_ptr& type_resolver() const { + 
return type_resolver_; + } private: std::shared_ptr database_; std::shared_ptr type_mapping_; + std::shared_ptr type_resolver_; PGconn* conn_; bool autocommit_; }; diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index bc5e0ec2ff..4a5385867f 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -30,6 +30,7 @@ namespace adbcpq { PostgresDatabase::PostgresDatabase() : open_connections_(0) { type_mapping_ = std::make_shared(); + type_resolver_ = std::make_shared(); } PostgresDatabase::~PostgresDatabase() = default; @@ -49,19 +50,30 @@ SELECT typreceive FROM pg_catalog.pg_type +WHERE + typelem = 0 AND typrelid = 0 AND typbasetype = 0 )"; pg_result* result = PQexec(conn, kTypeQuery.c_str()); ExecStatusType pq_status = PQresultStatus(result); if (pq_status == PGRES_TUPLES_OK) { int num_rows = PQntuples(result); + PostgresTypeResolver::Item item; + for (int row = 0; row < num_rows; row++) { const uint32_t oid = static_cast( std::strtol(PQgetvalue(result, row, 0), /*str_end=*/nullptr, /*base=*/10)); const char* typname = PQgetvalue(result, row, 1); const char* typreceive = PQgetvalue(result, row, 2); - type_mapping_->Insert(oid, typname, typreceive); + + item.oid = oid; + item.typname = typname; + item.typreceive = typreceive; + + // Intentionally ignoring types we don't know how to deal with. These will error + // later if there is a query that actually contains them. 
+ type_resolver_->Insert(item, nullptr); } } else { SetError(error, "Failed to build type mapping table: ", PQerrorMessage(conn)); diff --git a/c/driver/postgresql/database.h b/c/driver/postgresql/database.h index 9c51a77cda..76970bf97c 100644 --- a/c/driver/postgresql/database.h +++ b/c/driver/postgresql/database.h @@ -24,6 +24,7 @@ #include #include +#include "postgres_type.h" #include "type.h" namespace adbcpq { @@ -45,9 +46,14 @@ class PostgresDatabase { const std::shared_ptr& type_mapping() const { return type_mapping_; } + const std::shared_ptr& type_resolver() const { + return type_resolver_; + } + private: int32_t open_connections_; std::string uri_; std::shared_ptr type_mapping_; + std::shared_ptr type_resolver_; }; } // namespace adbcpq diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index df10e3878e..aefa461f47 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -114,13 +114,13 @@ class PostgresType { }; static std::vector PgRecvAllBase(bool nested = true) { - std::vector base = {PG_RECV_BIT, PG_RECV_BOOL, PG_RECV_BYTEA, - PG_RECV_CASH, PG_RECV_CHAR, PG_RECV_DATE, - PG_RECV_FLOAT4, PG_RECV_FLOAT8, PG_RECV_INT4, - PG_RECV_INT8, PG_RECV_INTERVAL, PG_RECV_NUMERIC, - PG_RECV_OID, PG_RECV_TEXT, PG_RECV_TIME, - PG_RECV_TIMESTAMP, PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, - PG_RECV_UUID, PG_RECV_VARBIT, PG_RECV_VARCHAR}; + std::vector base = { + PG_RECV_BIT, PG_RECV_BOOL, PG_RECV_BYTEA, PG_RECV_CASH, + PG_RECV_CHAR, PG_RECV_BPCHAR, PG_RECV_DATE, PG_RECV_FLOAT4, + PG_RECV_FLOAT8, PG_RECV_INT2, PG_RECV_INT4, PG_RECV_INT8, + PG_RECV_INTERVAL, PG_RECV_NUMERIC, PG_RECV_OID, PG_RECV_TEXT, + PG_RECV_TIME, PG_RECV_TIMESTAMP, PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, + PG_RECV_UUID, PG_RECV_VARBIT, PG_RECV_VARCHAR}; if (nested) { base.push_back(PG_RECV_ARRAY); @@ -425,6 +425,7 @@ class PostgresType { NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; case PG_RECV_CHAR: 
+ case PG_RECV_BPCHAR: case PG_RECV_VARCHAR: case PG_RECV_TEXT: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); @@ -496,7 +497,7 @@ class PostgresTypeResolver { PostgresTypeResolver() : base_(PostgresType::AllBase()) {} - ArrowErrorCode Find(uint32_t oid, PostgresType* type_out, ArrowError* error) { + ArrowErrorCode Find(uint32_t oid, PostgresType* type_out, ArrowError* error) const { auto result = mapping_.find(oid); if (result == mapping_.end()) { ArrowErrorSet(error, "Postgres type with oid %ld not found", diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 900b23e0a0..c213a264ff 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -30,6 +30,7 @@ #include #include "connection.h" +#include "postgres_type.h" #include "type.h" #include "util.h" @@ -105,57 +106,24 @@ struct Handle { }; /// Build an Arrow schema from a PostgreSQL result set -AdbcStatusCode InferSchema(const TypeMapping& type_mapping, PGresult* result, +AdbcStatusCode InferSchema(const PostgresTypeResolver& type_resolver, PGresult* result, struct ArrowSchema* out, struct AdbcError* error) { + ArrowError na_error; const int num_fields = PQnfields(result); ArrowSchemaInit(out); CHECK_NA_ADBC(ArrowSchemaSetTypeStruct(out, num_fields), error); for (int i = 0; i < num_fields; i++) { ArrowType field_type = NANOARROW_TYPE_NA; - const Oid pg_type = PQftype(result, i); - - auto it = type_mapping.type_mapping.find(pg_type); - if (it == type_mapping.type_mapping.end()) { + const Oid pg_oid = PQftype(result, i); + PostgresType pg_type; + if (type_resolver.Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) { SetError(error, "Column #", i + 1, " (\"", PQfname(result, i), - "\") has unknown type code ", pg_type); + "\") has unknown type code ", pg_oid); return ADBC_STATUS_NOT_IMPLEMENTED; } - switch (it->second) { - // TODO: this mapping will eventually have to become dynamic, - // because of complex types like 
arrays/records - case PgType::kBool: - field_type = NANOARROW_TYPE_BOOL; - break; - case PgType::kFloat4: - field_type = NANOARROW_TYPE_FLOAT; - break; - case PgType::kFloat8: - field_type = NANOARROW_TYPE_DOUBLE; - break; - case PgType::kInt2: - field_type = NANOARROW_TYPE_INT16; - break; - case PgType::kInt4: - field_type = NANOARROW_TYPE_INT32; - break; - case PgType::kInt8: - field_type = NANOARROW_TYPE_INT64; - break; - case PgType::kVarBinary: - field_type = NANOARROW_TYPE_BINARY; - break; - case PgType::kText: - case PgType::kVarChar: - field_type = NANOARROW_TYPE_STRING; - break; - default: - SetError(error, "Column #", i + 1, " (\"", PQfname(result, i), - "\") has unimplemented type code ", pg_type); - return ADBC_STATUS_NOT_IMPLEMENTED; - } - CHECK_NA_ADBC(ArrowSchemaSetType(out->children[i], field_type), error); - CHECK_NA_ADBC(ArrowSchemaSetName(out->children[i], PQfname(result, i)), error); + CHECK_NA_ADBC(pg_type.WithFieldName(PQfname(result, i)).SetSchema(out->children[i]), + error); } return ADBC_STATUS_OK; } @@ -677,6 +645,7 @@ AdbcStatusCode PostgresStatement::New(struct AdbcConnection* connection, connection_ = *reinterpret_cast*>(connection->private_data); type_mapping_ = connection_->type_mapping(); + type_resolver_ = connection_->type_resolver(); reader_.conn_ = connection_->conn(); return ADBC_STATUS_OK; } @@ -836,7 +805,7 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, PQclear(result); return ADBC_STATUS_IO; } - AdbcStatusCode status = InferSchema(*type_mapping_, result, &reader_.schema_, error); + AdbcStatusCode status = InferSchema(*type_resolver_, result, &reader_.schema_, error); PQclear(result); if (status != ADBC_STATUS_OK) return status; } diff --git a/c/driver/postgresql/statement.h b/c/driver/postgresql/statement.h index bb39d0d158..3cdcf3c7d7 100644 --- a/c/driver/postgresql/statement.h +++ b/c/driver/postgresql/statement.h @@ -26,6 +26,7 @@ #include #include +#include "postgres_type.h" #include 
"type.h" namespace adbcpq { @@ -101,6 +102,7 @@ class PostgresStatement { private: std::shared_ptr type_mapping_; + std::shared_ptr type_resolver_; std::shared_ptr connection_; // Query state From 52cf0fac4373b5f86941d10aab7e28904f6f4a37 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 15:34:11 -0300 Subject: [PATCH 51/90] maybe remove all uses of the type mapping --- c/driver/postgresql/postgres_type.h | 15 +++++++++++++++ c/driver/postgresql/statement.cc | 26 +++++++++++++------------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index aefa461f47..56a4c49b31 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -509,6 +509,15 @@ class PostgresTypeResolver { return NANOARROW_OK; } + uint32_t GetOID(PostgresType::PgRecv recv) const { + auto result = reverse_mapping_.find(recv); + if (result == reverse_mapping_.end()) { + return 0; + } else { + return result->second; + } + } + ArrowErrorCode Insert(const Item& item, ArrowError* error) { auto result = base_.find(item.typreceive); if (result == base_.end()) { @@ -525,6 +534,7 @@ class PostgresTypeResolver { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); + reverse_mapping_.insert({base.recv(), item.oid}); break; } @@ -540,6 +550,7 @@ class PostgresTypeResolver { } mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); + reverse_mapping_.insert({base.recv(), item.oid}); break; } @@ -547,6 +558,7 @@ class PostgresTypeResolver { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); + reverse_mapping_.insert({base.recv(), item.oid}); break; } @@ -554,11 +566,13 @@ class PostgresTypeResolver { PostgresType base_type; 
NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); + reverse_mapping_.insert({base.recv(), item.oid}); break; } default: mapping_.insert({item.oid, type}); + reverse_mapping_.insert({base.recv(), item.oid}); break; } @@ -575,6 +589,7 @@ class PostgresTypeResolver { private: std::unordered_map mapping_; + std::unordered_map reverse_mapping_; std::unordered_map base_; }; diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index c213a264ff..b51d16e5fe 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -174,7 +174,8 @@ struct BindStream { return std::move(callback)(); } - AdbcStatusCode SetParamTypes(const TypeMapping& type_mapping, struct AdbcError* error) { + AdbcStatusCode SetParamTypes(const PostgresTypeResolver& type_resolver, + struct AdbcError* error) { param_types.resize(bind_schema->n_children); param_values.resize(bind_schema->n_children); param_lengths.resize(bind_schema->n_children); @@ -182,18 +183,18 @@ struct BindStream { param_values_offsets.reserve(bind_schema->n_children); for (size_t i = 0; i < bind_schema_fields.size(); i++) { - PgType pg_type; + PostgresType::PgRecv recv; switch (bind_schema_fields[i].type) { case ArrowType::NANOARROW_TYPE_INT16: - pg_type = PgType::kInt2; + recv = PostgresType::PG_RECV_INT2; param_lengths[i] = 2; break; case ArrowType::NANOARROW_TYPE_INT32: - pg_type = PgType::kInt4; + recv = PostgresType::PG_RECV_INT4; param_lengths[i] = 4; break; case ArrowType::NANOARROW_TYPE_INT64: - pg_type = PgType::kInt8; + recv = PostgresType::PG_RECV_INT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_DOUBLE: @@ -201,22 +202,21 @@ struct BindStream { param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_STRING: - pg_type = PgType::kText; + recv = PostgresType::PG_RECV_TEXT; param_lengths[i] = 0; break; default: - // TODO: data type to string SetError(error, "Field #", i + 
1, " ('", bind_schema->children[i]->name, - "') has unsupported parameter type ", bind_schema_fields[i].type); + "') has unsupported parameter type ", + ArrowTypeString(bind_schema_fields[i].type)); return ADBC_STATUS_NOT_IMPLEMENTED; } - param_types[i] = type_mapping.GetOid(pg_type); + param_types[i] = type_resolver.GetOID(recv); if (param_types[i] == 0) { - // TODO: data type to string SetError(error, "Field #", i + 1, " ('", bind_schema->children[i]->name, "') has type with no corresponding PostgreSQL type ", - bind_schema_fields[i].type); + ArrowTypeString(bind_schema_fields[i].type)); return ADBC_STATUS_NOT_IMPLEMENTED; } } @@ -754,7 +754,7 @@ AdbcStatusCode PostgresStatement::ExecutePreparedStatement( std::memset(&bind_, 0, sizeof(bind_)); CHECK(bind_stream.Begin([&]() { return ADBC_STATUS_OK; }, error)); - CHECK(bind_stream.SetParamTypes(*type_mapping_, error)); + CHECK(bind_stream.SetParamTypes(*type_resolver_, error)); CHECK(bind_stream.Prepare(connection_->conn(), query_, error)); CHECK(bind_stream.Execute(connection_->conn(), rows_affected, error)); return ADBC_STATUS_OK; @@ -851,7 +851,7 @@ AdbcStatusCode PostgresStatement::ExecuteUpdateBulk(int64_t* rows_affected, return ADBC_STATUS_OK; }, error)); - CHECK(bind_stream.SetParamTypes(*type_mapping_, error)); + CHECK(bind_stream.SetParamTypes(*type_resolver_, error)); std::string insert = "INSERT INTO "; insert += ingest_.target; From 9f9d045ba15bc2c1f4a7ef7bdca708b9195a81bc Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 15:36:32 -0300 Subject: [PATCH 52/90] use reverse mapping in tests --- c/driver/postgresql/postgres_type_test.cc | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index d55aee98cb..7677906a38 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -40,36 +40,32 @@ class MockTypeResolver : 
public PostgresTypeResolver { item.typname = typname.c_str(); item.typreceive = typreceive.c_str(); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); - oids_[recv] = item.oid; } // Insert one of each nested type item.oid++; item.typname = "_bool"; item.typreceive = "array_recv"; - item.child_oid = oid(PostgresType::PG_RECV_BOOL); + item.child_oid = GetOID(PostgresType::PG_RECV_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); - oids_[PostgresType::PG_RECV_ARRAY] = item.oid; item.oid++; item.typname = "boolrange"; item.typreceive = "range_recv"; - item.base_oid = oid(PostgresType::PG_RECV_BOOL); + item.base_oid = GetOID(PostgresType::PG_RECV_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); - oids_[PostgresType::PG_RECV_RANGE] = item.oid; item.oid++; item.typname = "custombool"; item.typreceive = "domain_recv"; - item.base_oid = oid(PostgresType::PG_RECV_BOOL); + item.base_oid = GetOID(PostgresType::PG_RECV_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); - oids_[PostgresType::PG_RECV_DOMAIN] = item.oid; item.oid++; uint32_t class_oid = item.oid; std::vector> record_fields_ = { - {oid(PostgresType::PG_RECV_INT4), "int4_col"}, - {oid(PostgresType::PG_RECV_TEXT), "text_col"}}; + {GetOID(PostgresType::PG_RECV_INT4), "int4_col"}, + {GetOID(PostgresType::PG_RECV_TEXT), "text_col"}}; classes_.insert({class_oid, record_fields_}); item.oid++; @@ -78,13 +74,9 @@ class MockTypeResolver : public PostgresTypeResolver { item.class_oid = class_oid; NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); - oids_[PostgresType::PG_RECV_RECORD] = item.oid; - return NANOARROW_OK; } - uint32_t oid(PostgresType::PgRecv recv) { return oids_[recv]; } - ArrowErrorCode ResolveClass(uint32_t oid, std::vector>* out, ArrowError* error) override { @@ -98,7 +90,6 @@ class MockTypeResolver : public PostgresTypeResolver { } private: - std::unordered_map oids_; std::unordered_map>> classes_; }; @@ -324,9 +315,9 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { ASSERT_EQ(resolver.Init(), 
NANOARROW_OK); PostgresType type; - EXPECT_EQ(resolver.Find(resolver.oid(PostgresType::PG_RECV_RECORD), &type, nullptr), + EXPECT_EQ(resolver.Find(resolver.GetOID(PostgresType::PG_RECV_RECORD), &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.oid(), resolver.oid(PostgresType::PG_RECV_RECORD)); + EXPECT_EQ(type.oid(), resolver.GetOID(PostgresType::PG_RECV_RECORD)); EXPECT_EQ(type.n_children(), 2); EXPECT_EQ(type.child(0)->field_name(), "int4_col"); EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_INT4); From 1e13e503ee52749cec94167dc26501c57af14a34 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 15:38:53 -0300 Subject: [PATCH 53/90] remove type mappinng --- c/driver/postgresql/connection.cc | 1 - c/driver/postgresql/connection.h | 2 -- c/driver/postgresql/database.cc | 2 -- c/driver/postgresql/database.h | 4 ---- c/driver/postgresql/statement.cc | 1 - c/driver/postgresql/statement.h | 1 - 6 files changed, 11 deletions(-) diff --git a/c/driver/postgresql/connection.cc b/c/driver/postgresql/connection.cc index 1abf28481d..b61ebbef70 100644 --- a/c/driver/postgresql/connection.cc +++ b/c/driver/postgresql/connection.cc @@ -58,7 +58,6 @@ AdbcStatusCode PostgresConnection::Init(struct AdbcDatabase* database, } database_ = *reinterpret_cast*>(database->private_data); - type_mapping_ = database_->type_mapping(); type_resolver_ = database_->type_resolver(); return database_->Connect(&conn_, error); } diff --git a/c/driver/postgresql/connection.h b/c/driver/postgresql/connection.h index 343977dec6..8d1e98894e 100644 --- a/c/driver/postgresql/connection.h +++ b/c/driver/postgresql/connection.h @@ -42,14 +42,12 @@ class PostgresConnection { AdbcStatusCode SetOption(const char* key, const char* value, struct AdbcError* error); PGconn* conn() const { return conn_; } - const std::shared_ptr& type_mapping() const { return type_mapping_; } const std::shared_ptr& type_resolver() const { return type_resolver_; } private: std::shared_ptr database_; - 
std::shared_ptr type_mapping_; std::shared_ptr type_resolver_; PGconn* conn_; bool autocommit_; diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index 4a5385867f..7d764bcd8f 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -29,7 +29,6 @@ namespace adbcpq { PostgresDatabase::PostgresDatabase() : open_connections_(0) { - type_mapping_ = std::make_shared(); type_resolver_ = std::make_shared(); } PostgresDatabase::~PostgresDatabase() = default; @@ -65,7 +64,6 @@ WHERE std::strtol(PQgetvalue(result, row, 0), /*str_end=*/nullptr, /*base=*/10)); const char* typname = PQgetvalue(result, row, 1); const char* typreceive = PQgetvalue(result, row, 2); - type_mapping_->Insert(oid, typname, typreceive); item.oid = oid; item.typname = typname; diff --git a/c/driver/postgresql/database.h b/c/driver/postgresql/database.h index 76970bf97c..befeddb5e6 100644 --- a/c/driver/postgresql/database.h +++ b/c/driver/postgresql/database.h @@ -43,9 +43,6 @@ class PostgresDatabase { AdbcStatusCode Connect(PGconn** conn, struct AdbcError* error); AdbcStatusCode Disconnect(PGconn** conn, struct AdbcError* error); - - const std::shared_ptr& type_mapping() const { return type_mapping_; } - const std::shared_ptr& type_resolver() const { return type_resolver_; } @@ -53,7 +50,6 @@ class PostgresDatabase { private: int32_t open_connections_; std::string uri_; - std::shared_ptr type_mapping_; std::shared_ptr type_resolver_; }; } // namespace adbcpq diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index b51d16e5fe..72375586d4 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -644,7 +644,6 @@ AdbcStatusCode PostgresStatement::New(struct AdbcConnection* connection, } connection_ = *reinterpret_cast*>(connection->private_data); - type_mapping_ = connection_->type_mapping(); type_resolver_ = connection_->type_resolver(); reader_.conn_ = connection_->conn(); return 
ADBC_STATUS_OK; diff --git a/c/driver/postgresql/statement.h b/c/driver/postgresql/statement.h index 3cdcf3c7d7..3b115e0649 100644 --- a/c/driver/postgresql/statement.h +++ b/c/driver/postgresql/statement.h @@ -101,7 +101,6 @@ class PostgresStatement { struct AdbcError* error); private: - std::shared_ptr type_mapping_; std::shared_ptr type_resolver_; std::shared_ptr connection_; From 54be7f081179e162da9f83badc11116cf95e0ab1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 15:40:27 -0300 Subject: [PATCH 54/90] remove type.h/cc --- c/driver/postgresql/CMakeLists.txt | 1 - c/driver/postgresql/connection.h | 1 - c/driver/postgresql/database.h | 1 - c/driver/postgresql/statement.cc | 1 - c/driver/postgresql/statement.h | 1 - c/driver/postgresql/type.cc | 92 ------------------------------ c/driver/postgresql/type.h | 63 -------------------- 7 files changed, 160 deletions(-) delete mode 100644 c/driver/postgresql/type.cc delete mode 100644 c/driver/postgresql/type.h diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 39f8d2e65e..b9344265fb 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -43,7 +43,6 @@ add_arrow_lib(adbc_driver_postgresql database.cc postgresql.cc statement.cc - type.cc OUTPUTS ADBC_LIBRARIES CMAKE_PACKAGE_NAME diff --git a/c/driver/postgresql/connection.h b/c/driver/postgresql/connection.h index 8d1e98894e..6f63d66482 100644 --- a/c/driver/postgresql/connection.h +++ b/c/driver/postgresql/connection.h @@ -24,7 +24,6 @@ #include #include "postgres_type.h" -#include "type.h" namespace adbcpq { class PostgresDatabase; diff --git a/c/driver/postgresql/database.h b/c/driver/postgresql/database.h index befeddb5e6..b57241febd 100644 --- a/c/driver/postgresql/database.h +++ b/c/driver/postgresql/database.h @@ -25,7 +25,6 @@ #include #include "postgres_type.h" -#include "type.h" namespace adbcpq { class PostgresDatabase { diff --git 
a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 72375586d4..8321926b1f 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -31,7 +31,6 @@ #include "connection.h" #include "postgres_type.h" -#include "type.h" #include "util.h" namespace adbcpq { diff --git a/c/driver/postgresql/statement.h b/c/driver/postgresql/statement.h index 3b115e0649..1ad4d822ab 100644 --- a/c/driver/postgresql/statement.h +++ b/c/driver/postgresql/statement.h @@ -27,7 +27,6 @@ #include #include "postgres_type.h" -#include "type.h" namespace adbcpq { class PostgresConnection; diff --git a/c/driver/postgresql/type.cc b/c/driver/postgresql/type.cc deleted file mode 100644 index b246604d73..0000000000 --- a/c/driver/postgresql/type.cc +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "type.h" - -#include - -namespace adbcpq { -void TypeMapping::Insert(uint32_t oid, const char* typname, const char* typreceive) { - PgType type; - if (FromPgTypreceive(typreceive, &type)) { - type_mapping[oid] = type; - } - - // Record 'canonical' types - if (std::strcmp(typname, "int4") == 0) { - // DCHECK_EQ(type, PgType::kInt4); - canonical_types[static_cast(PgType::kInt4)] = oid; - } else if (std::strcmp(typname, "int8") == 0) { - // DCHECK_EQ(type, PgType::kInt8); - canonical_types[static_cast(PgType::kInt8)] = oid; - } else if (std::strcmp(typname, "float8") == 0) { - // DCHECK_EQ(type, PgType::kFloat8); - canonical_types[static_cast(PgType::kFloat8)] = oid; - } else if (std::strcmp(typname, "text") == 0) { - canonical_types[static_cast(PgType::kText)] = oid; - } - // TODO: fill in remainder -} - -uint32_t TypeMapping::GetOid(PgType type) const { - auto it = canonical_types.find(static_cast(type)); - if (it == canonical_types.end()) { - return 0; - } - return it->second; -} - -bool FromPgTypreceive(const char* typreceive, PgType* out) { - if (std::strcmp(typreceive, "bitrecv") == 0) { - *out = PgType::kBit; - } else if (std::strcmp(typreceive, "bytearecv") == 0) { - *out = PgType::kVarBinary; - } else if (std::strcmp(typreceive, "boolrecv") == 0) { - *out = PgType::kBool; - } else if (std::strcmp(typreceive, "bpcharrecv") == 0) { - *out = PgType::kVarChar; - } else if (std::strcmp(typreceive, "date_recv") == 0) { - *out = PgType::kDate; - } else if (std::strcmp(typreceive, "float4recv") == 0) { - *out = PgType::kFloat4; - } else if (std::strcmp(typreceive, "float8recv") == 0) { - *out = PgType::kFloat8; - } else if (std::strcmp(typreceive, "int2recv") == 0) { - *out = PgType::kInt2; - } else if (std::strcmp(typreceive, "int4recv") == 0) { - *out = PgType::kInt4; - } else if (std::strcmp(typreceive, "int8recv") == 0) { - *out = PgType::kInt8; - } else if (std::strcmp(typreceive, "textrecv") == 0) { - *out = PgType::kText; - } else if 
(std::strcmp(typreceive, "time_recv") == 0) { - *out = PgType::kTime; - } else if (std::strcmp(typreceive, "timestamp_recv") == 0) { - *out = PgType::kTimestamp; - } else if (std::strcmp(typreceive, "timestamptz_recv") == 0) { - *out = PgType::kTimestampTz; - } else if (std::strcmp(typreceive, "timetz_recv") == 0) { - *out = PgType::kTimeTz; - } else if (std::strcmp(typreceive, "varcharrecv") == 0) { - *out = PgType::kVarChar; - } else { - return false; - } - return true; -} - -} // namespace adbcpq diff --git a/c/driver/postgresql/type.h b/c/driver/postgresql/type.h deleted file mode 100644 index 1f2ce703b5..0000000000 --- a/c/driver/postgresql/type.h +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include - -namespace adbcpq { - -enum class PgType : uint8_t { - // TODO: is there a good null type? 
- kBit, - kBool, - kDate, - kFloat4, - kFloat8, - kInt2, - kInt4, - kInt8, - kText, - kTime, - kTimestamp, - kTimestampTz, - kTimeTz, - kVarBinary, - kVarChar, -}; - -struct TypeMapping { - // Maps PostgreSQL type OIDs to a standardized type name - // Example: int8 == 20 - std::unordered_map type_mapping; - // Maps standardized type names to the PostgreSQL type OID to use - // Example: kInt8 == 20 - // We can't use enum PgType as the key because enums don't have a hash - // implementation on gcc 4.8 (i.e., R 3.6 on Windows) - std::unordered_map canonical_types; - - void Insert(uint32_t oid, const char* typname, const char* typreceive); - /// \return 0 if not found - uint32_t GetOid(PgType type) const; -}; - -bool FromPgTypreceive(const char* typreceive, PgType* out); - -} // namespace adbcpq From 1d793f71c023241e017dc5887362dde8ceafe17d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Apr 2023 15:41:40 -0300 Subject: [PATCH 55/90] one more long cast to cover up --- c/driver/postgresql/postgres_copy_reader.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 94362c4d53..b8ca82b984 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -647,9 +647,9 @@ class PostgresCopyStreamReader { ArrowErrorSet(error, "Expected output schema with %ld columns to match Postgres input but " "got schema with %ld columns", - static_cast( - root_reader_.InputType().n_children()), // NOLINT(runtime/int) - static_cast(schema->n_children)); // NOLINT(runtime/int) + static_cast( // NOLINT(runtime/int) + root_reader_.InputType().n_children()), + static_cast(schema->n_children)); // NOLINT(runtime/int) return EINVAL; } From 4e08dc5ad4717ceb7969523fb412bbec0972138e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 7 Apr 2023 21:51:52 -0300 Subject: [PATCH 56/90] fix can't convert --- 
c/driver/postgresql/postgres_copy_reader.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index b8ca82b984..6491f44a34 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -477,16 +477,19 @@ class PostgresCopyFieldTupleReader : public PostgresCopyFieldReader { // Factory for a PostgresCopyFieldReader that instantiates the proper subclass // and gives a nice error for Postgres type -> Arrow type conversions that aren't // supported. -ArrowErrorCode ErrorCantConvert(ArrowError* error, const PostgresType& pg_type, - const ArrowSchemaView& schema_view) { +static inline ArrowErrorCode ErrorCantConvert(ArrowError* error, + const PostgresType& pg_type, + const ArrowSchemaView& schema_view) { ArrowErrorSet(error, "Can't convert Postgres type '%s' to Arrow type '%s'", pg_type.typname().c_str(), ArrowTypeString(schema_view.type)); // NOLINT(runtime/int) return EINVAL; } -ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, ArrowSchema* schema, - PostgresCopyFieldReader** out, ArrowError* error) { +static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, + ArrowSchema* schema, + PostgresCopyFieldReader** out, + ArrowError* error) { ArrowSchemaView schema_view; NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); From 2299964bcb3393d5dab4592e25637a85a4f25b2d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 7 Apr 2023 21:56:06 -0300 Subject: [PATCH 57/90] nix copy reader for now --- c/driver/postgresql/CMakeLists.txt | 1 - c/driver/postgresql/postgres_copy_reader.h | 760 ------------------ .../postgresql/postgres_copy_reader_test.cc | 519 ------------ 3 files changed, 1280 deletions(-) delete mode 100644 c/driver/postgresql/postgres_copy_reader.h delete mode 100644 c/driver/postgresql/postgres_copy_reader_test.cc diff --git 
a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index b9344265fb..e14ea20dcc 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -76,7 +76,6 @@ if(ADBC_BUILD_TESTS) PREFIX adbc SOURCES - postgres_copy_reader_test.cc postgres_type_test.cc postgresql_test.cc ../../validation/adbc_validation.cc diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h deleted file mode 100644 index 6491f44a34..0000000000 --- a/c/driver/postgresql/postgres_copy_reader.h +++ /dev/null @@ -1,760 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#include "postgres_type.h" -#include "util.h" - -namespace adbcpq { - -static int8_t kPgCopyBinarySignature[] = {'P', 'G', 'C', 'O', 'P', 'Y', - '\n', '\377', '\r', '\n', '\0'}; - -// Read a value from the buffer without checking the buffer size. Advances -// the cursor of data and reduces its size by sizeof(T). 
-template -inline T ReadUnsafe(ArrowBufferView* data) { - T out; - memcpy(&out, data->data.data, sizeof(T)); - out = SwapNetworkToHost(out); - data->data.as_uint8 += sizeof(T); - data->size_bytes -= sizeof(T); - return out; -} - -// Define some explicit specializations for types that don't have a SwapNetworkToHost -// overload. -template <> -inline int8_t ReadUnsafe(ArrowBufferView* data) { - int8_t out = data->data.as_int8[0]; - data->data.as_uint8 += sizeof(int8_t); - data->size_bytes -= sizeof(int8_t); - return out; -} - -template <> -inline int16_t ReadUnsafe(ArrowBufferView* data) { - return static_cast(ReadUnsafe(data)); -} - -template <> -inline int32_t ReadUnsafe(ArrowBufferView* data) { - return static_cast(ReadUnsafe(data)); -} - -template <> -inline int64_t ReadUnsafe(ArrowBufferView* data) { - return static_cast(ReadUnsafe(data)); -} - -template -ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { - if (data->size_bytes < static_cast(sizeof(T))) { - ArrowErrorSet(error, "Unexpected end of input (expected %d bytes but found %ld)", - static_cast(sizeof(T)), - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - *out = ReadUnsafe(data); - return NANOARROW_OK; -} - -class PostgresCopyFieldReader { - public: - PostgresCopyFieldReader() : offsets_(nullptr), data_(nullptr) { - memset(&schema_view_, 0, sizeof(ArrowSchemaView)); - } - - virtual ~PostgresCopyFieldReader() {} - - void Init(const PostgresType& pg_type) { pg_type_ = pg_type; } - - const PostgresType& InputType() const { return pg_type_; } - - virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); - return NANOARROW_OK; - } - - virtual ArrowErrorCode InitArray(ArrowArray* array) { - // Cache some buffer pointers - for (int32_t i = 0; i < 3; i++) { - switch (schema_view_.layout.buffer_type[i]) { - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - if 
(schema_view_.layout.element_size_bits[i] == 32) { - offsets_ = ArrowArrayBuffer(array, i); - } - break; - case NANOARROW_BUFFER_TYPE_DATA: - data_ = ArrowArrayBuffer(array, i); - break; - default: - break; - } - } - - return NANOARROW_OK; - } - - virtual ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, - ArrowArray* array, ArrowError* error) { - return ENOTSUP; - } - - virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { - return NANOARROW_OK; - } - - protected: - PostgresType pg_type_; - ArrowSchemaView schema_view_; - ArrowBuffer* offsets_; - ArrowBuffer* data_; - std::vector> children_; -}; - -// Reader for a Postgres boolean (one byte -> bitmap) -class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes != 1) { - ArrowErrorSet(error, "Expected field with one byte but found field with %d bytes", - static_cast(field_size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - int64_t bytes_required = _ArrowBytesForBits(array->length + 1); - if (bytes_required > data_->size_bytes) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); - } - - if (ReadUnsafe(data)) { - ArrowBitSet(data_->data, array->length); - } else { - ArrowBitClear(data_->data, array->length); - } - - array->length++; - return NANOARROW_OK; - } -}; - -// Reader for Pg->Arrow conversions whose representations are identical minus -// the bswap from network endian. This includes all integral and float types. 
-template -class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes != static_cast(sizeof(T))) { - ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", - static_cast(sizeof(T)), - static_cast(field_size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - T value = ReadUnsafe(data); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value, sizeof(T))); - array->length++; - return NANOARROW_OK; - } -}; - -// Reader for Pg->Arrow conversions whose Arrow representation is simply the -// bytes of the field representation. This can be used with binary and string -// Arrow types and any Postgres type. -class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - // -1 for NULL (0 would be empty string) - if (field_size_bytes < 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes > data->size_bytes) { - ArrowErrorSet(error, "Expected %d bytes of field data but got %d bytes of input", - static_cast(field_size_bytes), - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes)); - data->data.as_uint8 += field_size_bytes; - data->size_bytes -= field_size_bytes; - - int32_t* offsets = reinterpret_cast(offsets_->data); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(offsets_, offsets[array->length] + field_size_bytes)); - - array->length++; - return NANOARROW_OK; - } -}; - -class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { - public: - void InitChild(std::unique_ptr child) { - child_ = std::move(child); 
- child_->Init(*pg_type_.child(0)); - } - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); - NANOARROW_RETURN_NOT_OK(child_->InitSchema(schema->children[0])); - return NANOARROW_OK; - } - - ArrowErrorCode InitArray(ArrowArray* array) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); - NANOARROW_RETURN_NOT_OK(child_->InitArray(array->children[0])); - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - // Keep the cursor where we start to parse the array so we can check - // the number of bytes read against the field size when finished - const uint8_t* data0 = data->data.as_uint8; - - int32_t n_dim; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_dim, error)); - int32_t flags; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); - uint32_t element_type_oid; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &element_type_oid, error)); - - // We could validate the OID here, but this is a poor fit for all cases - // (e.g. 
testing) since the OID can be specific to each database - - if (n_dim < 0) { - ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", - static_cast(n_dim)); // NOLINT(runtime/int) - return EINVAL; - } - - // This is apparently allowed - if (n_dim == 0) { - NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); - return NANOARROW_OK; - } - - int64_t n_items = 1; - for (int32_t i = 0; i < n_dim; i++) { - int32_t dim_size; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &dim_size, error)); - n_items *= dim_size; - - int32_t lower_bound; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &lower_bound, error)); - if (lower_bound != 1) { - ArrowErrorSet(error, "Array value with lower bound != 1 is not supported"); - return EINVAL; - } - } - - for (int64_t i = 0; i < n_items; i++) { - int32_t child_field_size_bytes; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); - NANOARROW_RETURN_NOT_OK( - child_->Read(data, child_field_size_bytes, array->children[0], error)); - } - - int64_t bytes_read = data->data.as_uint8 - data0; - if (bytes_read != field_size_bytes) { - ArrowErrorSet(error, "Expected to read %d bytes from array field but read %d bytes", - static_cast(field_size_bytes), - static_cast(bytes_read)); // NOLINT(runtime/int) - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); - return NANOARROW_OK; - } - - private: - std::unique_ptr child_; -}; - -class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { - public: - void AppendChild(std::unique_ptr child) { - int64_t child_i = static_cast(children_.size()); - children_.push_back(std::move(child)); - children_[child_i]->Init(*pg_type_.child(child_i)); - } - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); - for (int64_t i = 0; i < schema->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); - } - - return NANOARROW_OK; - } - - 
ArrowErrorCode InitArray(ArrowArray* array) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes < 0) { - return ArrowArrayAppendNull(array, 1); - } - - // Keep the cursor where we start to parse the field so we can check - // the number of bytes read against the field size when finished - const uint8_t* data0 = data->data.as_uint8; - - int32_t n_fields; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); - if (n_fields != array->n_children) { - ArrowErrorSet(error, "Expected nested record type to have %ld fields but got %d", - static_cast(array->n_children), // NOLINT(runtime/int) - static_cast(n_fields)); // NOLINT(runtime/int) - return EINVAL; - } - - for (int32_t i = 0; i < n_fields; i++) { - uint32_t child_oid; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_oid, error)); - - int32_t child_field_size_bytes; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); - int result = - children_[i]->Read(data, child_field_size_bytes, array->children[i], error); - - // On overflow, pretend all previous children for this struct were never - // appended to. This leaves array in a valid state in the specific case - // where EOVERFLOW was returned so that a higher level caller can attempt - // to try again after creating a new array. 
- if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; j++) { - array->children[j]->length--; - } - } - - if (result != NANOARROW_OK) { - return result; - } - } - - // field size == -1 means don't check (e.g., for a top-level row tuple) - int64_t bytes_read = data->data.as_uint8 - data0; - if (field_size_bytes != -1 && bytes_read != field_size_bytes) { - ArrowErrorSet(error, - "Expected to read %d bytes from record field but read %d bytes", - static_cast(field_size_bytes), - static_cast(bytes_read)); // NOLINT(runtime/int) - return EINVAL; - } - - array->length++; - return NANOARROW_OK; - } - - private: - std::vector> children_; -}; - -// Subtely different from a Record field item: field count is an int16_t -// instead of an int32_t and each field is not prefixed by its OID. -class PostgresCopyFieldTupleReader : public PostgresCopyFieldReader { - public: - void AppendChild(std::unique_ptr child) { - int64_t child_i = static_cast(children_.size()); - children_.push_back(std::move(child)); - children_[child_i]->Init(*pg_type_.child(child_i)); - } - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); - for (int64_t i = 0; i < schema->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode InitArray(ArrowArray* array) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - int16_t n_fields; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); - if (n_fields == -1) { - return ENODATA; - } else if (n_fields != array->n_children) { - ArrowErrorSet(error, - "Expected -1 for end-of-stream or 
number of fields in output array " - "(%ld) but got %d", - static_cast(array->n_children), // NOLINT(runtime/int) - static_cast(n_fields)); // NOLINT(runtime/int) - return EINVAL; - } - - for (int16_t i = 0; i < n_fields; i++) { - int32_t child_field_size_bytes; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); - int result = - children_[i]->Read(data, child_field_size_bytes, array->children[i], error); - - // On overflow, pretend all previous children for this struct were never - // appended to. This leaves array in a valid state in the specific case - // where EOVERFLOW was returned so that a higher level caller can attempt - // to try again after creating a new array. - if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; j++) { - array->children[j]->length--; - } - } - - if (result != NANOARROW_OK) { - return result; - } - } - - array->length++; - return NANOARROW_OK; - } - - private: - std::vector> children_; -}; - -// Factory for a PostgresCopyFieldReader that instantiates the proper subclass -// and gives a nice error for Postgres type -> Arrow type conversions that aren't -// supported. 
-static inline ArrowErrorCode ErrorCantConvert(ArrowError* error, - const PostgresType& pg_type, - const ArrowSchemaView& schema_view) { - ArrowErrorSet(error, "Can't convert Postgres type '%s' to Arrow type '%s'", - pg_type.typname().c_str(), - ArrowTypeString(schema_view.type)); // NOLINT(runtime/int) - return EINVAL; -} - -static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, - ArrowSchema* schema, - PostgresCopyFieldReader** out, - ArrowError* error) { - ArrowSchemaView schema_view; - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); - - switch (schema_view.type) { - case NANOARROW_TYPE_BOOL: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_BOOL: - *out = new PostgresCopyBooleanFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_INT16: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_INT2: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_INT32: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_INT4: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_INT64: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_INT8: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_FLOAT: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_FLOAT4: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_DOUBLE: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_FLOAT8: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - 
default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_STRING: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_CHAR: - case PostgresType::PG_RECV_VARCHAR: - case PostgresType::PG_RECV_TEXT: - *out = new PostgresCopyBinaryFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_BINARY: - // No need to check pg_type here: we can return the bytes of any - // Postgres type as binary. - *out = new PostgresCopyBinaryFieldReader(); - return NANOARROW_OK; - - case NANOARROW_TYPE_LIST: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_ARRAY: { - if (pg_type.n_children() != 1) { - ArrowErrorSet( - error, "Expected Postgres array type to have one child but found %ld", - static_cast(pg_type.n_children())); // NOLINT(runtime/int) - return EINVAL; - } - - auto array_reader = std::unique_ptr( - new PostgresCopyArrayFieldReader()); - array_reader->Init(pg_type); - - PostgresCopyFieldReader* child_reader; - NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( - *pg_type.child(0), schema->children[0], &child_reader, error)); - array_reader->InitChild(std::unique_ptr(child_reader)); - - *out = array_reader.release(); - return NANOARROW_OK; - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_STRUCT: - switch (pg_type.recv()) { - case PostgresType::PG_RECV_RECORD: { - if (pg_type.n_children() != schema->n_children) { - ArrowErrorSet(error, - "Can't convert Postgres record type with %ld chlidren to Arrow " - "struct type with %ld children", - static_cast(pg_type.n_children()), // NOLINT(runtime/int) - static_cast(schema->n_children)); // NOLINT(runtime/int) - return EINVAL; - } - - auto record_reader = std::unique_ptr( - new PostgresCopyRecordFieldReader()); - record_reader->Init(pg_type); - - for (int64_t i = 0; i < pg_type.n_children(); i++) { - PostgresCopyFieldReader* child_reader; - 
NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( - *pg_type.child(i), schema->children[i], &child_reader, error)); - record_reader->AppendChild( - std::unique_ptr(child_reader)); - } - - *out = record_reader.release(); - return NANOARROW_OK; - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } -} - -class PostgresCopyStreamReader { - public: - ArrowErrorCode Init(const PostgresType& pg_type) { - if (pg_type.recv() != PostgresType::PG_RECV_RECORD) { - return EINVAL; - } - - root_reader_.Init(pg_type); - return NANOARROW_OK; - } - - ArrowErrorCode SetOutputSchema(ArrowSchema* schema, ArrowError* error) { - if (std::string(schema_->format) != "+s") { - ArrowErrorSet( - error, - "Expected output schema of type struct but got output schema with format '%s'", - schema_->format); // NOLINT(runtime/int) - return EINVAL; - } - - if (schema_->n_children != root_reader_.InputType().n_children()) { - ArrowErrorSet(error, - "Expected output schema with %ld columns to match Postgres input but " - "got schema with %ld columns", - static_cast( // NOLINT(runtime/int) - root_reader_.InputType().n_children()), - static_cast(schema->n_children)); // NOLINT(runtime/int) - return EINVAL; - } - - schema_.reset(schema); - return NANOARROW_OK; - } - - ArrowErrorCode InferOutputSchema(ArrowError* error) { - schema_.reset(); - ArrowSchemaInit(schema_.get()); - NANOARROW_RETURN_NOT_OK(root_reader_.InputType().SetSchema(schema_.get())); - return NANOARROW_OK; - } - - ArrowErrorCode InitFieldReaders(ArrowError* error) { - if (schema_->release == nullptr) { - return EINVAL; - } - - const PostgresType& root_type = root_reader_.InputType(); - - for (int64_t i = 0; i < root_type.n_children(); i++) { - const PostgresType& child_type = *root_type.child(i); - PostgresCopyFieldReader* child_reader; - NANOARROW_RETURN_NOT_OK( - MakeCopyFieldReader(child_type, schema_->children[i], &child_reader, error)); - 
root_reader_.AppendChild(std::unique_ptr(child_reader)); - } - - NANOARROW_RETURN_NOT_OK(root_reader_.InitSchema(schema_.get())); - return NANOARROW_OK; - } - - ArrowErrorCode ReadHeader(ArrowBufferView* data, ArrowError* error) { - if (data->size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { - ArrowErrorSet( - error, - "Expected PGCOPY signature of %ld bytes at beginning of stream but " - "found %ld bytes of input", - static_cast(sizeof(kPgCopyBinarySignature)), // NOLINT(runtime/int) - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - if (memcmp(data->data.data, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature)) != - 0) { - ArrowErrorSet(error, "Invalid PGCOPY signature at beginning of stream"); - return EINVAL; - } - - data->data.as_uint8 += sizeof(kPgCopyBinarySignature); - data->size_bytes -= sizeof(kPgCopyBinarySignature); - - uint32_t flags; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); - uint32_t extension_length; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &extension_length, error)); - - if (data->size_bytes < static_cast(extension_length)) { - ArrowErrorSet(error, - "Expected %ld bytes of extension metadata at start of stream but " - "found %ld bytes of input", - static_cast(extension_length), // NOLINT(runtime/int) - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - data->data.as_uint8 += extension_length; - data->size_bytes -= extension_length; - return NANOARROW_OK; - } - - ArrowErrorCode ReadRecord(ArrowBufferView* data, ArrowError* error) { - if (array_->release == nullptr) { - NANOARROW_RETURN_NOT_OK( - ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array_.get())); - NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); - } - - NANOARROW_RETURN_NOT_OK(root_reader_.Read(data, -1, array_.get(), error)); - return NANOARROW_OK; - } - - ArrowErrorCode GetSchema(ArrowSchema* out) { - return 
ArrowSchemaDeepCopy(schema_.get(), out); - } - - ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error) { - if (array_->release == nullptr) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuilding(array_.get(), error)); - ArrowArrayMove(array_.get(), out); - return NANOARROW_OK; - } - - private: - PostgresCopyFieldTupleReader root_reader_; - nanoarrow::UniqueSchema schema_; - nanoarrow::UniqueArray array_; -}; - -} // namespace adbcpq diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc deleted file mode 100644 index 3d989722b2..0000000000 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ /dev/null @@ -1,519 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include -#include - -#include "postgres_copy_reader.h" - -using adbcpq::PostgresCopyStreamReader; -using adbcpq::PostgresType; - -class PostgresCopyStreamTester { - public: - ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(reader_.Init(root_type)); - NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error)); - NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error)); - return NANOARROW_OK; - } - - ArrowErrorCode ReadAll(ArrowBufferView* data, ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(reader_.ReadHeader(data, error)); - - int result; - do { - result = reader_.ReadRecord(data, error); - } while (result == NANOARROW_OK); - - return result; - } - - void GetSchema(ArrowSchema* out) { reader_.GetSchema(out); } - - ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error = nullptr) { - return reader_.GetArray(out, error); - } - - private: - PostgresCopyStreamReader reader_; -}; - -// COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS -// drvd("col")) TO STDOUT; -static uint8_t kTestPgCopyBoolean[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBoolean; - data.size_bytes = sizeof(kTestPgCopyBoolean); - - auto col_type = PostgresType(PostgresType::PG_RECV_BOOL); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - - // Apparently the output above contains an extra 0xff 0xff at the end - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean, 
sizeof(kTestPgCopyBoolean)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 3); - ASSERT_EQ(array.n_children, 1); - - const uint8_t* validity = - reinterpret_cast(array.children[0]->buffers[0]); - const uint8_t* data_buffer = - reinterpret_cast(array.children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_TRUE(ArrowBitGet(data_buffer, 0)); - ASSERT_FALSE(ArrowBitGet(data_buffer, 1)); - ASSERT_FALSE(ArrowBitGet(data_buffer, 2)); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopySmallInt[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xff, 0xff, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopySmallInt; - data.size_bytes = sizeof(kTestPgCopySmallInt); - - auto col_type = PostgresType(PostgresType::PG_RECV_INT2); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopySmallInt, sizeof(kTestPgCopySmallInt)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 5); - 
ASSERT_EQ(array.n_children, 1); - - auto validity = reinterpret_cast(array.children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyInteger[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, - 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadInteger) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyInteger; - data.size_bytes = sizeof(kTestPgCopyInteger); - - auto col_type = PostgresType(PostgresType::PG_RECV_INT4); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInteger, sizeof(kTestPgCopyInteger)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 5); - ASSERT_EQ(array.n_children, 1); - - auto validity = 
reinterpret_cast(array.children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyBigInt[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBigInt; - data.size_bytes = sizeof(kTestPgCopyBigInt); - - auto col_type = PostgresType(PostgresType::PG_RECV_INT8); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBigInt, sizeof(kTestPgCopyBigInt)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 5); - 
ASSERT_EQ(array.n_children, 1); - - auto validity = reinterpret_cast(array.children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), -// (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyReal[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xc2, 0xf6, 0xe9, - 0x79, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xbf, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, - 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyReal; - data.size_bytes = sizeof(kTestPgCopyReal); - - auto col_type = PostgresType(PostgresType::PG_RECV_FLOAT4); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyReal, sizeof(kTestPgCopyReal)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 5); - ASSERT_EQ(array.n_children, 1); - - auto validity = 
reinterpret_cast(array.children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_FLOAT_EQ(data_buffer[0], -123.456); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_FLOAT_EQ(data_buffer[3], 123.456); - ASSERT_EQ(data_buffer[4], 0); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), -// (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyDoublePrecision[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xc0, 0x5e, 0xdd, - 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xbf, 0xf0, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x3f, 0xf0, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, - 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadDoublePrecision) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyDoublePrecision; - data.size_bytes = sizeof(kTestPgCopyDoublePrecision); - - auto col_type = PostgresType(PostgresType::PG_RECV_FLOAT8); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDoublePrecision, - sizeof(kTestPgCopyDoublePrecision)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - 
ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 5); - ASSERT_EQ(array.n_children, 1); - - auto validity = reinterpret_cast(array.children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array.children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_DOUBLE_EQ(data_buffer[0], -123.456); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_DOUBLE_EQ(data_buffer[3], 123.456); - ASSERT_EQ(data_buffer[4], 0); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), -// (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyText[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, - 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyText; - data.size_bytes = sizeof(kTestPgCopyText); - - auto col_type = PostgresType(PostgresType::PG_RECV_TEXT); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyText, sizeof(kTestPgCopyText)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 3); - ASSERT_EQ(array.n_children, 1); - - auto validity = 
reinterpret_cast(array.children[0]->buffers[0]); - auto offsets = reinterpret_cast(array.children[0]->buffers[1]); - auto data_buffer = reinterpret_cast(array.children[0]->buffers[2]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 3); - ASSERT_EQ(offsets[2], 7); - ASSERT_EQ(offsets[3], 7); - - ASSERT_EQ(std::string(data_buffer + 0, 3), "abc"); - ASSERT_EQ(std::string(data_buffer + 3, 4), "1234"); - - array.release(&array); -} - -// COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, -// 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyIntegerArray[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0x85, 0x00, 0x00, 0x00, - 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, - 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyIntegerArray; - data.size_bytes = sizeof(kTestPgCopyIntegerArray); - - auto col_type = PostgresType(PostgresType::PG_RECV_INT4).Array(); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), 
NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyIntegerArray, - sizeof(kTestPgCopyIntegerArray)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 3); - ASSERT_EQ(array.n_children, 1); - ASSERT_EQ(array.children[0]->n_children, 1); - ASSERT_EQ(array.children[0]->children[0]->length, 5); - - auto validity = reinterpret_cast(array.children[0]->buffers[0]); - auto offsets = reinterpret_cast(array.children[0]->buffers[1]); - auto data_buffer = - reinterpret_cast(array.children[0]->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 2); - ASSERT_EQ(offsets[2], 5); - ASSERT_EQ(offsets[3], 5); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 0); - ASSERT_EQ(data_buffer[3], 1); - ASSERT_EQ(data_buffer[4], 123); - - array.release(&array); -} - -// CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); -// COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), -// ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyCustomRecord[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, - 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x00, 0x7b, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x7c, 0x8c, - 0x9f, 0xbe, 0x76, 0xc8, 0xb4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 
0x75, 0x9a, 0xd9, - 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyCustomRecord; - data.size_bytes = sizeof(kTestPgCopyCustomRecord); - - auto col_type = PostgresType(PostgresType::PG_RECV_RECORD); - col_type.AppendChild("nested1", PostgresType(PostgresType::PG_RECV_INT4)); - col_type.AppendChild("nested2", PostgresType(PostgresType::PG_RECV_FLOAT8)); - PostgresType input_type(PostgresType::PG_RECV_RECORD); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyCustomRecord, - sizeof(kTestPgCopyCustomRecord)); - ASSERT_EQ(data.size_bytes, 0); - - struct ArrowArray array; - ASSERT_EQ(tester.GetArray(&array), NANOARROW_OK); - ASSERT_EQ(array.length, 3); - ASSERT_EQ(array.n_children, 1); - ASSERT_EQ(array.children[0]->n_children, 2); - ASSERT_EQ(array.children[0]->children[0]->length, 3); - ASSERT_EQ(array.children[0]->children[1]->length, 3); - - auto validity = reinterpret_cast(array.children[0]->buffers[0]); - auto data_buffer1 = - reinterpret_cast(array.children[0]->children[0]->buffers[1]); - auto data_buffer2 = - reinterpret_cast(array.children[0]->children[1]->buffers[1]); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(data_buffer1[0], 123); - ASSERT_EQ(data_buffer1[1], 12); - ASSERT_EQ(data_buffer1[2], 0); - - ASSERT_DOUBLE_EQ(data_buffer2[0], 456.789); - ASSERT_DOUBLE_EQ(data_buffer2[1], 345.678); - ASSERT_DOUBLE_EQ(data_buffer2[2], 0); - - array.release(&array); -} From eac666657698268bfaba589f1f690c33a7ec9051 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 8 Apr 2023 21:37:51 -0300 Subject: [PATCH 58/90] build + pass tests with mege --- 
c/driver/postgresql/statement.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 8321926b1f..96773a63f9 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -197,7 +197,7 @@ struct BindStream { param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_DOUBLE: - pg_type = PgType::kFloat8; + recv = PostgresType::PG_RECV_FLOAT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_STRING: From 94b8dfcf9df9efb749b8d8c981137bb6fdb9ba6a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 8 Apr 2023 23:01:45 -0300 Subject: [PATCH 59/90] fix R package build --- r/adbcpostgresql/bootstrap.R | 12 ++++++++---- r/adbcpostgresql/src/.gitignore | 3 +-- r/adbcpostgresql/src/Makevars.in | 1 - r/adbcpostgresql/src/nanoarrow/.gitignore | 1 + 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index 030157e946..b7c84fc5ae 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -20,8 +20,7 @@ files_to_vendor <- c( "../../adbc.h", "../../c/driver/postgresql/util.h", - "../../c/driver/postgresql/type.h", - "../../c/driver/postgresql/type.cc", + "../../c/driver/postgresql/postgres_type.h", "../../c/driver/postgresql/statement.h", "../../c/driver/postgresql/statement.cc", "../../c/driver/postgresql/connection.h", @@ -30,6 +29,7 @@ files_to_vendor <- c( "../../c/driver/postgresql/database.cc", "../../c/driver/postgresql/postgresql.cc", "../../c/vendor/nanoarrow/nanoarrow.h", + "../../c/vendor/nanoarrow/nanoarrow.hpp", "../../c/vendor/nanoarrow/nanoarrow.c" ) @@ -50,8 +50,12 @@ if (all(file.exists(files_to_vendor))) { if (all(file.copy(files_to_vendor, "src"))) { file.rename( - c("src/nanoarrow.c", "src/nanoarrow.h"), - c("src/nanoarrow/nanoarrow.c", "src/nanoarrow/nanoarrow.h") + c("src/nanoarrow.c", "src/nanoarrow.h", "src/nanoarrow.hpp"), + c( + 
"src/nanoarrow/nanoarrow.c", + "src/nanoarrow/nanoarrow.h", + "src/nanoarrow/nanoarrow.hpp" + ) ) cat("All files successfully copied to src/\n") } else { diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore index 45c6e7eff7..565c6c8ad6 100644 --- a/r/adbcpostgresql/src/.gitignore +++ b/r/adbcpostgresql/src/.gitignore @@ -25,7 +25,6 @@ database.h postgresql.cc statement.h statement.cc -type.cc -type.h +postgres_type.h util.h Makevars diff --git a/r/adbcpostgresql/src/Makevars.in b/r/adbcpostgresql/src/Makevars.in index fe9d8e5043..c062d1176e 100644 --- a/r/adbcpostgresql/src/Makevars.in +++ b/r/adbcpostgresql/src/Makevars.in @@ -22,6 +22,5 @@ OBJECTS = init.o \ connection.o \ database.o \ statement.o \ - type.o \ postgresql.o \ nanoarrow/nanoarrow.o diff --git a/r/adbcpostgresql/src/nanoarrow/.gitignore b/r/adbcpostgresql/src/nanoarrow/.gitignore index 87e59e2d1d..632b0d8804 100644 --- a/r/adbcpostgresql/src/nanoarrow/.gitignore +++ b/r/adbcpostgresql/src/nanoarrow/.gitignore @@ -17,3 +17,4 @@ nanoarrow.c nanoarrow.h +nanoarrow.hpp From 88338b33e6b994ed7f87a7cb203030882dad5c95 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 8 Apr 2023 23:09:24 -0300 Subject: [PATCH 60/90] devirtualize type resolver --- c/driver/postgresql/postgres_type.h | 24 +++++++++++++++++------ c/driver/postgresql/postgres_type_test.cc | 19 ++---------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 56a4c49b31..0920be1be3 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -579,17 +579,29 @@ class PostgresTypeResolver { return NANOARROW_OK; } - virtual ArrowErrorCode ResolveClass(uint32_t oid, - std::vector>* out, - ArrowError* error) { - ArrowErrorSet(error, "Class definition with oid %ld not found", - static_cast(oid)); // NOLINT(runtime/int) - return EINVAL; + void InsertClass(uint32_t oid, + const std::vector>& 
cls) { + classes_.insert({oid, cls}); + } + + ArrowErrorCode ResolveClass(uint32_t oid, + std::vector>* out, + ArrowError* error) { + auto result = classes_.find(oid); + if (result == classes_.end()) { + ArrowErrorSet(error, "Class definition with oid %ld not found", + static_cast(oid)); // NOLINT(runtime/int) + return EINVAL; + } + + *out = result->second; + return NANOARROW_OK; } private: std::unordered_map mapping_; std::unordered_map reverse_mapping_; + std::unordered_map>> classes_; std::unordered_map base_; }; diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 7677906a38..eeb698c6a0 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -63,10 +63,10 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; uint32_t class_oid = item.oid; - std::vector> record_fields_ = { + std::vector> record_fields = { {GetOID(PostgresType::PG_RECV_INT4), "int4_col"}, {GetOID(PostgresType::PG_RECV_TEXT), "text_col"}}; - classes_.insert({class_oid, record_fields_}); + InsertClass(class_oid, std::move(record_fields)); item.oid++; item.typname = "customrecord"; @@ -76,21 +76,6 @@ class MockTypeResolver : public PostgresTypeResolver { NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); return NANOARROW_OK; } - - ArrowErrorCode ResolveClass(uint32_t oid, - std::vector>* out, - ArrowError* error) override { - auto result = classes_.find(oid); - if (result == classes_.end()) { - return PostgresTypeResolver::ResolveClass(oid, out, error); - } - - *out = (*result).second; - return NANOARROW_OK; - } - - private: - std::unordered_map>> classes_; }; TEST(PostgresTypeTest, PostgresTypeBasic) { From 90b2ad2a923a3686b5ccd5b06397b86ed7c07380 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 10:08:47 -0300 Subject: [PATCH 61/90] start tidy of names --- c/driver/postgresql/postgres_type.h | 711 +++++++++++----------- c/driver/postgresql/postgres_type_test.cc | 
90 +-- c/driver/postgresql/statement.cc | 14 +- 3 files changed, 409 insertions(+), 406 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 0920be1be3..636cbf623b 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -28,336 +28,335 @@ namespace adbcpq { +enum PostgresTypeId { + PG_TYPE_UNINITIALIZED, + PG_TYPE_ANYARRAY, + PG_TYPE_ANYCOMPATIBLEARRAY, + PG_TYPE_ARRAY, + PG_TYPE_BIT, + PG_TYPE_BOOL, + PG_TYPE_BOX, + PG_TYPE_BPCHAR, + PG_TYPE_BRIN_BLOOM_SUMMARY, + PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY, + PG_TYPE_BYTEA, + PG_TYPE_CASH, + PG_TYPE_CHAR, + PG_TYPE_CIDR, + PG_TYPE_CID, + PG_TYPE_CIRCLE, + PG_TYPE_CSTRING, + PG_TYPE_DATE, + PG_TYPE_DOMAIN, + PG_TYPE_FLOAT4, + PG_TYPE_FLOAT8, + PG_TYPE_INET, + PG_TYPE_INT2, + PG_TYPE_INT2VECTOR, + PG_TYPE_INT4, + PG_TYPE_INT8, + PG_TYPE_INTERVAL, + PG_TYPE_JSON, + PG_TYPE_JSONB, + PG_TYPE_JSONPATH, + PG_TYPE_LINE, + PG_TYPE_LSEG, + PG_TYPE_MACADDR, + PG_TYPE_MACADDR8, + PG_TYPE_MULTIRANGE, + PG_TYPE_NAME, + PG_TYPE_NUMERIC, + PG_TYPE_OID, + PG_TYPE_OIDVECTOR, + PG_TYPE_PATH, + PG_TYPE_PG_DDL_COMMAND, + PG_TYPE_PG_DEPENDENCIES, + PG_TYPE_PG_LSN, + PG_TYPE_PG_MCV_LIST, + PG_TYPE_PG_NDISTINCT, + PG_TYPE_PG_NODE_TREE, + PG_TYPE_PG_SNAPSHOT, + PG_TYPE_POINT, + PG_TYPE_POLY, + PG_TYPE_RANGE, + PG_TYPE_RECORD, + PG_TYPE_REGCLASS, + PG_TYPE_REGCOLLATION, + PG_TYPE_REGCONFIG, + PG_TYPE_REGDICTIONARY, + PG_TYPE_REGNAMESPACE, + PG_TYPE_REGOPERATOR, + PG_TYPE_REGOPER, + PG_TYPE_REGPROCEDURE, + PG_TYPE_REGPROC, + PG_TYPE_REGROLE, + PG_TYPE_REGTYPE, + PG_TYPE_TEXT, + PG_TYPE_TID, + PG_TYPE_TIME, + PG_TYPE_TIMESTAMP, + PG_TYPE_TIMESTAMPTZ, + PG_TYPE_TIMETZ, + PG_TYPE_TSQUERY, + PG_TYPE_TSVECTOR, + PG_TYPE_TXID_SNAPSHOT, + PG_TYPE_UNKNOWN, + PG_TYPE_UUID, + PG_TYPE_VARBIT, + PG_TYPE_VARCHAR, + PG_TYPE_VOID, + PG_TYPE_XID8, + PG_TYPE_XID, + PG_TYPE_XML +}; + +static inline const char* PostgresTyprecv(PostgresTypeId type_id) { + switch (type_id) { + case 
PG_TYPE_ANYARRAY: + return "anyarray_recv"; + case PG_TYPE_ANYCOMPATIBLEARRAY: + return "anycompatiblearray_recv"; + case PG_TYPE_ARRAY: + return "array_recv"; + case PG_TYPE_BIT: + return "bit_recv"; + case PG_TYPE_BOOL: + return "boolrecv"; + case PG_TYPE_BOX: + return "box_recv"; + case PG_TYPE_BPCHAR: + return "bpcharrecv"; + case PG_TYPE_BRIN_BLOOM_SUMMARY: + return "brin_bloom_summary_recv"; + case PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY: + return "brin_minmax_multi_summary_recv"; + case PG_TYPE_BYTEA: + return "bytearecv"; + case PG_TYPE_CASH: + return "cash_recv"; + case PG_TYPE_CHAR: + return "charrecv"; + case PG_TYPE_CIDR: + return "cidr_recv"; + case PG_TYPE_CID: + return "cidrecv"; + case PG_TYPE_CIRCLE: + return "circle_recv"; + case PG_TYPE_CSTRING: + return "cstring_recv"; + case PG_TYPE_DATE: + return "date_recv"; + case PG_TYPE_DOMAIN: + return "domain_recv"; + case PG_TYPE_FLOAT4: + return "float4recv"; + case PG_TYPE_FLOAT8: + return "float8recv"; + case PG_TYPE_INET: + return "inet_recv"; + case PG_TYPE_INT2: + return "int2recv"; + case PG_TYPE_INT2VECTOR: + return "int2vectorrecv"; + case PG_TYPE_INT4: + return "int4recv"; + case PG_TYPE_INT8: + return "int8recv"; + case PG_TYPE_INTERVAL: + return "interval_recv"; + case PG_TYPE_JSON: + return "json_recv"; + case PG_TYPE_JSONB: + return "jsonb_recv"; + case PG_TYPE_JSONPATH: + return "jsonpath_recv"; + case PG_TYPE_LINE: + return "line_recv"; + case PG_TYPE_LSEG: + return "lseg_recv"; + case PG_TYPE_MACADDR: + return "macaddr_recv"; + case PG_TYPE_MACADDR8: + return "macaddr8_recv"; + case PG_TYPE_MULTIRANGE: + return "multirange_recv"; + case PG_TYPE_NAME: + return "namerecv"; + case PG_TYPE_NUMERIC: + return "numeric_recv"; + case PG_TYPE_OID: + return "oidrecv"; + case PG_TYPE_OIDVECTOR: + return "oidvectorrecv"; + case PG_TYPE_PATH: + return "path_recv"; + case PG_TYPE_PG_DDL_COMMAND: + return "pg_ddl_command_recv"; + case PG_TYPE_PG_DEPENDENCIES: + return "pg_dependencies_recv"; + case 
PG_TYPE_PG_LSN: + return "pg_lsn_recv"; + case PG_TYPE_PG_MCV_LIST: + return "pg_mcv_list_recv"; + case PG_TYPE_PG_NDISTINCT: + return "pg_ndistinct_recv"; + case PG_TYPE_PG_NODE_TREE: + return "pg_node_tree_recv"; + case PG_TYPE_PG_SNAPSHOT: + return "pg_snapshot_recv"; + case PG_TYPE_POINT: + return "point_recv"; + case PG_TYPE_POLY: + return "poly_recv"; + case PG_TYPE_RANGE: + return "range_recv"; + case PG_TYPE_RECORD: + return "record_recv"; + case PG_TYPE_REGCLASS: + return "regclassrecv"; + case PG_TYPE_REGCOLLATION: + return "regcollationrecv"; + case PG_TYPE_REGCONFIG: + return "regconfigrecv"; + case PG_TYPE_REGDICTIONARY: + return "regdictionaryrecv"; + case PG_TYPE_REGNAMESPACE: + return "regnamespacerecv"; + case PG_TYPE_REGOPERATOR: + return "regoperatorrecv"; + case PG_TYPE_REGOPER: + return "regoperrecv"; + case PG_TYPE_REGPROCEDURE: + return "regprocedurerecv"; + case PG_TYPE_REGPROC: + return "regprocrecv"; + case PG_TYPE_REGROLE: + return "regrolerecv"; + case PG_TYPE_REGTYPE: + return "regtyperecv"; + case PG_TYPE_TEXT: + return "textrecv"; + case PG_TYPE_TID: + return "tidrecv"; + case PG_TYPE_TIME: + return "time_recv"; + case PG_TYPE_TIMESTAMP: + return "timestamp_recv"; + case PG_TYPE_TIMESTAMPTZ: + return "timestamptz_recv"; + case PG_TYPE_TIMETZ: + return "timetz_recv"; + case PG_TYPE_TSQUERY: + return "tsqueryrecv"; + case PG_TYPE_TSVECTOR: + return "tsvectorrecv"; + case PG_TYPE_TXID_SNAPSHOT: + return "txid_snapshot_recv"; + case PG_TYPE_UNKNOWN: + return "unknownrecv"; + case PG_TYPE_UUID: + return "uuid_recv"; + case PG_TYPE_VARBIT: + return "varbit_recv"; + case PG_TYPE_VARCHAR: + return "varcharrecv"; + case PG_TYPE_VOID: + return "void_recv"; + case PG_TYPE_XID8: + return "xid8recv"; + case PG_TYPE_XID: + return "xidrecv"; + case PG_TYPE_XML: + return "xml_recv"; + default: + return ""; + } +} + +static inline const char* PostgresTypname(PostgresTypeId type_id) { + switch (type_id) { + case PG_TYPE_BIT: + return "bit"; + case 
PG_TYPE_BOOL: + return "bool"; + case PG_TYPE_BYTEA: + return "bytea"; + case PG_TYPE_CASH: + return "cash"; + case PG_TYPE_CHAR: + return "char"; + case PG_TYPE_DATE: + return "date"; + case PG_TYPE_FLOAT4: + return "float4"; + case PG_TYPE_FLOAT8: + return "float8"; + case PG_TYPE_INT2: + return "int2"; + case PG_TYPE_INT4: + return "int4"; + case PG_TYPE_INT8: + return "int8"; + case PG_TYPE_INTERVAL: + return "interval"; + case PG_TYPE_NUMERIC: + return "numeric"; + case PG_TYPE_OID: + return "oid"; + case PG_TYPE_TEXT: + return "text"; + case PG_TYPE_TIME: + return "time"; + case PG_TYPE_TIMESTAMP: + return "timestamp"; + case PG_TYPE_TIMESTAMPTZ: + return "timestamptz"; + case PG_TYPE_TIMETZ: + return "timetz"; + case PG_TYPE_UUID: + return "uuid"; + case PG_TYPE_VARBIT: + return "varbit"; + case PG_TYPE_VARCHAR: + return "varchar"; + + case PG_TYPE_ARRAY: + return "array"; + case PG_TYPE_RECORD: + return "record"; + case PG_TYPE_RANGE: + return "range"; + case PG_TYPE_DOMAIN: + return "domain"; + default: + return ""; + } +} + class PostgresType { public: - // From SELECT DISTINCT typreceive FROM pg_type; - enum PgRecv { - PG_RECV_UNINITIALIZED, - PG_RECV_ANYARRAY, - PG_RECV_ANYCOMPATIBLEARRAY, - PG_RECV_ARRAY, - PG_RECV_BIT, - PG_RECV_BOOL, - PG_RECV_BOX, - PG_RECV_BPCHAR, - PG_RECV_BRIN_BLOOM_SUMMARY, - PG_RECV_BRIN_MINMAX_MULTI_SUMMARY, - PG_RECV_BYTEA, - PG_RECV_CASH, - PG_RECV_CHAR, - PG_RECV_CIDR, - PG_RECV_CID, - PG_RECV_CIRCLE, - PG_RECV_CSTRING, - PG_RECV_DATE, - PG_RECV_DOMAIN, - PG_RECV_FLOAT4, - PG_RECV_FLOAT8, - PG_RECV_INET, - PG_RECV_INT2, - PG_RECV_INT2VECTOR, - PG_RECV_INT4, - PG_RECV_INT8, - PG_RECV_INTERVAL, - PG_RECV_JSON, - PG_RECV_JSONB, - PG_RECV_JSONPATH, - PG_RECV_LINE, - PG_RECV_LSEG, - PG_RECV_MACADDR, - PG_RECV_MACADDR8, - PG_RECV_MULTIRANGE, - PG_RECV_NAME, - PG_RECV_NUMERIC, - PG_RECV_OID, - PG_RECV_OIDVECTOR, - PG_RECV_PATH, - PG_RECV_PG_DDL_COMMAND, - PG_RECV_PG_DEPENDENCIES, - PG_RECV_PG_LSN, - PG_RECV_PG_MCV_LIST, - 
PG_RECV_PG_NDISTINCT, - PG_RECV_PG_NODE_TREE, - PG_RECV_PG_SNAPSHOT, - PG_RECV_POINT, - PG_RECV_POLY, - PG_RECV_RANGE, - PG_RECV_RECORD, - PG_RECV_REGCLASS, - PG_RECV_REGCOLLATION, - PG_RECV_REGCONFIG, - PG_RECV_REGDICTIONARY, - PG_RECV_REGNAMESPACE, - PG_RECV_REGOPERATOR, - PG_RECV_REGOPER, - PG_RECV_REGPROCEDURE, - PG_RECV_REGPROC, - PG_RECV_REGROLE, - PG_RECV_REGTYPE, - PG_RECV_TEXT, - PG_RECV_TID, - PG_RECV_TIME, - PG_RECV_TIMESTAMP, - PG_RECV_TIMESTAMPTZ, - PG_RECV_TIMETZ, - PG_RECV_TSQUERY, - PG_RECV_TSVECTOR, - PG_RECV_TXID_SNAPSHOT, - PG_RECV_UNKNOWN, - PG_RECV_UUID, - PG_RECV_VARBIT, - PG_RECV_VARCHAR, - PG_RECV_VOID, - PG_RECV_XID8, - PG_RECV_XID, - PG_RECV_XML - }; - - static std::vector PgRecvAllBase(bool nested = true) { - std::vector base = { - PG_RECV_BIT, PG_RECV_BOOL, PG_RECV_BYTEA, PG_RECV_CASH, - PG_RECV_CHAR, PG_RECV_BPCHAR, PG_RECV_DATE, PG_RECV_FLOAT4, - PG_RECV_FLOAT8, PG_RECV_INT2, PG_RECV_INT4, PG_RECV_INT8, - PG_RECV_INTERVAL, PG_RECV_NUMERIC, PG_RECV_OID, PG_RECV_TEXT, - PG_RECV_TIME, PG_RECV_TIMESTAMP, PG_RECV_TIMESTAMPTZ, PG_RECV_TIMETZ, - PG_RECV_UUID, PG_RECV_VARBIT, PG_RECV_VARCHAR}; + static std::vector PgRecvAllBase(bool nested = true) { + std::vector base = { + PG_TYPE_BIT, PG_TYPE_BOOL, PG_TYPE_BYTEA, PG_TYPE_CASH, + PG_TYPE_CHAR, PG_TYPE_BPCHAR, PG_TYPE_DATE, PG_TYPE_FLOAT4, + PG_TYPE_FLOAT8, PG_TYPE_INT2, PG_TYPE_INT4, PG_TYPE_INT8, + PG_TYPE_INTERVAL, PG_TYPE_NUMERIC, PG_TYPE_OID, PG_TYPE_TEXT, + PG_TYPE_TIME, PG_TYPE_TIMESTAMP, PG_TYPE_TIMESTAMPTZ, PG_TYPE_TIMETZ, + PG_TYPE_UUID, PG_TYPE_VARBIT, PG_TYPE_VARCHAR}; if (nested) { - base.push_back(PG_RECV_ARRAY); - base.push_back(PG_RECV_RECORD); - base.push_back(PG_RECV_RANGE); - base.push_back(PG_RECV_DOMAIN); + base.push_back(PG_TYPE_ARRAY); + base.push_back(PG_TYPE_RECORD); + base.push_back(PG_TYPE_RANGE); + base.push_back(PG_TYPE_DOMAIN); } return base; } - static std::string PgRecvName(PgRecv recv) { - switch (recv) { - case PG_RECV_ANYARRAY: - return "anyarray_recv"; - 
case PG_RECV_ANYCOMPATIBLEARRAY: - return "anycompatiblearray_recv"; - case PG_RECV_ARRAY: - return "array_recv"; - case PG_RECV_BIT: - return "bit_recv"; - case PG_RECV_BOOL: - return "boolrecv"; - case PG_RECV_BOX: - return "box_recv"; - case PG_RECV_BPCHAR: - return "bpcharrecv"; - case PG_RECV_BRIN_BLOOM_SUMMARY: - return "brin_bloom_summary_recv"; - case PG_RECV_BRIN_MINMAX_MULTI_SUMMARY: - return "brin_minmax_multi_summary_recv"; - case PG_RECV_BYTEA: - return "bytearecv"; - case PG_RECV_CASH: - return "cash_recv"; - case PG_RECV_CHAR: - return "charrecv"; - case PG_RECV_CIDR: - return "cidr_recv"; - case PG_RECV_CID: - return "cidrecv"; - case PG_RECV_CIRCLE: - return "circle_recv"; - case PG_RECV_CSTRING: - return "cstring_recv"; - case PG_RECV_DATE: - return "date_recv"; - case PG_RECV_DOMAIN: - return "domain_recv"; - case PG_RECV_FLOAT4: - return "float4recv"; - case PG_RECV_FLOAT8: - return "float8recv"; - case PG_RECV_INET: - return "inet_recv"; - case PG_RECV_INT2: - return "int2recv"; - case PG_RECV_INT2VECTOR: - return "int2vectorrecv"; - case PG_RECV_INT4: - return "int4recv"; - case PG_RECV_INT8: - return "int8recv"; - case PG_RECV_INTERVAL: - return "interval_recv"; - case PG_RECV_JSON: - return "json_recv"; - case PG_RECV_JSONB: - return "jsonb_recv"; - case PG_RECV_JSONPATH: - return "jsonpath_recv"; - case PG_RECV_LINE: - return "line_recv"; - case PG_RECV_LSEG: - return "lseg_recv"; - case PG_RECV_MACADDR: - return "macaddr_recv"; - case PG_RECV_MACADDR8: - return "macaddr8_recv"; - case PG_RECV_MULTIRANGE: - return "multirange_recv"; - case PG_RECV_NAME: - return "namerecv"; - case PG_RECV_NUMERIC: - return "numeric_recv"; - case PG_RECV_OID: - return "oidrecv"; - case PG_RECV_OIDVECTOR: - return "oidvectorrecv"; - case PG_RECV_PATH: - return "path_recv"; - case PG_RECV_PG_DDL_COMMAND: - return "pg_ddl_command_recv"; - case PG_RECV_PG_DEPENDENCIES: - return "pg_dependencies_recv"; - case PG_RECV_PG_LSN: - return "pg_lsn_recv"; - case 
PG_RECV_PG_MCV_LIST: - return "pg_mcv_list_recv"; - case PG_RECV_PG_NDISTINCT: - return "pg_ndistinct_recv"; - case PG_RECV_PG_NODE_TREE: - return "pg_node_tree_recv"; - case PG_RECV_PG_SNAPSHOT: - return "pg_snapshot_recv"; - case PG_RECV_POINT: - return "point_recv"; - case PG_RECV_POLY: - return "poly_recv"; - case PG_RECV_RANGE: - return "range_recv"; - case PG_RECV_RECORD: - return "record_recv"; - case PG_RECV_REGCLASS: - return "regclassrecv"; - case PG_RECV_REGCOLLATION: - return "regcollationrecv"; - case PG_RECV_REGCONFIG: - return "regconfigrecv"; - case PG_RECV_REGDICTIONARY: - return "regdictionaryrecv"; - case PG_RECV_REGNAMESPACE: - return "regnamespacerecv"; - case PG_RECV_REGOPERATOR: - return "regoperatorrecv"; - case PG_RECV_REGOPER: - return "regoperrecv"; - case PG_RECV_REGPROCEDURE: - return "regprocedurerecv"; - case PG_RECV_REGPROC: - return "regprocrecv"; - case PG_RECV_REGROLE: - return "regrolerecv"; - case PG_RECV_REGTYPE: - return "regtyperecv"; - case PG_RECV_TEXT: - return "textrecv"; - case PG_RECV_TID: - return "tidrecv"; - case PG_RECV_TIME: - return "time_recv"; - case PG_RECV_TIMESTAMP: - return "timestamp_recv"; - case PG_RECV_TIMESTAMPTZ: - return "timestamptz_recv"; - case PG_RECV_TIMETZ: - return "timetz_recv"; - case PG_RECV_TSQUERY: - return "tsqueryrecv"; - case PG_RECV_TSVECTOR: - return "tsvectorrecv"; - case PG_RECV_TXID_SNAPSHOT: - return "txid_snapshot_recv"; - case PG_RECV_UNKNOWN: - return "unknownrecv"; - case PG_RECV_UUID: - return "uuid_recv"; - case PG_RECV_VARBIT: - return "varbit_recv"; - case PG_RECV_VARCHAR: - return "varcharrecv"; - case PG_RECV_VOID: - return "void_recv"; - case PG_RECV_XID8: - return "xid8recv"; - case PG_RECV_XID: - return "xidrecv"; - case PG_RECV_XML: - return "xml_recv"; - default: - return ""; - } - } - - static std::string PgRecvTypname(PgRecv recv) { - switch (recv) { - case PG_RECV_BIT: - return "bit"; - case PG_RECV_BOOL: - return "bool"; - case PG_RECV_BYTEA: - return "bytea"; - 
case PG_RECV_CASH: - return "cash"; - case PG_RECV_CHAR: - return "char"; - case PG_RECV_DATE: - return "date"; - case PG_RECV_FLOAT4: - return "float4"; - case PG_RECV_FLOAT8: - return "float8"; - case PG_RECV_INT2: - return "int2"; - case PG_RECV_INT4: - return "int4"; - case PG_RECV_INT8: - return "int8"; - case PG_RECV_INTERVAL: - return "interval"; - case PG_RECV_NUMERIC: - return "numeric"; - case PG_RECV_OID: - return "oid"; - case PG_RECV_TEXT: - return "text"; - case PG_RECV_TIME: - return "time"; - case PG_RECV_TIMESTAMP: - return "timestamp"; - case PG_RECV_TIMESTAMPTZ: - return "timestamptz"; - case PG_RECV_TIMETZ: - return "timetz"; - case PG_RECV_UUID: - return "uuid"; - case PG_RECV_VARBIT: - return "varbit"; - case PG_RECV_VARCHAR: - return "varchar"; - - case PG_RECV_ARRAY: - return "array"; - case PG_RECV_RECORD: - return "record"; - case PG_RECV_RANGE: - return "range"; - case PG_RECV_DOMAIN: - return "domain"; - default: - return ""; - } - } - - explicit PostgresType(PgRecv recv) : oid_(0), recv_(recv) {} + explicit PostgresType(PostgresTypeId type_id) : oid_(0), type_id_(type_id) {} - PostgresType() : PostgresType(PG_RECV_UNINITIALIZED) {} + PostgresType() : PostgresType(PG_TYPE_UNINITIALIZED) {} void AppendChild(const std::string& field_name, const PostgresType& type) { PostgresType child(type); @@ -378,7 +377,7 @@ class PostgresType { } PostgresType Array(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(PG_RECV_ARRAY); + PostgresType out(PG_TYPE_ARRAY); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; @@ -390,7 +389,7 @@ class PostgresType { } PostgresType Range(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(PG_RECV_RANGE); + PostgresType out(PG_TYPE_RANGE); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; @@ -398,50 +397,50 @@ class PostgresType { } uint32_t oid() const { return oid_; } - PgRecv recv() const { return recv_; } + 
PostgresTypeId type_id() const { return type_id_; } const std::string& typname() const { return typname_; } const std::string& field_name() const { return field_name_; } int64_t n_children() const { return static_cast(children_.size()); } const PostgresType* child(int64_t i) const { return &children_[i]; } ArrowErrorCode SetSchema(ArrowSchema* schema) const { - switch (recv_) { - case PG_RECV_BOOL: + switch (type_id_) { + case PG_TYPE_BOOL: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; - case PG_RECV_INT2: + case PG_TYPE_INT2: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); break; - case PG_RECV_INT4: + case PG_TYPE_INT4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); break; - case PG_RECV_INT8: + case PG_TYPE_INT8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); break; - case PG_RECV_FLOAT4: + case PG_TYPE_FLOAT4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); break; - case PG_RECV_FLOAT8: + case PG_TYPE_FLOAT8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; - case PG_RECV_CHAR: - case PG_RECV_BPCHAR: - case PG_RECV_VARCHAR: - case PG_RECV_TEXT: + case PG_TYPE_CHAR: + case PG_TYPE_BPCHAR: + case PG_TYPE_VARCHAR: + case PG_TYPE_TEXT: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; - case PG_RECV_BYTEA: + case PG_TYPE_BYTEA: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; - case PG_RECV_RECORD: + case PG_TYPE_RECORD: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); for (int64_t i = 0; i < n_children(); i++) { NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); } break; - case PG_RECV_ARRAY: + case PG_TYPE_ARRAY: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); break; @@ -466,7 +465,7 
@@ class PostgresType { private: uint32_t oid_; - PgRecv recv_; + PostgresTypeId type_id_; std::string typname_; std::string field_name_; std::vector children_; @@ -474,10 +473,10 @@ class PostgresType { public: static std::unordered_map AllBase() { std::unordered_map out; - for (PgRecv recv : PgRecvAllBase()) { - PostgresType type(recv); - type.typname_ = PgRecvTypname(recv); - out.insert({PgRecvName(recv), type}); + for (PostgresTypeId type_id : PgRecvAllBase()) { + PostgresType type(type_id); + type.typname_ = PostgresTypname(type_id); + out.insert({PostgresTyprecv(type_id), type}); } return out; @@ -509,8 +508,8 @@ class PostgresTypeResolver { return NANOARROW_OK; } - uint32_t GetOID(PostgresType::PgRecv recv) const { - auto result = reverse_mapping_.find(recv); + uint32_t GetOID(PostgresTypeId type_id) const { + auto result = reverse_mapping_.find(type_id); if (result == reverse_mapping_.end()) { return 0; } else { @@ -529,20 +528,20 @@ class PostgresTypeResolver { const PostgresType& base = (*result).second; PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); - switch (base.recv()) { - case PostgresType::PG_RECV_ARRAY: { + switch (base.type_id()) { + case PG_TYPE_ARRAY: { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); - reverse_mapping_.insert({base.recv(), item.oid}); + reverse_mapping_.insert({base.type_id(), item.oid}); break; } - case PostgresType::PG_RECV_RECORD: { + case PG_TYPE_RECORD: { std::vector> child_desc; NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); - PostgresType out(PostgresType::PG_RECV_RECORD); + PostgresType out(PG_TYPE_RECORD); for (const auto& child_item : child_desc) { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(child_item.first, &child, error)); @@ -550,29 +549,29 @@ class PostgresTypeResolver { } mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); - 
reverse_mapping_.insert({base.recv(), item.oid}); + reverse_mapping_.insert({base.type_id(), item.oid}); break; } - case PostgresType::PG_RECV_DOMAIN: { + case PG_TYPE_DOMAIN: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); - reverse_mapping_.insert({base.recv(), item.oid}); + reverse_mapping_.insert({base.type_id(), item.oid}); break; } - case PostgresType::PG_RECV_RANGE: { + case PG_TYPE_RANGE: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); - reverse_mapping_.insert({base.recv(), item.oid}); + reverse_mapping_.insert({base.type_id(), item.oid}); break; } default: mapping_.insert({item.oid, type}); - reverse_mapping_.insert({base.recv(), item.oid}); + reverse_mapping_.insert({base.type_id(), item.oid}); break; } @@ -600,7 +599,7 @@ class PostgresTypeResolver { private: std::unordered_map mapping_; - std::unordered_map reverse_mapping_; + std::unordered_map reverse_mapping_; std::unordered_map>> classes_; std::unordered_map base_; }; diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index eeb698c6a0..4b1e1b86b5 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -23,6 +23,7 @@ #include "postgres_type.h" using adbcpq::PostgresType; +using adbcpq::PostgresTypeId; using adbcpq::PostgresTypeResolver; class MockTypeResolver : public PostgresTypeResolver { @@ -33,9 +34,9 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid = 0; // Insert all the base types - for (auto recv : recv_base) { - std::string typreceive = PostgresType::PgRecvName(recv); - std::string typname = PostgresType::PgRecvTypname(recv); + for (auto type_id : recv_base) { + std::string typreceive = adbcpq::PostgresTyprecv(type_id); + std::string typname = 
adbcpq::PostgresTypname(type_id); item.oid++; item.typname = typname.c_str(); item.typreceive = typreceive.c_str(); @@ -46,26 +47,26 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; item.typname = "_bool"; item.typreceive = "array_recv"; - item.child_oid = GetOID(PostgresType::PG_RECV_BOOL); + item.child_oid = GetOID(PostgresTypeId::PG_TYPE_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "boolrange"; item.typreceive = "range_recv"; - item.base_oid = GetOID(PostgresType::PG_RECV_BOOL); + item.base_oid = GetOID(PostgresTypeId::PG_TYPE_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "custombool"; item.typreceive = "domain_recv"; - item.base_oid = GetOID(PostgresType::PG_RECV_BOOL); + item.base_oid = GetOID(PostgresTypeId::PG_TYPE_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; uint32_t class_oid = item.oid; std::vector> record_fields = { - {GetOID(PostgresType::PG_RECV_INT4), "int4_col"}, - {GetOID(PostgresType::PG_RECV_TEXT), "text_col"}}; + {GetOID(PostgresTypeId::PG_TYPE_INT4), "int4_col"}, + {GetOID(PostgresTypeId::PG_TYPE_TEXT), "text_col"}}; InsertClass(class_oid, std::move(record_fields)); item.oid++; @@ -79,47 +80,47 @@ class MockTypeResolver : public PostgresTypeResolver { }; TEST(PostgresTypeTest, PostgresTypeBasic) { - PostgresType type(PostgresType::PG_RECV_BOOL); + PostgresType type(PostgresTypeId::PG_TYPE_BOOL); EXPECT_EQ(type.field_name(), ""); EXPECT_EQ(type.typname(), ""); - EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); EXPECT_EQ(type.oid(), 0); EXPECT_EQ(type.n_children(), 0); PostgresType with_info = type.WithPgTypeInfo(1234, "some_typename"); EXPECT_EQ(with_info.oid(), 1234); EXPECT_EQ(with_info.typname(), "some_typename"); - EXPECT_EQ(with_info.recv(), type.recv()); + EXPECT_EQ(with_info.type_id(), type.type_id()); PostgresType with_name = type.WithFieldName("some name"); 
EXPECT_EQ(with_name.field_name(), "some name"); EXPECT_EQ(with_name.oid(), type.oid()); - EXPECT_EQ(with_name.recv(), type.recv()); + EXPECT_EQ(with_name.type_id(), type.type_id()); PostgresType array = type.Array(12345, "array type name"); EXPECT_EQ(array.oid(), 12345); EXPECT_EQ(array.typname(), "array type name"); EXPECT_EQ(array.n_children(), 1); EXPECT_EQ(array.child(0)->oid(), type.oid()); - EXPECT_EQ(array.child(0)->recv(), type.recv()); + EXPECT_EQ(array.child(0)->type_id(), type.type_id()); PostgresType range = type.Range(12345, "range type name"); EXPECT_EQ(range.oid(), 12345); EXPECT_EQ(range.typname(), "range type name"); EXPECT_EQ(range.n_children(), 1); EXPECT_EQ(range.child(0)->oid(), type.oid()); - EXPECT_EQ(range.child(0)->recv(), type.recv()); + EXPECT_EQ(range.child(0)->type_id(), type.type_id()); PostgresType domain = type.Domain(123456, "domain type name"); EXPECT_EQ(domain.oid(), 123456); EXPECT_EQ(domain.typname(), "domain type name"); - EXPECT_EQ(domain.recv(), type.recv()); + EXPECT_EQ(domain.type_id(), type.type_id()); - PostgresType record(PostgresType::PG_RECV_RECORD); + PostgresType record(PostgresTypeId::PG_TYPE_RECORD); record.AppendChild("col1", type); - EXPECT_EQ(record.recv(), PostgresType::PG_RECV_RECORD); + EXPECT_EQ(record.type_id(), PostgresTypeId::PG_TYPE_RECORD); EXPECT_EQ(record.n_children(), 1); - EXPECT_EQ(record.child(0)->recv(), type.recv()); + EXPECT_EQ(record.child(0)->type_id(), type.type_id()); EXPECT_EQ(record.child(0)->field_name(), "col1"); } @@ -127,62 +128,64 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { ArrowSchema schema; ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BOOL).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_BOOL).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_INT2).SetSchema(&schema), NANOARROW_OK); + 
EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_INT2).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "s"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_INT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_INT4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "i"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_INT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_INT8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "l"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_FLOAT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_FLOAT4).SetSchema(&schema), + NANOARROW_OK); EXPECT_STREQ(schema.format, "f"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_FLOAT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_FLOAT8).SetSchema(&schema), + NANOARROW_OK); EXPECT_STREQ(schema.format, "g"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_TEXT).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_TEXT).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "u"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BYTEA).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_BYTEA).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresType::PG_RECV_BOOL).Array().SetSchema(&schema), + EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_BOOL).Array().SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+l"); 
EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType record(PostgresType::PG_RECV_RECORD); - record.AppendChild("col1", PostgresType(PostgresType::PG_RECV_BOOL)); + PostgresType record(PostgresTypeId::PG_TYPE_RECORD); + record.AppendChild("col1", PostgresType(PostgresTypeId::PG_TYPE_BOOL)); EXPECT_EQ(record.SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+s"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType unknown(PostgresType::PG_RECV_BRIN_MINMAX_MULTI_SUMMARY); + PostgresType unknown(PostgresTypeId::PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY); EXPECT_EQ(unknown.WithPgTypeInfo(0, "some_name").SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); @@ -194,7 +197,7 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { TEST(PostgresTypeTest, PostgresTypeAllBase) { auto base_types = PostgresType::AllBase(); - EXPECT_EQ(base_types["array_recv"].recv(), PostgresType::PG_RECV_ARRAY); + EXPECT_EQ(base_types["array_recv"].type_id(), PostgresTypeId::PG_TYPE_ARRAY); EXPECT_EQ(base_types["array_recv"].typname(), "array"); EXPECT_EQ(base_types.size(), PostgresType::PgRecvAllBase().size()); } @@ -209,7 +212,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(123, &type, &error), EINVAL); EXPECT_STREQ(ArrowErrorMessage(&error), "Postgres type with oid 123 not found"); - // Check error for unsupported recv name + // Check error for unsupported type_id name item.oid = 123; item.typname = "invalid"; item.typreceive = "invalid_recv"; @@ -254,7 +257,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(10, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 10); EXPECT_EQ(type.typname(), "some_type_name"); - EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); // Check insert/resolve of array type item.oid = 11; @@ -265,9 +268,9 @@ 
TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(11, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 11); EXPECT_EQ(type.typname(), "some_array_type_name"); - EXPECT_EQ(type.recv(), PostgresType::PG_RECV_ARRAY); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_ARRAY); EXPECT_EQ(type.child(0)->oid(), 10); - EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.child(0)->type_id(), PostgresTypeId::PG_TYPE_BOOL); // Check insert/resolve of range type item.oid = 12; @@ -278,9 +281,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(12, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 12); EXPECT_EQ(type.typname(), "some_range_type_name"); - EXPECT_EQ(type.recv(), PostgresType::PG_RECV_RANGE); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_RANGE); EXPECT_EQ(type.child(0)->oid(), 10); - EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.child(0)->type_id(), PostgresTypeId::PG_TYPE_BOOL); // Check insert/resolve of domain type item.oid = 13; @@ -291,7 +294,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(13, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 13); EXPECT_EQ(type.typname(), "some_domain_type_name"); - EXPECT_EQ(type.recv(), PostgresType::PG_RECV_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); } TEST(PostgresTypeTest, PostgresTypeResolveRecord) { @@ -300,12 +303,13 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { ASSERT_EQ(resolver.Init(), NANOARROW_OK); PostgresType type; - EXPECT_EQ(resolver.Find(resolver.GetOID(PostgresType::PG_RECV_RECORD), &type, nullptr), - NANOARROW_OK); - EXPECT_EQ(type.oid(), resolver.GetOID(PostgresType::PG_RECV_RECORD)); + EXPECT_EQ( + resolver.Find(resolver.GetOID(PostgresTypeId::PG_TYPE_RECORD), &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::PG_TYPE_RECORD)); EXPECT_EQ(type.n_children(), 2); 
EXPECT_EQ(type.child(0)->field_name(), "int4_col"); - EXPECT_EQ(type.child(0)->recv(), PostgresType::PG_RECV_INT4); + EXPECT_EQ(type.child(0)->type_id(), PostgresTypeId::PG_TYPE_INT4); EXPECT_EQ(type.child(1)->field_name(), "text_col"); - EXPECT_EQ(type.child(1)->recv(), PostgresType::PG_RECV_TEXT); + EXPECT_EQ(type.child(1)->type_id(), PostgresTypeId::PG_TYPE_TEXT); } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 96773a63f9..31a2900bec 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -182,26 +182,26 @@ struct BindStream { param_values_offsets.reserve(bind_schema->n_children); for (size_t i = 0; i < bind_schema_fields.size(); i++) { - PostgresType::PgRecv recv; + PostgresTypeId type_id; switch (bind_schema_fields[i].type) { case ArrowType::NANOARROW_TYPE_INT16: - recv = PostgresType::PG_RECV_INT2; + type_id = PG_TYPE_INT2; param_lengths[i] = 2; break; case ArrowType::NANOARROW_TYPE_INT32: - recv = PostgresType::PG_RECV_INT4; + type_id = PG_TYPE_INT4; param_lengths[i] = 4; break; case ArrowType::NANOARROW_TYPE_INT64: - recv = PostgresType::PG_RECV_INT8; + type_id = PG_TYPE_INT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_DOUBLE: - recv = PostgresType::PG_RECV_FLOAT8; + type_id = PG_TYPE_FLOAT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_STRING: - recv = PostgresType::PG_RECV_TEXT; + type_id = PG_TYPE_TEXT; param_lengths[i] = 0; break; default: @@ -211,7 +211,7 @@ struct BindStream { return ADBC_STATUS_NOT_IMPLEMENTED; } - param_types[i] = type_resolver.GetOID(recv); + param_types[i] = type_resolver.GetOID(type_id); if (param_types[i] == 0) { SetError(error, "Field #", i + 1, " ('", bind_schema->children[i]->name, "') has type with no corresponding PostgreSQL type ", From b6ccaee8a4f1a10fe09e96dea535bab7f620c2e6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 10:26:52 -0300 Subject: [PATCH 62/90] more typeid help --- 
c/driver/postgresql/postgres_type.h | 549 ++++++++++++---------- c/driver/postgresql/postgres_type_test.cc | 6 +- 2 files changed, 310 insertions(+), 245 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 636cbf623b..0daabc6121 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -110,250 +110,12 @@ enum PostgresTypeId { PG_TYPE_XML }; -static inline const char* PostgresTyprecv(PostgresTypeId type_id) { - switch (type_id) { - case PG_TYPE_ANYARRAY: - return "anyarray_recv"; - case PG_TYPE_ANYCOMPATIBLEARRAY: - return "anycompatiblearray_recv"; - case PG_TYPE_ARRAY: - return "array_recv"; - case PG_TYPE_BIT: - return "bit_recv"; - case PG_TYPE_BOOL: - return "boolrecv"; - case PG_TYPE_BOX: - return "box_recv"; - case PG_TYPE_BPCHAR: - return "bpcharrecv"; - case PG_TYPE_BRIN_BLOOM_SUMMARY: - return "brin_bloom_summary_recv"; - case PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY: - return "brin_minmax_multi_summary_recv"; - case PG_TYPE_BYTEA: - return "bytearecv"; - case PG_TYPE_CASH: - return "cash_recv"; - case PG_TYPE_CHAR: - return "charrecv"; - case PG_TYPE_CIDR: - return "cidr_recv"; - case PG_TYPE_CID: - return "cidrecv"; - case PG_TYPE_CIRCLE: - return "circle_recv"; - case PG_TYPE_CSTRING: - return "cstring_recv"; - case PG_TYPE_DATE: - return "date_recv"; - case PG_TYPE_DOMAIN: - return "domain_recv"; - case PG_TYPE_FLOAT4: - return "float4recv"; - case PG_TYPE_FLOAT8: - return "float8recv"; - case PG_TYPE_INET: - return "inet_recv"; - case PG_TYPE_INT2: - return "int2recv"; - case PG_TYPE_INT2VECTOR: - return "int2vectorrecv"; - case PG_TYPE_INT4: - return "int4recv"; - case PG_TYPE_INT8: - return "int8recv"; - case PG_TYPE_INTERVAL: - return "interval_recv"; - case PG_TYPE_JSON: - return "json_recv"; - case PG_TYPE_JSONB: - return "jsonb_recv"; - case PG_TYPE_JSONPATH: - return "jsonpath_recv"; - case PG_TYPE_LINE: - return "line_recv"; - case PG_TYPE_LSEG: - return 
"lseg_recv"; - case PG_TYPE_MACADDR: - return "macaddr_recv"; - case PG_TYPE_MACADDR8: - return "macaddr8_recv"; - case PG_TYPE_MULTIRANGE: - return "multirange_recv"; - case PG_TYPE_NAME: - return "namerecv"; - case PG_TYPE_NUMERIC: - return "numeric_recv"; - case PG_TYPE_OID: - return "oidrecv"; - case PG_TYPE_OIDVECTOR: - return "oidvectorrecv"; - case PG_TYPE_PATH: - return "path_recv"; - case PG_TYPE_PG_DDL_COMMAND: - return "pg_ddl_command_recv"; - case PG_TYPE_PG_DEPENDENCIES: - return "pg_dependencies_recv"; - case PG_TYPE_PG_LSN: - return "pg_lsn_recv"; - case PG_TYPE_PG_MCV_LIST: - return "pg_mcv_list_recv"; - case PG_TYPE_PG_NDISTINCT: - return "pg_ndistinct_recv"; - case PG_TYPE_PG_NODE_TREE: - return "pg_node_tree_recv"; - case PG_TYPE_PG_SNAPSHOT: - return "pg_snapshot_recv"; - case PG_TYPE_POINT: - return "point_recv"; - case PG_TYPE_POLY: - return "poly_recv"; - case PG_TYPE_RANGE: - return "range_recv"; - case PG_TYPE_RECORD: - return "record_recv"; - case PG_TYPE_REGCLASS: - return "regclassrecv"; - case PG_TYPE_REGCOLLATION: - return "regcollationrecv"; - case PG_TYPE_REGCONFIG: - return "regconfigrecv"; - case PG_TYPE_REGDICTIONARY: - return "regdictionaryrecv"; - case PG_TYPE_REGNAMESPACE: - return "regnamespacerecv"; - case PG_TYPE_REGOPERATOR: - return "regoperatorrecv"; - case PG_TYPE_REGOPER: - return "regoperrecv"; - case PG_TYPE_REGPROCEDURE: - return "regprocedurerecv"; - case PG_TYPE_REGPROC: - return "regprocrecv"; - case PG_TYPE_REGROLE: - return "regrolerecv"; - case PG_TYPE_REGTYPE: - return "regtyperecv"; - case PG_TYPE_TEXT: - return "textrecv"; - case PG_TYPE_TID: - return "tidrecv"; - case PG_TYPE_TIME: - return "time_recv"; - case PG_TYPE_TIMESTAMP: - return "timestamp_recv"; - case PG_TYPE_TIMESTAMPTZ: - return "timestamptz_recv"; - case PG_TYPE_TIMETZ: - return "timetz_recv"; - case PG_TYPE_TSQUERY: - return "tsqueryrecv"; - case PG_TYPE_TSVECTOR: - return "tsvectorrecv"; - case PG_TYPE_TXID_SNAPSHOT: - return 
"txid_snapshot_recv"; - case PG_TYPE_UNKNOWN: - return "unknownrecv"; - case PG_TYPE_UUID: - return "uuid_recv"; - case PG_TYPE_VARBIT: - return "varbit_recv"; - case PG_TYPE_VARCHAR: - return "varcharrecv"; - case PG_TYPE_VOID: - return "void_recv"; - case PG_TYPE_XID8: - return "xid8recv"; - case PG_TYPE_XID: - return "xidrecv"; - case PG_TYPE_XML: - return "xml_recv"; - default: - return ""; - } -} - -static inline const char* PostgresTypname(PostgresTypeId type_id) { - switch (type_id) { - case PG_TYPE_BIT: - return "bit"; - case PG_TYPE_BOOL: - return "bool"; - case PG_TYPE_BYTEA: - return "bytea"; - case PG_TYPE_CASH: - return "cash"; - case PG_TYPE_CHAR: - return "char"; - case PG_TYPE_DATE: - return "date"; - case PG_TYPE_FLOAT4: - return "float4"; - case PG_TYPE_FLOAT8: - return "float8"; - case PG_TYPE_INT2: - return "int2"; - case PG_TYPE_INT4: - return "int4"; - case PG_TYPE_INT8: - return "int8"; - case PG_TYPE_INTERVAL: - return "interval"; - case PG_TYPE_NUMERIC: - return "numeric"; - case PG_TYPE_OID: - return "oid"; - case PG_TYPE_TEXT: - return "text"; - case PG_TYPE_TIME: - return "time"; - case PG_TYPE_TIMESTAMP: - return "timestamp"; - case PG_TYPE_TIMESTAMPTZ: - return "timestamptz"; - case PG_TYPE_TIMETZ: - return "timetz"; - case PG_TYPE_UUID: - return "uuid"; - case PG_TYPE_VARBIT: - return "varbit"; - case PG_TYPE_VARCHAR: - return "varchar"; - - case PG_TYPE_ARRAY: - return "array"; - case PG_TYPE_RECORD: - return "record"; - case PG_TYPE_RANGE: - return "range"; - case PG_TYPE_DOMAIN: - return "domain"; - default: - return ""; - } -} +static inline const char* PostgresTyprecv(PostgresTypeId type_id); +static inline const char* PostgresTypname(PostgresTypeId type_id); +static inline std::vector PostgresTypeIdAll(bool nested = true); class PostgresType { public: - static std::vector PgRecvAllBase(bool nested = true) { - std::vector base = { - PG_TYPE_BIT, PG_TYPE_BOOL, PG_TYPE_BYTEA, PG_TYPE_CASH, - PG_TYPE_CHAR, PG_TYPE_BPCHAR, 
PG_TYPE_DATE, PG_TYPE_FLOAT4, - PG_TYPE_FLOAT8, PG_TYPE_INT2, PG_TYPE_INT4, PG_TYPE_INT8, - PG_TYPE_INTERVAL, PG_TYPE_NUMERIC, PG_TYPE_OID, PG_TYPE_TEXT, - PG_TYPE_TIME, PG_TYPE_TIMESTAMP, PG_TYPE_TIMESTAMPTZ, PG_TYPE_TIMETZ, - PG_TYPE_UUID, PG_TYPE_VARBIT, PG_TYPE_VARCHAR}; - - if (nested) { - base.push_back(PG_TYPE_ARRAY); - base.push_back(PG_TYPE_RECORD); - base.push_back(PG_TYPE_RANGE); - base.push_back(PG_TYPE_DOMAIN); - } - - return base; - } - explicit PostgresType(PostgresTypeId type_id) : oid_(0), type_id_(type_id) {} PostgresType() : PostgresType(PG_TYPE_UNINITIALIZED) {} @@ -473,7 +235,7 @@ class PostgresType { public: static std::unordered_map AllBase() { std::unordered_map out; - for (PostgresTypeId type_id : PgRecvAllBase()) { + for (PostgresTypeId type_id : PostgresTypeIdAll()) { PostgresType type(type_id); type.typname_ = PostgresTypname(type_id); out.insert({PostgresTyprecv(type_id), type}); @@ -604,4 +366,307 @@ class PostgresTypeResolver { std::unordered_map base_; }; +static inline const char* PostgresTyprecv(PostgresTypeId type_id) { + switch (type_id) { + case PG_TYPE_ANYARRAY: + return "anyarray_recv"; + case PG_TYPE_ANYCOMPATIBLEARRAY: + return "anycompatiblearray_recv"; + case PG_TYPE_ARRAY: + return "array_recv"; + case PG_TYPE_BIT: + return "bit_recv"; + case PG_TYPE_BOOL: + return "boolrecv"; + case PG_TYPE_BOX: + return "box_recv"; + case PG_TYPE_BPCHAR: + return "bpcharrecv"; + case PG_TYPE_BRIN_BLOOM_SUMMARY: + return "brin_bloom_summary_recv"; + case PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY: + return "brin_minmax_multi_summary_recv"; + case PG_TYPE_BYTEA: + return "bytearecv"; + case PG_TYPE_CASH: + return "cash_recv"; + case PG_TYPE_CHAR: + return "charrecv"; + case PG_TYPE_CIDR: + return "cidr_recv"; + case PG_TYPE_CID: + return "cidrecv"; + case PG_TYPE_CIRCLE: + return "circle_recv"; + case PG_TYPE_CSTRING: + return "cstring_recv"; + case PG_TYPE_DATE: + return "date_recv"; + case PG_TYPE_DOMAIN: + return "domain_recv"; + case 
PG_TYPE_FLOAT4: + return "float4recv"; + case PG_TYPE_FLOAT8: + return "float8recv"; + case PG_TYPE_INET: + return "inet_recv"; + case PG_TYPE_INT2: + return "int2recv"; + case PG_TYPE_INT2VECTOR: + return "int2vectorrecv"; + case PG_TYPE_INT4: + return "int4recv"; + case PG_TYPE_INT8: + return "int8recv"; + case PG_TYPE_INTERVAL: + return "interval_recv"; + case PG_TYPE_JSON: + return "json_recv"; + case PG_TYPE_JSONB: + return "jsonb_recv"; + case PG_TYPE_JSONPATH: + return "jsonpath_recv"; + case PG_TYPE_LINE: + return "line_recv"; + case PG_TYPE_LSEG: + return "lseg_recv"; + case PG_TYPE_MACADDR: + return "macaddr_recv"; + case PG_TYPE_MACADDR8: + return "macaddr8_recv"; + case PG_TYPE_MULTIRANGE: + return "multirange_recv"; + case PG_TYPE_NAME: + return "namerecv"; + case PG_TYPE_NUMERIC: + return "numeric_recv"; + case PG_TYPE_OID: + return "oidrecv"; + case PG_TYPE_OIDVECTOR: + return "oidvectorrecv"; + case PG_TYPE_PATH: + return "path_recv"; + case PG_TYPE_PG_DDL_COMMAND: + return "pg_ddl_command_recv"; + case PG_TYPE_PG_DEPENDENCIES: + return "pg_dependencies_recv"; + case PG_TYPE_PG_LSN: + return "pg_lsn_recv"; + case PG_TYPE_PG_MCV_LIST: + return "pg_mcv_list_recv"; + case PG_TYPE_PG_NDISTINCT: + return "pg_ndistinct_recv"; + case PG_TYPE_PG_NODE_TREE: + return "pg_node_tree_recv"; + case PG_TYPE_PG_SNAPSHOT: + return "pg_snapshot_recv"; + case PG_TYPE_POINT: + return "point_recv"; + case PG_TYPE_POLY: + return "poly_recv"; + case PG_TYPE_RANGE: + return "range_recv"; + case PG_TYPE_RECORD: + return "record_recv"; + case PG_TYPE_REGCLASS: + return "regclassrecv"; + case PG_TYPE_REGCOLLATION: + return "regcollationrecv"; + case PG_TYPE_REGCONFIG: + return "regconfigrecv"; + case PG_TYPE_REGDICTIONARY: + return "regdictionaryrecv"; + case PG_TYPE_REGNAMESPACE: + return "regnamespacerecv"; + case PG_TYPE_REGOPERATOR: + return "regoperatorrecv"; + case PG_TYPE_REGOPER: + return "regoperrecv"; + case PG_TYPE_REGPROCEDURE: + return "regprocedurerecv"; + case 
PG_TYPE_REGPROC: + return "regprocrecv"; + case PG_TYPE_REGROLE: + return "regrolerecv"; + case PG_TYPE_REGTYPE: + return "regtyperecv"; + case PG_TYPE_TEXT: + return "textrecv"; + case PG_TYPE_TID: + return "tidrecv"; + case PG_TYPE_TIME: + return "time_recv"; + case PG_TYPE_TIMESTAMP: + return "timestamp_recv"; + case PG_TYPE_TIMESTAMPTZ: + return "timestamptz_recv"; + case PG_TYPE_TIMETZ: + return "timetz_recv"; + case PG_TYPE_TSQUERY: + return "tsqueryrecv"; + case PG_TYPE_TSVECTOR: + return "tsvectorrecv"; + case PG_TYPE_TXID_SNAPSHOT: + return "txid_snapshot_recv"; + case PG_TYPE_UNKNOWN: + return "unknownrecv"; + case PG_TYPE_UUID: + return "uuid_recv"; + case PG_TYPE_VARBIT: + return "varbit_recv"; + case PG_TYPE_VARCHAR: + return "varcharrecv"; + case PG_TYPE_VOID: + return "void_recv"; + case PG_TYPE_XID8: + return "xid8recv"; + case PG_TYPE_XID: + return "xidrecv"; + case PG_TYPE_XML: + return "xml_recv"; + default: + return ""; + } +} + +static inline const char* PostgresTypname(PostgresTypeId type_id) { + switch (type_id) { + case PG_TYPE_BIT: + return "bit"; + case PG_TYPE_BOOL: + return "bool"; + case PG_TYPE_BYTEA: + return "bytea"; + case PG_TYPE_CASH: + return "cash"; + case PG_TYPE_CHAR: + return "char"; + case PG_TYPE_DATE: + return "date"; + case PG_TYPE_FLOAT4: + return "float4"; + case PG_TYPE_FLOAT8: + return "float8"; + case PG_TYPE_INT2: + return "int2"; + case PG_TYPE_INT4: + return "int4"; + case PG_TYPE_INT8: + return "int8"; + case PG_TYPE_INTERVAL: + return "interval"; + case PG_TYPE_NUMERIC: + return "numeric"; + case PG_TYPE_OID: + return "oid"; + case PG_TYPE_TEXT: + return "text"; + case PG_TYPE_TIME: + return "time"; + case PG_TYPE_TIMESTAMP: + return "timestamp"; + case PG_TYPE_TIMESTAMPTZ: + return "timestamptz"; + case PG_TYPE_TIMETZ: + return "timetz"; + case PG_TYPE_UUID: + return "uuid"; + case PG_TYPE_VARBIT: + return "varbit"; + case PG_TYPE_VARCHAR: + return "varchar"; + + case PG_TYPE_ARRAY: + return "array"; + case 
PG_TYPE_RECORD: + return "record"; + case PG_TYPE_RANGE: + return "range"; + case PG_TYPE_DOMAIN: + return "domain"; + default: + return ""; + } +} + +static inline std::vector PostgresTypeIdAll(bool nested) { + std::vector base = {PG_TYPE_UNINITIALIZED, + PG_TYPE_ANYARRAY, + PG_TYPE_ANYCOMPATIBLEARRAY, + PG_TYPE_BIT, + PG_TYPE_BOOL, + PG_TYPE_BOX, + PG_TYPE_BPCHAR, + PG_TYPE_BRIN_BLOOM_SUMMARY, + PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY, + PG_TYPE_BYTEA, + PG_TYPE_CASH, + PG_TYPE_CHAR, + PG_TYPE_CIDR, + PG_TYPE_CID, + PG_TYPE_CIRCLE, + PG_TYPE_CSTRING, + PG_TYPE_DATE, + PG_TYPE_FLOAT4, + PG_TYPE_FLOAT8, + PG_TYPE_INET, + PG_TYPE_INT2, + PG_TYPE_INT2VECTOR, + PG_TYPE_INT4, + PG_TYPE_INT8, + PG_TYPE_INTERVAL, + PG_TYPE_JSON, + PG_TYPE_JSONB, + PG_TYPE_JSONPATH, + PG_TYPE_LINE, + PG_TYPE_LSEG, + PG_TYPE_MACADDR, + PG_TYPE_MACADDR8, + PG_TYPE_MULTIRANGE, + PG_TYPE_NAME, + PG_TYPE_NUMERIC, + PG_TYPE_OID, + PG_TYPE_OIDVECTOR, + PG_TYPE_PATH, + PG_TYPE_POINT, + PG_TYPE_POLY, + PG_TYPE_REGCLASS, + PG_TYPE_REGCOLLATION, + PG_TYPE_REGCONFIG, + PG_TYPE_REGDICTIONARY, + PG_TYPE_REGNAMESPACE, + PG_TYPE_REGOPERATOR, + PG_TYPE_REGOPER, + PG_TYPE_REGPROCEDURE, + PG_TYPE_REGPROC, + PG_TYPE_REGROLE, + PG_TYPE_REGTYPE, + PG_TYPE_TEXT, + PG_TYPE_TID, + PG_TYPE_TIME, + PG_TYPE_TIMESTAMP, + PG_TYPE_TIMESTAMPTZ, + PG_TYPE_TIMETZ, + PG_TYPE_TSQUERY, + PG_TYPE_TSVECTOR, + PG_TYPE_TXID_SNAPSHOT, + PG_TYPE_UNKNOWN, + PG_TYPE_UUID, + PG_TYPE_VARBIT, + PG_TYPE_VARCHAR, + PG_TYPE_VOID, + PG_TYPE_XID8, + PG_TYPE_XID, + PG_TYPE_XML}; + + if (nested) { + base.push_back(PG_TYPE_ARRAY); + base.push_back(PG_TYPE_RECORD); + base.push_back(PG_TYPE_RANGE); + base.push_back(PG_TYPE_DOMAIN); + } + + return base; +} + } // namespace adbcpq diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 4b1e1b86b5..aef19a7846 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -29,12 +29,12 @@ using 
adbcpq::PostgresTypeResolver; class MockTypeResolver : public PostgresTypeResolver { public: ArrowErrorCode Init() { - auto recv_base = PostgresType::PgRecvAllBase(false); + auto all_types = adbcpq::PostgresTypeIdAll(false); PostgresTypeResolver::Item item; item.oid = 0; // Insert all the base types - for (auto type_id : recv_base) { + for (auto type_id : all_types) { std::string typreceive = adbcpq::PostgresTyprecv(type_id); std::string typname = adbcpq::PostgresTypname(type_id); item.oid++; @@ -199,7 +199,7 @@ TEST(PostgresTypeTest, PostgresTypeAllBase) { auto base_types = PostgresType::AllBase(); EXPECT_EQ(base_types["array_recv"].type_id(), PostgresTypeId::PG_TYPE_ARRAY); EXPECT_EQ(base_types["array_recv"].typname(), "array"); - EXPECT_EQ(base_types.size(), PostgresType::PgRecvAllBase().size()); + EXPECT_EQ(base_types.size(), adbcpq::PostgresTypeIdAll().size()); } TEST(PostgresTypeTest, PostgresTypeResolver) { From d543b62a248a04657e62514a2f95b18431eec9a3 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 10:32:28 -0300 Subject: [PATCH 63/90] fix typname --- c/driver/postgresql/postgres_type.h | 121 ++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 23 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 0daabc6121..eaa71a48c8 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -446,20 +446,6 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id) { return "oidvectorrecv"; case PG_TYPE_PATH: return "path_recv"; - case PG_TYPE_PG_DDL_COMMAND: - return "pg_ddl_command_recv"; - case PG_TYPE_PG_DEPENDENCIES: - return "pg_dependencies_recv"; - case PG_TYPE_PG_LSN: - return "pg_lsn_recv"; - case PG_TYPE_PG_MCV_LIST: - return "pg_mcv_list_recv"; - case PG_TYPE_PG_NDISTINCT: - return "pg_ndistinct_recv"; - case PG_TYPE_PG_NODE_TREE: - return "pg_node_tree_recv"; - case PG_TYPE_PG_SNAPSHOT: - return "pg_snapshot_recv"; case 
PG_TYPE_POINT: return "point_recv"; case PG_TYPE_POLY: @@ -531,36 +517,118 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id) { static inline const char* PostgresTypname(PostgresTypeId type_id) { switch (type_id) { + case PG_TYPE_ANYARRAY: + return "anyarray"; + case PG_TYPE_ANYCOMPATIBLEARRAY: + return "anycompatiblearray"; + case PG_TYPE_ARRAY: + return "array"; case PG_TYPE_BIT: return "bit"; case PG_TYPE_BOOL: return "bool"; + case PG_TYPE_BOX: + return "box"; + case PG_TYPE_BPCHAR: + return "bpchar"; + case PG_TYPE_BRIN_BLOOM_SUMMARY: + return "brin_bloom_summary"; + case PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY: + return "brin_minmax_multi_summary"; case PG_TYPE_BYTEA: return "bytea"; case PG_TYPE_CASH: return "cash"; case PG_TYPE_CHAR: return "char"; + case PG_TYPE_CIDR: + return "cidr"; + case PG_TYPE_CID: + return "cid"; + case PG_TYPE_CIRCLE: + return "circle"; + case PG_TYPE_CSTRING: + return "cstring"; case PG_TYPE_DATE: return "date"; + case PG_TYPE_DOMAIN: + return "domain"; case PG_TYPE_FLOAT4: return "float4"; case PG_TYPE_FLOAT8: return "float8"; + case PG_TYPE_INET: + return "inet"; case PG_TYPE_INT2: return "int2"; + case PG_TYPE_INT2VECTOR: + return "int2vector"; case PG_TYPE_INT4: return "int4"; case PG_TYPE_INT8: return "int8"; case PG_TYPE_INTERVAL: return "interval"; + case PG_TYPE_JSON: + return "json"; + case PG_TYPE_JSONB: + return "jsonb"; + case PG_TYPE_JSONPATH: + return "jsonpath"; + case PG_TYPE_LINE: + return "line"; + case PG_TYPE_LSEG: + return "lseg"; + case PG_TYPE_MACADDR: + return "macaddr"; + case PG_TYPE_MACADDR8: + return "macaddr8"; + case PG_TYPE_MULTIRANGE: + return "multirange"; + case PG_TYPE_NAME: + return "name"; case PG_TYPE_NUMERIC: return "numeric"; case PG_TYPE_OID: return "oid"; + case PG_TYPE_OIDVECTOR: + return "oidvector"; + case PG_TYPE_PATH: + return "path"; + case PG_TYPE_POINT: + return "point"; + case PG_TYPE_POLY: + return "poly"; + case PG_TYPE_RANGE: + return "range"; + case PG_TYPE_RECORD: 
+ return "record"; + case PG_TYPE_REGCLASS: + return "regclass"; + case PG_TYPE_REGCOLLATION: + return "regcollation"; + case PG_TYPE_REGCONFIG: + return "regconfig"; + case PG_TYPE_REGDICTIONARY: + return "regdictionary"; + case PG_TYPE_REGNAMESPACE: + return "regnamespace"; + case PG_TYPE_REGOPERATOR: + return "regoperator"; + case PG_TYPE_REGOPER: + return "regoper"; + case PG_TYPE_REGPROCEDURE: + return "regprocedure"; + case PG_TYPE_REGPROC: + return "regproc"; + case PG_TYPE_REGROLE: + return "regrole"; + case PG_TYPE_REGTYPE: + return "regtype"; case PG_TYPE_TEXT: return "text"; + case PG_TYPE_TID: + return "tid"; case PG_TYPE_TIME: return "time"; case PG_TYPE_TIMESTAMP: @@ -569,21 +637,28 @@ static inline const char* PostgresTypname(PostgresTypeId type_id) { return "timestamptz"; case PG_TYPE_TIMETZ: return "timetz"; + case PG_TYPE_TSQUERY: + return "tsquery"; + case PG_TYPE_TSVECTOR: + return "tsvector"; + case PG_TYPE_TXID_SNAPSHOT: + return "txid_snapshot"; + case PG_TYPE_UNKNOWN: + return "unknown"; case PG_TYPE_UUID: return "uuid"; case PG_TYPE_VARBIT: return "varbit"; case PG_TYPE_VARCHAR: return "varchar"; - - case PG_TYPE_ARRAY: - return "array"; - case PG_TYPE_RECORD: - return "record"; - case PG_TYPE_RANGE: - return "range"; - case PG_TYPE_DOMAIN: - return "domain"; + case PG_TYPE_VOID: + return "void"; + case PG_TYPE_XID8: + return "xid8"; + case PG_TYPE_XID: + return "xid"; + case PG_TYPE_XML: + return "xml"; default: return ""; } From 5f1b109c279e19832c8154b0fa2c2ee37eb61c79 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 11:22:50 -0300 Subject: [PATCH 64/90] documentation --- c/driver/postgresql/postgres_type.h | 90 +++++++++++++++++------ c/driver/postgresql/postgres_type_test.cc | 13 +--- 2 files changed, 70 insertions(+), 33 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index eaa71a48c8..722212f35a 100644 --- a/c/driver/postgresql/postgres_type.h +++ 
b/c/driver/postgresql/postgres_type.h @@ -28,6 +28,7 @@ namespace adbcpq { +// An enum of the types available in most Postgres pg_type tables enum PostgresTypeId { PG_TYPE_UNINITIALIZED, PG_TYPE_ANYARRAY, @@ -110,10 +111,23 @@ enum PostgresTypeId { PG_TYPE_XML }; +// Returns the receive function name as defined in the typreceive column +// of the pg_type table. This name is the one that gets used to look up +// the PostgresTypeId. static inline const char* PostgresTyprecv(PostgresTypeId type_id); + +// Returns a likely typname value for a given PostgresTypeId. This is useful +// for testing and error messages but may not be the actual value present +// in the pg_type typname column. static inline const char* PostgresTypname(PostgresTypeId type_id); + +// A vector of all type IDs, optionally with the nested types PG_TYPE_ARRAY, +// PG_TYPE_DOMAIN, PG_TYPE_RECORD, and PG_TYPE_RANGE. static inline std::vector PostgresTypeIdAll(bool nested = true); +// An abstraction of a (potentially nested and/or parameterized) Postgres +// data type. This class is where default type conversion to/from Arrow +// is defined. It is intentionally copyable. class PostgresType { public: explicit PostgresType(PostgresTypeId type_id) : oid_(0), type_id_(type_id) {} @@ -165,6 +179,13 @@ class PostgresType { int64_t n_children() const { return static_cast(children_.size()); } const PostgresType* child(int64_t i) const { return &children_[i]; } + // Sets appropriate fields of an ArrowSchema that has been initialized using + // ArrowSchemaInit. This is a recursive operation (i.e., nested types will + // initialize and set the appropriate number of children). Returns NANOARROW_OK + // on success and perhaps ENOMEM if memory cannot be allocated. Types that + // do not have a corresponding Arrow type are returned as Binary with field + // metadata ADBC:postgresql:typname. These types can be represented as their + // binary COPY representation in the output.
ArrowErrorCode SetSchema(ArrowSchema* schema) const { switch (type_id_) { case PG_TYPE_BOOL: @@ -231,20 +252,15 @@ class PostgresType { std::string typname_; std::string field_name_; std::vector children_; - - public: - static std::unordered_map AllBase() { - std::unordered_map out; - for (PostgresTypeId type_id : PostgresTypeIdAll()) { - PostgresType type(type_id); - type.typname_ = PostgresTypname(type_id); - out.insert({PostgresTyprecv(type_id), type}); - } - - return out; - } }; +// Because type information is stored in a database's pg_type table, it can't +// truly be resolved until runtime; however, querying the database's pg_type table +// for every result is unlikely to be reasonable. This class is a cache of information +// from the pg_type table with appropriate lookup tables to resolve a PostgresType +// instance based on a oid (which is the information that libpq provides when +// inspecting a result object). Types can be added/removed from the pg_type table +// via SQL, so this cache may need to be periodically refreshed. class PostgresTypeResolver { public: struct Item { @@ -256,8 +272,11 @@ class PostgresTypeResolver { uint32_t class_oid; }; - PostgresTypeResolver() : base_(PostgresType::AllBase()) {} + PostgresTypeResolver() : base_(AllBase()) {} + // Place a resolved copy of a PostgresType with the appropriate oid in type_out + // if NANOARROW_OK is returned or place a null-terminated error message into error + // otherwise. ArrowErrorCode Find(uint32_t oid, PostgresType* type_out, ArrowError* error) const { auto result = mapping_.find(oid); if (result == mapping_.end()) { @@ -270,6 +289,8 @@ class PostgresTypeResolver { return NANOARROW_OK; } + // Resolve the oid for a given type_id. Returns 0 if the oid cannot be + // resolved. 
uint32_t GetOID(PostgresTypeId type_id) const { auto result = reverse_mapping_.find(type_id); if (result == reverse_mapping_.end()) { @@ -279,6 +300,11 @@ class PostgresTypeResolver { } } + // Insert a type into this resolver. Returns NANOARROW_OK on success + // or places a null-terminated error message into error otherwise. The order + // of Inserts matters: Non-array types must be inserted before the corresponding + // array types and class definitions must be inserted before the corresponding + // class type using InsertClass(). ArrowErrorCode Insert(const Item& item, ArrowError* error) { auto result = base_.find(item.typreceive); if (result == base_.end()) { @@ -300,14 +326,14 @@ class PostgresTypeResolver { } case PG_TYPE_RECORD: { - std::vector> child_desc; + std::vector> child_desc; NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); PostgresType out(PG_TYPE_RECORD); for (const auto& child_item : child_desc) { PostgresType child; - NANOARROW_RETURN_NOT_OK(Find(child_item.first, &child, error)); - out.AppendChild(child_item.second, child); + NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); + out.AppendChild(child_item.first, child); } mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); @@ -340,13 +366,24 @@ class PostgresTypeResolver { return NANOARROW_OK; } + // Insert a class definition. For the purposes of resolving a PostgresType + // instance, this is simply a vector of field_name: oid tuples. The specified + // OIDs need not have already been inserted into the type resolver. This + // information can be found in the pg_attribute table (attname and atttypid, + // respectively).
void InsertClass(uint32_t oid, - const std::vector>& cls) { + const std::vector>& cls) { classes_.insert({oid, cls}); } + private: + std::unordered_map mapping_; + std::unordered_map reverse_mapping_; + std::unordered_map>> classes_; + std::unordered_map base_; + ArrowErrorCode ResolveClass(uint32_t oid, - std::vector>* out, + std::vector>* out, ArrowError* error) { auto result = classes_.find(oid); if (result == classes_.end()) { @@ -359,11 +396,18 @@ class PostgresTypeResolver { return NANOARROW_OK; } - private: - std::unordered_map mapping_; - std::unordered_map reverse_mapping_; - std::unordered_map>> classes_; - std::unordered_map base_; + // Returns a sentinel PostgresType instance for each type and builds a lookup + // table based on the receive function name. + static std::unordered_map AllBase() { + std::unordered_map out; + for (PostgresTypeId type_id : PostgresTypeIdAll()) { + PostgresType type(type_id); + out.insert( + {PostgresTyprecv(type_id), type.WithPgTypeInfo(0, PostgresTypname(type_id))}); + } + + return out; + } }; static inline const char* PostgresTyprecv(PostgresTypeId type_id) { diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index aef19a7846..4c1be632f0 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -64,9 +64,9 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; uint32_t class_oid = item.oid; - std::vector> record_fields = { - {GetOID(PostgresTypeId::PG_TYPE_INT4), "int4_col"}, - {GetOID(PostgresTypeId::PG_TYPE_TEXT), "text_col"}}; + std::vector> record_fields = { + {"int4_col", GetOID(PostgresTypeId::PG_TYPE_INT4)}, + {"text_col", GetOID(PostgresTypeId::PG_TYPE_TEXT)}}; InsertClass(class_oid, std::move(record_fields)); item.oid++; @@ -195,13 +195,6 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { schema.release(&schema); } -TEST(PostgresTypeTest, PostgresTypeAllBase) { - auto base_types = PostgresType::AllBase(); 
- EXPECT_EQ(base_types["array_recv"].type_id(), PostgresTypeId::PG_TYPE_ARRAY); - EXPECT_EQ(base_types["array_recv"].typname(), "array"); - EXPECT_EQ(base_types.size(), adbcpq::PostgresTypeIdAll().size()); -} - TEST(PostgresTypeTest, PostgresTypeResolver) { PostgresTypeResolver resolver; ArrowError error; From 77e77b85ea3fdae8adf42e4c47aea59bab2b2d6c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 11:39:03 -0300 Subject: [PATCH 65/90] a few more tweaks --- c/driver/postgresql/postgres_type.h | 4 ++-- c/driver/postgresql/postgres_type_test.cc | 28 +++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 722212f35a..d00019ffae 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -121,7 +121,7 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id); // in the pg_type typname column. static inline const char* PostgresTypname(PostgresTypeId type_id); -// A vector of all type IDs, optionally with the nested types PG_TYPE_ARRAY, +// A vector of all type IDs, optionally including the nested types PG_TYPE_ARRAY, // PG_TYPE_DOMAIN, PG_TYPE_RECORD, and PG_TYPE_RANGE. static inline std::vector PostgresTypeIdAll(bool nested = true); @@ -177,7 +177,7 @@ class PostgresType { const std::string& typname() const { return typname_; } const std::string& field_name() const { return field_name_; } int64_t n_children() const { return static_cast(children_.size()); } - const PostgresType* child(int64_t i) const { return &children_[i]; } + const PostgresType& child(int64_t i) const { return children_[i]; } // Sets appropriate fields of an ArrowSchema that has been initialized using // ArrowSchemaInit. 
This is a recursive operation (i.e., nested types will diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 4c1be632f0..aabbf1cf57 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -101,15 +101,15 @@ TEST(PostgresTypeTest, PostgresTypeBasic) { EXPECT_EQ(array.oid(), 12345); EXPECT_EQ(array.typname(), "array type name"); EXPECT_EQ(array.n_children(), 1); - EXPECT_EQ(array.child(0)->oid(), type.oid()); - EXPECT_EQ(array.child(0)->type_id(), type.type_id()); + EXPECT_EQ(array.child(0).oid(), type.oid()); + EXPECT_EQ(array.child(0).type_id(), type.type_id()); PostgresType range = type.Range(12345, "range type name"); EXPECT_EQ(range.oid(), 12345); EXPECT_EQ(range.typname(), "range type name"); EXPECT_EQ(range.n_children(), 1); - EXPECT_EQ(range.child(0)->oid(), type.oid()); - EXPECT_EQ(range.child(0)->type_id(), type.type_id()); + EXPECT_EQ(range.child(0).oid(), type.oid()); + EXPECT_EQ(range.child(0).type_id(), type.type_id()); PostgresType domain = type.Domain(123456, "domain type name"); EXPECT_EQ(domain.oid(), 123456); @@ -120,8 +120,8 @@ TEST(PostgresTypeTest, PostgresTypeBasic) { record.AppendChild("col1", type); EXPECT_EQ(record.type_id(), PostgresTypeId::PG_TYPE_RECORD); EXPECT_EQ(record.n_children(), 1); - EXPECT_EQ(record.child(0)->type_id(), type.type_id()); - EXPECT_EQ(record.child(0)->field_name(), "col1"); + EXPECT_EQ(record.child(0).type_id(), type.type_id()); + EXPECT_EQ(record.child(0).field_name(), "col1"); } TEST(PostgresTypeTest, PostgresTypeSetSchema) { @@ -262,8 +262,8 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(type.oid(), 11); EXPECT_EQ(type.typname(), "some_array_type_name"); EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_ARRAY); - EXPECT_EQ(type.child(0)->oid(), 10); - EXPECT_EQ(type.child(0)->type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.child(0).oid(), 10); + EXPECT_EQ(type.child(0).type_id(), 
PostgresTypeId::PG_TYPE_BOOL); // Check insert/resolve of range type item.oid = 12; @@ -275,8 +275,8 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(type.oid(), 12); EXPECT_EQ(type.typname(), "some_range_type_name"); EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_RANGE); - EXPECT_EQ(type.child(0)->oid(), 10); - EXPECT_EQ(type.child(0)->type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.child(0).oid(), 10); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_BOOL); // Check insert/resolve of domain type item.oid = 13; @@ -301,8 +301,8 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { NANOARROW_OK); EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::PG_TYPE_RECORD)); EXPECT_EQ(type.n_children(), 2); - EXPECT_EQ(type.child(0)->field_name(), "int4_col"); - EXPECT_EQ(type.child(0)->type_id(), PostgresTypeId::PG_TYPE_INT4); - EXPECT_EQ(type.child(1)->field_name(), "text_col"); - EXPECT_EQ(type.child(1)->type_id(), PostgresTypeId::PG_TYPE_TEXT); + EXPECT_EQ(type.child(0).field_name(), "int4_col"); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_INT4); + EXPECT_EQ(type.child(1).field_name(), "text_col"); + EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::PG_TYPE_TEXT); } From cb9751653be95534e4d63afe063392d70b810b6c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 14:53:08 -0300 Subject: [PATCH 66/90] factor out building the type resolver --- c/driver/postgresql/database.cc | 231 ++++++++++++++++++++++------ c/driver/postgresql/database.h | 2 + c/driver/postgresql/postgres_type.h | 42 ++++- 3 files changed, 224 insertions(+), 51 deletions(-) diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index 7d764bcd8f..e9dac5af07 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -35,56 +35,7 @@ PostgresDatabase::~PostgresDatabase() = default; AdbcStatusCode PostgresDatabase::Init(struct AdbcError* error) { // Connect to validate the parameters. 
- PGconn* conn = nullptr; - AdbcStatusCode final_status = Connect(&conn, error); - if (final_status != ADBC_STATUS_OK) { - return final_status; - } - - // Build the type mapping table. - const std::string kTypeQuery = R"( -SELECT - oid, - typname, - typreceive -FROM - pg_catalog.pg_type -WHERE - typelem = 0 AND typrelid = 0 AND typbasetype = 0 -)"; - - pg_result* result = PQexec(conn, kTypeQuery.c_str()); - ExecStatusType pq_status = PQresultStatus(result); - if (pq_status == PGRES_TUPLES_OK) { - int num_rows = PQntuples(result); - PostgresTypeResolver::Item item; - - for (int row = 0; row < num_rows; row++) { - const uint32_t oid = static_cast( - std::strtol(PQgetvalue(result, row, 0), /*str_end=*/nullptr, /*base=*/10)); - const char* typname = PQgetvalue(result, row, 1); - const char* typreceive = PQgetvalue(result, row, 2); - - item.oid = oid; - item.typname = typname; - item.typreceive = typreceive; - - // Intentionally ignoring types we don't know how to deal with. These will error - // later if there is a query that actually contains them. - type_resolver_->Insert(item, nullptr); - } - } else { - SetError(error, "Failed to build type mapping table: ", PQerrorMessage(conn)); - final_status = ADBC_STATUS_IO; - } - PQclear(result); - - // Disconnect since PostgreSQL connections can be heavy. 
- { - AdbcStatusCode status = Disconnect(&conn, error); - if (status != ADBC_STATUS_OK) final_status = status; - } - return final_status; + return RebuildTypeResolver(error); } AdbcStatusCode PostgresDatabase::Release(struct AdbcError* error) { @@ -131,4 +82,184 @@ AdbcStatusCode PostgresDatabase::Disconnect(PGconn** conn, struct AdbcError* err } return ADBC_STATUS_OK; } + +// Helpers for building the type resolver from queries +static inline int32_t InsertPgAttributeResult( + pg_result* result, const std::shared_ptr& resolver); + +static inline int32_t InsertPgTypeResult( + pg_result* result, const std::shared_ptr& resolver); + +AdbcStatusCode PostgresDatabase::RebuildTypeResolver(struct AdbcError* error) { + PGconn* conn = nullptr; + AdbcStatusCode final_status = Connect(&conn, error); + if (final_status != ADBC_STATUS_OK) { + return final_status; + } + + // We need a few queries to build the resolver. The current strategy might + // fail for some recursive definitions (e.g., arrays of records of arrays). + // First, one on the pg_attribute table to resolve column names/oids for + // record types. + const std::string kColumnsQuery = R"( +SELECT + attrelid, + attname, + atttypid +FROM + pg_catalog.pg_attribute +ORDER BY + attrelid, attnum +)"; + + // Second, a query of the pg_type table with the arrays last. + // This query may need a few attempts to handle recursive definitions + // (e.g., record types with array column). Put the arrays last to minimize + // the number of attempts we need. This currently won't handle range types. + const std::string kTypeQuery = R"( +SELECT + oid, + typname, + typreceive, + typbasetype, + typelem, + typrelid +FROM + pg_catalog.pg_type +WHERE + (typreceive != 0 OR typname = 'aclitem') AND typtype != 'r' +ORDER BY + typelem +)"; + + // Create a new type resolver (this instance's type_resolver_ member + // will be updated at the end if this succeeds). 
+ auto resolver = std::make_shared(); + + // Insert record type definitions (this includes table schemas) + fprintf(stdout, "CLASS DEFINITIONS-----------\n"); + pg_result* result = PQexec(conn, kColumnsQuery.c_str()); + ExecStatusType pq_status = PQresultStatus(result); + if (pq_status == PGRES_TUPLES_OK) { + InsertPgAttributeResult(result, resolver); + } else { + SetError(error, "Failed to build type mapping table: ", PQerrorMessage(conn)); + final_status = ADBC_STATUS_IO; + } + + PQclear(result); + + // Attempt filling the resolver a few times to handle recursive definitions. + int32_t max_attempts = 3; + for (int32_t i = 0; i < max_attempts; i++) { + fprintf(stdout, "TYPES [%d]-----------\n", i); + result = PQexec(conn, kTypeQuery.c_str()); + ExecStatusType pq_status = PQresultStatus(result); + if (pq_status == PGRES_TUPLES_OK) { + InsertPgTypeResult(result, resolver); + } else { + SetError(error, "Failed to build type mapping table: ", PQerrorMessage(conn)); + final_status = ADBC_STATUS_IO; + } + + PQclear(result); + if (final_status != ADBC_STATUS_OK) { + break; + } + } + + // Disconnect since PostgreSQL connections can be heavy. 
+ { + AdbcStatusCode status = Disconnect(&conn, error); + if (status != ADBC_STATUS_OK) final_status = status; + } + + if (final_status == ADBC_STATUS_OK) { + type_resolver_ = std::move(resolver); + } + + return final_status; +} + +static inline int32_t InsertPgAttributeResult( + pg_result* result, const std::shared_ptr& resolver) { + int num_rows = PQntuples(result); + std::vector> columns; + uint32_t current_type_oid = 0; + int32_t n_added = 0; + + for (int row = 0; row < num_rows; row++) { + const uint32_t type_oid = static_cast( + std::strtol(PQgetvalue(result, row, 0), /*str_end=*/nullptr, /*base=*/10)); + const char* col_name = PQgetvalue(result, row, 1); + const uint32_t col_oid = static_cast( + std::strtol(PQgetvalue(result, row, 2), /*str_end=*/nullptr, /*base=*/10)); + + if (type_oid != current_type_oid && !columns.empty()) { + resolver->InsertClass(current_type_oid, columns); + fprintf(stdout, "Inserting class with oid %ld\n", + static_cast(current_type_oid)); + columns.clear(); + current_type_oid = type_oid; + n_added++; + } + + columns.push_back({col_name, col_oid}); + } + + if (!columns.empty()) { + resolver->InsertClass(current_type_oid, columns); + fprintf(stdout, "Inserting class with oid %ld\n", + static_cast(current_type_oid)); + n_added++; + } + + return n_added; +} + +static inline int32_t InsertPgTypeResult( + pg_result* result, const std::shared_ptr& resolver) { + int num_rows = PQntuples(result); + PostgresTypeResolver::Item item; + int32_t n_added = 0; + + for (int row = 0; row < num_rows; row++) { + const uint32_t oid = static_cast( + std::strtol(PQgetvalue(result, row, 0), /*str_end=*/nullptr, /*base=*/10)); + const char* typname = PQgetvalue(result, row, 1); + const char* typreceive = PQgetvalue(result, row, 2); + const uint32_t typbasetype = static_cast( + std::strtol(PQgetvalue(result, row, 3), /*str_end=*/nullptr, /*base=*/10)); + const uint32_t typelem = static_cast( + std::strtol(PQgetvalue(result, row, 4), /*str_end=*/nullptr, 
/*base=*/10)); + const uint32_t typrelid = static_cast( + std::strtol(PQgetvalue(result, row, 5), /*str_end=*/nullptr, /*base=*/10)); + + // Special case the aclitem because it shows up in a bunch of internal tables + if (strcmp(typname, "aclitem") == 0) { + typreceive = "aclitem_recv"; + } + + item.oid = oid; + item.typname = typname; + item.typreceive = typreceive; + item.class_oid = typrelid; + if (typbasetype != 0) { + item.child_oid = typbasetype; + } else { + item.child_oid = typelem; + } + + ArrowError err; + if (resolver->Insert(item, &err) == NANOARROW_OK) { + fprintf(stdout, "[v] %s\n", item.typname); + n_added++; + } else { + fprintf(stdout, "[X] %s: %s\n", item.typname, err.message); + } + } + + return n_added; +} + } // namespace adbcpq diff --git a/c/driver/postgresql/database.h b/c/driver/postgresql/database.h index b57241febd..f10464787a 100644 --- a/c/driver/postgresql/database.h +++ b/c/driver/postgresql/database.h @@ -46,6 +46,8 @@ class PostgresDatabase { return type_resolver_; } + AdbcStatusCode RebuildTypeResolver(struct AdbcError* error); + private: int32_t open_connections_; std::string uri_; diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index d00019ffae..a765280256 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -31,6 +31,7 @@ namespace adbcpq { // An enum of the types available in most Postgres pg_type tables enum PostgresTypeId { PG_TYPE_UNINITIALIZED, + PG_TYPE_ACLITEM, PG_TYPE_ANYARRAY, PG_TYPE_ANYCOMPATIBLEARRAY, PG_TYPE_ARRAY, @@ -412,6 +413,8 @@ class PostgresTypeResolver { static inline const char* PostgresTyprecv(PostgresTypeId type_id) { switch (type_id) { + case PG_TYPE_ACLITEM: + return "aclitem_recv"; case PG_TYPE_ANYARRAY: return "anyarray_recv"; case PG_TYPE_ANYCOMPATIBLEARRAY: @@ -490,6 +493,20 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id) { return "oidvectorrecv"; case PG_TYPE_PATH: return "path_recv"; + case 
PG_TYPE_PG_NODE_TREE: + return "pg_node_tree_recv"; + case PG_TYPE_PG_NDISTINCT: + return "pg_ndistinct_recv"; + case PG_TYPE_PG_DEPENDENCIES: + return "pg_dependencies_recv"; + case PG_TYPE_PG_LSN: + return "pg_lsn_recv"; + case PG_TYPE_PG_MCV_LIST: + return "pg_mcv_list_recv"; + case PG_TYPE_PG_DDL_COMMAND: + return "pg_ddl_command_recv"; + case PG_TYPE_PG_SNAPSHOT: + return "pg_snapshot_recv"; case PG_TYPE_POINT: return "point_recv"; case PG_TYPE_POLY: @@ -561,6 +578,8 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id) { static inline const char* PostgresTypname(PostgresTypeId type_id) { switch (type_id) { + case PG_TYPE_ACLITEM: + return "aclitem"; case PG_TYPE_ANYARRAY: return "anyarray"; case PG_TYPE_ANYCOMPATIBLEARRAY: @@ -639,6 +658,20 @@ static inline const char* PostgresTypname(PostgresTypeId type_id) { return "oidvector"; case PG_TYPE_PATH: return "path"; + case PG_TYPE_PG_NODE_TREE: + return "pg_node_tree"; + case PG_TYPE_PG_NDISTINCT: + return "pg_ndistinct"; + case PG_TYPE_PG_DEPENDENCIES: + return "pg_dependencies"; + case PG_TYPE_PG_LSN: + return "pg_lsn"; + case PG_TYPE_PG_MCV_LIST: + return "pg_mcv_list"; + case PG_TYPE_PG_DDL_COMMAND: + return "pg_ddl_command"; + case PG_TYPE_PG_SNAPSHOT: + return "pg_snapshot"; case PG_TYPE_POINT: return "point"; case PG_TYPE_POLY: @@ -709,7 +742,7 @@ static inline const char* PostgresTypname(PostgresTypeId type_id) { } static inline std::vector PostgresTypeIdAll(bool nested) { - std::vector base = {PG_TYPE_UNINITIALIZED, + std::vector base = {PG_TYPE_ACLITEM, PG_TYPE_ANYARRAY, PG_TYPE_ANYCOMPATIBLEARRAY, PG_TYPE_BIT, @@ -747,6 +780,13 @@ static inline std::vector PostgresTypeIdAll(bool nested) { PG_TYPE_OID, PG_TYPE_OIDVECTOR, PG_TYPE_PATH, + PG_TYPE_PG_NODE_TREE, + PG_TYPE_PG_NDISTINCT, + PG_TYPE_PG_DEPENDENCIES, + PG_TYPE_PG_LSN, + PG_TYPE_PG_MCV_LIST, + PG_TYPE_PG_DDL_COMMAND, + PG_TYPE_PG_SNAPSHOT, PG_TYPE_POINT, PG_TYPE_POLY, PG_TYPE_REGCLASS, From 97110ddcc82dea94c301bdef09acdb85260049d7 
Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 16:04:07 -0300 Subject: [PATCH 67/90] some tweaks to support all internal types --- c/driver/postgresql/database.cc | 39 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index e9dac5af07..e4a28aecee 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -112,24 +112,24 @@ ORDER BY attrelid, attnum )"; - // Second, a query of the pg_type table with the arrays last. - // This query may need a few attempts to handle recursive definitions - // (e.g., record types with array column). Put the arrays last to minimize - // the number of attempts we need. This currently won't handle range types. + // Second, a query of the pg_type table. This query may need a few attempts to handle + // recursive definitions (e.g., record types with array column). This currently won't + // handle range types because those rows don't have child OID information. Array types + // are inserted after a successful insert of the element type.
const std::string kTypeQuery = R"( SELECT oid, typname, typreceive, typbasetype, - typelem, + typarray, typrelid FROM pg_catalog.pg_type WHERE - (typreceive != 0 OR typname = 'aclitem') AND typtype != 'r' + (typreceive != 0 OR typname = 'aclitem') AND typtype != 'r' AND typreceive::TEXT != 'array_recv' ORDER BY - typelem + oid )"; // Create a new type resolver (this instance's type_resolver_ member @@ -230,7 +230,7 @@ static inline int32_t InsertPgTypeResult( const char* typreceive = PQgetvalue(result, row, 2); const uint32_t typbasetype = static_cast( std::strtol(PQgetvalue(result, row, 3), /*str_end=*/nullptr, /*base=*/10)); - const uint32_t typelem = static_cast( + const uint32_t typarray = static_cast( std::strtol(PQgetvalue(result, row, 4), /*str_end=*/nullptr, /*base=*/10)); const uint32_t typrelid = static_cast( std::strtol(PQgetvalue(result, row, 5), /*str_end=*/nullptr, /*base=*/10)); @@ -244,18 +244,19 @@ static inline int32_t InsertPgTypeResult( item.typname = typname; item.typreceive = typreceive; item.class_oid = typrelid; - if (typbasetype != 0) { - item.child_oid = typbasetype; - } else { - item.child_oid = typelem; - } + item.base_oid = typbasetype; - ArrowError err; - if (resolver->Insert(item, &err) == NANOARROW_OK) { - fprintf(stdout, "[v] %s\n", item.typname); - n_added++; - } else { - fprintf(stdout, "[X] %s: %s\n", item.typname, err.message); + int result = resolver->Insert(item, nullptr); + + // If there's an array type and the insert succeeded, add that now too + if (result == NANOARROW_OK && typarray != 0) { + std::string array_typname = StringBuilder("_", typname); + item.oid = typarray; + item.typname = array_typname.c_str(); + item.typreceive = "array_recv"; + item.child_oid = oid; + + resolver->Insert(item, nullptr); } } From 14e59292160003bb3b0008bbfe8bf03c9348e0aa Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 16:08:16 -0300 Subject: [PATCH 68/90] remove some more debugging --- c/driver/postgresql/database.cc | 
6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index e4a28aecee..ea2b825072 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -19,6 +19,8 @@ #include #include +#include +#include #include #include @@ -197,8 +199,6 @@ static inline int32_t InsertPgAttributeResult( if (type_oid != current_type_oid && !columns.empty()) { resolver->InsertClass(current_type_oid, columns); - fprintf(stdout, "Inserting class with oid %ld\n", - static_cast(current_type_oid)); columns.clear(); current_type_oid = type_oid; n_added++; @@ -209,8 +209,6 @@ static inline int32_t InsertPgAttributeResult( if (!columns.empty()) { resolver->InsertClass(current_type_oid, columns); - fprintf(stdout, "Inserting class with oid %ld\n", - static_cast(current_type_oid)); n_added++; } From cd72468215ce381ee70b1cd886b094bff2e823b2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 16:46:42 -0300 Subject: [PATCH 69/90] remove more debugging --- c/driver/postgresql/database.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index ea2b825072..08ff0fc2f1 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -139,7 +139,6 @@ ORDER BY auto resolver = std::make_shared(); // Insert record type definitions (this includes table schemas) - fprintf(stdout, "CLASS DEFINITIONS-----------\n"); pg_result* result = PQexec(conn, kColumnsQuery.c_str()); ExecStatusType pq_status = PQresultStatus(result); if (pq_status == PGRES_TUPLES_OK) { @@ -154,7 +153,6 @@ ORDER BY // Attempt filling the resolver a few times to handle recursive definitions. 
int32_t max_attempts = 3; for (int32_t i = 0; i < max_attempts; i++) { - fprintf(stdout, "TYPES [%d]-----------\n", i); result = PQexec(conn, kTypeQuery.c_str()); ExecStatusType pq_status = PQresultStatus(result); if (pq_status == PGRES_TUPLES_OK) { From 3ce92a986d9557a802bbf5339dedef8bedec0b92 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 10 Apr 2023 17:01:07 -0300 Subject: [PATCH 70/90] handle +test reverse lookup --- c/driver/postgresql/postgres_type.h | 61 ++++++++++++- c/driver/postgresql/postgres_type_test.cc | 100 ++++++++++++++++++++++ 2 files changed, 160 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index a765280256..0f7a6f945c 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -290,6 +290,18 @@ class PostgresTypeResolver { return NANOARROW_OK; } + ArrowErrorCode FindArray(uint32_t child_oid, PostgresType* type_out, + ArrowError* error) const { + auto array_oid_lookup = array_mapping_.find(child_oid); + if (array_oid_lookup == array_mapping_.end()) { + ArrowErrorSet(error, "Postgres array type with child oid %ld not found", + static_cast(child_oid)); // NOLINT(runtime/int) + return EINVAL; + } + + return Find(array_oid_lookup->second, type_out, error); + } + // Resolve the oid for a given type_id. Returns 0 if the oid cannot be // resolved. 
uint32_t GetOID(PostgresTypeId type_id) const { @@ -314,7 +326,7 @@ class PostgresTypeResolver { return ENOTSUP; } - const PostgresType& base = (*result).second; + const PostgresType& base = result->second; PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); switch (base.type_id()) { @@ -323,6 +335,7 @@ class PostgresTypeResolver { NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); reverse_mapping_.insert({base.type_id(), item.oid}); + array_mapping_.insert({child.oid(), item.oid}); break; } @@ -380,6 +393,7 @@ class PostgresTypeResolver { private: std::unordered_map mapping_; std::unordered_map reverse_mapping_; + std::unordered_map array_mapping_; std::unordered_map>> classes_; std::unordered_map base_; @@ -411,6 +425,51 @@ class PostgresTypeResolver { } }; +static inline ArrowErrorCode PostgresTypeFromSchema(const PostgresTypeResolver& resolver, + ArrowSchema* schema, + PostgresType* out, + ArrowError* error) { + ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + return resolver.Find(resolver.GetOID(PG_TYPE_BOOL), out, error); + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT16: + return resolver.Find(resolver.GetOID(PG_TYPE_INT2), out, error); + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT32: + return resolver.Find(resolver.GetOID(PG_TYPE_INT4), out, error); + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT64: + return resolver.Find(resolver.GetOID(PG_TYPE_INT8), out, error); + case NANOARROW_TYPE_FLOAT: + return resolver.Find(resolver.GetOID(PG_TYPE_FLOAT4), out, error); + case NANOARROW_TYPE_DOUBLE: + return resolver.Find(resolver.GetOID(PG_TYPE_FLOAT8), out, error); + case NANOARROW_TYPE_STRING: + return resolver.Find(resolver.GetOID(PG_TYPE_TEXT), out, error); + case NANOARROW_TYPE_BINARY: + case 
NANOARROW_TYPE_FIXED_SIZE_BINARY: + return resolver.Find(resolver.GetOID(PG_TYPE_BYTEA), out, error); + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: { + PostgresType child; + NANOARROW_RETURN_NOT_OK( + PostgresTypeFromSchema(resolver, schema->children[0], &child, error)); + return resolver.FindArray(child.oid(), out, error); + } + + default: + ArrowErrorSet(error, "Can't map Arrow type '%s' to Postgres type", + ArrowTypeString(schema_view.type)); + return ENOTSUP; + } +} + static inline const char* PostgresTyprecv(PostgresTypeId type_id) { switch (type_id) { case PG_TYPE_ACLITEM: diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index aabbf1cf57..3e32be478a 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -195,6 +195,102 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { schema.release(&schema); } +TEST(PostgresTypeTest, PostgresTypeFromSchema) { + ArrowSchema schema; + PostgresType type; + MockTypeResolver resolver; + ASSERT_EQ(resolver.Init(), NANOARROW_OK); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BOOL), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT8), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT2); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT8), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT2); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, 
NANOARROW_TYPE_INT16), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT2); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT16), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT4); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT32), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT4); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT32), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT8); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT64), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT8); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_FLOAT), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_FLOAT4); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_DOUBLE), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_FLOAT8); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BINARY), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + 
NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BYTEA); + schema.release(&schema); + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_STRING), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_TEXT); + schema.release(&schema); + + ArrowSchemaInit(&schema); + ASSERT_EQ(ArrowSchemaSetType(&schema, NANOARROW_TYPE_LIST), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_BOOL), NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_ARRAY); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_BOOL); + schema.release(&schema); + + ArrowError error; + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO), + NANOARROW_OK); + EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, &error), ENOTSUP); + EXPECT_STREQ(error.message, + "Can't map Arrow type 'interval_month_day_nano' to Postgres type"); + schema.release(&schema); +} + TEST(PostgresTypeTest, PostgresTypeResolver) { PostgresTypeResolver resolver; ArrowError error; @@ -265,6 +361,10 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(type.child(0).oid(), 10); EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_BOOL); + // Check reverse lookup of array type from item type + EXPECT_EQ(resolver.FindArray(10, &type, &error), NANOARROW_OK); + EXPECT_EQ(type.oid(), 11); + // Check insert/resolve of range type item.oid = 12; item.typname = "some_range_type_name"; From e618e5927c5e026177d22d9b8bd47f13f5ef8172 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 14 Apr 2023 10:50:16 -0300 Subject: [PATCH 71/90] maybe fix on old gcc --- c/driver/postgresql/postgres_type.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git 
a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 0f7a6f945c..75342ed1b4 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -305,7 +305,7 @@ class PostgresTypeResolver { // Resolve the oid for a given type_id. Returns 0 if the oid cannot be // resolved. uint32_t GetOID(PostgresTypeId type_id) const { - auto result = reverse_mapping_.find(type_id); + auto result = reverse_mapping_.find(static_cast(type_id)); if (result == reverse_mapping_.end()) { return 0; } else { @@ -334,7 +334,7 @@ class PostgresTypeResolver { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); - reverse_mapping_.insert({base.type_id(), item.oid}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); array_mapping_.insert({child.oid(), item.oid}); break; } @@ -351,7 +351,7 @@ class PostgresTypeResolver { } mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); - reverse_mapping_.insert({base.type_id(), item.oid}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); break; } @@ -359,7 +359,7 @@ class PostgresTypeResolver { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); - reverse_mapping_.insert({base.type_id(), item.oid}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); break; } @@ -367,13 +367,13 @@ class PostgresTypeResolver { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); - reverse_mapping_.insert({base.type_id(), item.oid}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); break; } default: mapping_.insert({item.oid, type}); - reverse_mapping_.insert({base.type_id(), item.oid}); + 
reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); break; } @@ -392,7 +392,9 @@ class PostgresTypeResolver { private: std::unordered_map mapping_; - std::unordered_map reverse_mapping_; + // We can't use PostgresTypeId as an unordered map key because there is no + // built-in hasher for an enum on gcc 4.8 (i.e., R 3.6 on Windows). + std::unordered_map reverse_mapping_; std::unordered_map array_mapping_; std::unordered_map>> classes_; std::unordered_map base_; From d892fa3f9f4d861ce2e8add61e02d9b342c1858d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 14 Apr 2023 11:09:16 -0300 Subject: [PATCH 72/90] remove unused --- c/driver/postgresql/postgresql.cc | 2 -- c/driver/postgresql/statement.cc | 1 - 2 files changed, 3 deletions(-) diff --git a/c/driver/postgresql/postgresql.cc b/c/driver/postgresql/postgresql.cc index d4be5ce82a..8cb998d17a 100644 --- a/c/driver/postgresql/postgresql.cc +++ b/c/driver/postgresql/postgresql.cc @@ -307,8 +307,6 @@ AdbcStatusCode PostgresStatementExecutePartitions(struct AdbcStatement* statemen int64_t* rows_affected, struct AdbcError* error) { if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); return ADBC_STATUS_NOT_IMPLEMENTED; } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 31a2900bec..97df575129 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -112,7 +112,6 @@ AdbcStatusCode InferSchema(const PostgresTypeResolver& type_resolver, PGresult* ArrowSchemaInit(out); CHECK_NA_ADBC(ArrowSchemaSetTypeStruct(out, num_fields), error); for (int i = 0; i < num_fields; i++) { - ArrowType field_type = NANOARROW_TYPE_NA; const Oid pg_oid = PQftype(result, i); PostgresType pg_type; if (type_resolver.Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) { From 1bfab0c6fc12049b856c4e6c215a3c9dfde6d3a1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 14 Apr 2023 
14:42:56 -0300 Subject: [PATCH 73/90] maybe fix windows R build --- r/adbcpostgresql/src/Makevars.ucrt | 1 - r/adbcpostgresql/src/Makevars.win | 1 - 2 files changed, 2 deletions(-) diff --git a/r/adbcpostgresql/src/Makevars.ucrt b/r/adbcpostgresql/src/Makevars.ucrt index ef55b83b51..0fc2d0c0af 100644 --- a/r/adbcpostgresql/src/Makevars.ucrt +++ b/r/adbcpostgresql/src/Makevars.ucrt @@ -22,6 +22,5 @@ OBJECTS = init.o \ connection.o \ database.o \ statement.o \ - type.o \ postgresql.o \ nanoarrow/nanoarrow.o diff --git a/r/adbcpostgresql/src/Makevars.win b/r/adbcpostgresql/src/Makevars.win index e930737f22..a45dc513ea 100644 --- a/r/adbcpostgresql/src/Makevars.win +++ b/r/adbcpostgresql/src/Makevars.win @@ -25,7 +25,6 @@ OBJECTS = init.o \ connection.o \ database.o \ statement.o \ - type.o \ postgresql.o \ nanoarrow/nanoarrow.o From cb2d5e92bf85989fa1843dac0b4508034e6e41c1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 14 Apr 2023 14:44:35 -0300 Subject: [PATCH 74/90] fix sign compare --- c/driver/postgresql/statement.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 97df575129..66dc661a63 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -387,7 +387,7 @@ int TupleReader::GetNext(struct ArrowArray* out) { kPgCopyBinarySignature.size() + sizeof(uint32_t) + sizeof(uint32_t); // https://www.postgresql.org/docs/14/sql-copy.html#id-1.9.3.55.9.4.5 const int size = PQgetCopyData(conn_, &pgbuf_, /*async=*/0); - if (size < kPqHeaderLength) { + if (size < static_cast(kPqHeaderLength)) { return EIO; } else if (std::strcmp(pgbuf_, kPgCopyBinarySignature.data()) != 0) { return EIO; From 92394bf0bd176e1d15f65f380f7109252fd79f41 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:16:54 -0300 Subject: [PATCH 75/90] rename types --- c/driver/postgresql/postgres_type.h | 696 +++++++++++----------- 
c/driver/postgresql/postgres_type_test.cc | 90 +-- c/driver/postgresql/statement.cc | 10 +- 3 files changed, 398 insertions(+), 398 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 75342ed1b4..b08897daa5 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -30,86 +30,86 @@ namespace adbcpq { // An enum of the types available in most Postgres pg_type tables enum PostgresTypeId { - PG_TYPE_UNINITIALIZED, - PG_TYPE_ACLITEM, - PG_TYPE_ANYARRAY, - PG_TYPE_ANYCOMPATIBLEARRAY, - PG_TYPE_ARRAY, - PG_TYPE_BIT, - PG_TYPE_BOOL, - PG_TYPE_BOX, - PG_TYPE_BPCHAR, - PG_TYPE_BRIN_BLOOM_SUMMARY, - PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY, - PG_TYPE_BYTEA, - PG_TYPE_CASH, - PG_TYPE_CHAR, - PG_TYPE_CIDR, - PG_TYPE_CID, - PG_TYPE_CIRCLE, - PG_TYPE_CSTRING, - PG_TYPE_DATE, - PG_TYPE_DOMAIN, - PG_TYPE_FLOAT4, - PG_TYPE_FLOAT8, - PG_TYPE_INET, - PG_TYPE_INT2, - PG_TYPE_INT2VECTOR, - PG_TYPE_INT4, - PG_TYPE_INT8, - PG_TYPE_INTERVAL, - PG_TYPE_JSON, - PG_TYPE_JSONB, - PG_TYPE_JSONPATH, - PG_TYPE_LINE, - PG_TYPE_LSEG, - PG_TYPE_MACADDR, - PG_TYPE_MACADDR8, - PG_TYPE_MULTIRANGE, - PG_TYPE_NAME, - PG_TYPE_NUMERIC, - PG_TYPE_OID, - PG_TYPE_OIDVECTOR, - PG_TYPE_PATH, - PG_TYPE_PG_DDL_COMMAND, - PG_TYPE_PG_DEPENDENCIES, - PG_TYPE_PG_LSN, - PG_TYPE_PG_MCV_LIST, - PG_TYPE_PG_NDISTINCT, - PG_TYPE_PG_NODE_TREE, - PG_TYPE_PG_SNAPSHOT, - PG_TYPE_POINT, - PG_TYPE_POLY, - PG_TYPE_RANGE, - PG_TYPE_RECORD, - PG_TYPE_REGCLASS, - PG_TYPE_REGCOLLATION, - PG_TYPE_REGCONFIG, - PG_TYPE_REGDICTIONARY, - PG_TYPE_REGNAMESPACE, - PG_TYPE_REGOPERATOR, - PG_TYPE_REGOPER, - PG_TYPE_REGPROCEDURE, - PG_TYPE_REGPROC, - PG_TYPE_REGROLE, - PG_TYPE_REGTYPE, - PG_TYPE_TEXT, - PG_TYPE_TID, - PG_TYPE_TIME, - PG_TYPE_TIMESTAMP, - PG_TYPE_TIMESTAMPTZ, - PG_TYPE_TIMETZ, - PG_TYPE_TSQUERY, - PG_TYPE_TSVECTOR, - PG_TYPE_TXID_SNAPSHOT, - PG_TYPE_UNKNOWN, - PG_TYPE_UUID, - PG_TYPE_VARBIT, - PG_TYPE_VARCHAR, - PG_TYPE_VOID, - PG_TYPE_XID8, - 
PG_TYPE_XID, - PG_TYPE_XML + TYPE_ID_UNINITIALIZED, + TYPE_ID_ACLITEM, + TYPE_ID_ANYARRAY, + TYPE_ID_ANYCOMPATIBLEARRAY, + TYPE_ID_ARRAY, + TYPE_ID_BIT, + TYPE_ID_BOOL, + TYPE_ID_BOX, + TYPE_ID_BPCHAR, + TYPE_ID_BRIN_BLOOM_SUMMARY, + TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY, + TYPE_ID_BYTEA, + TYPE_ID_CASH, + TYPE_ID_CHAR, + TYPE_ID_CIDR, + TYPE_ID_CID, + TYPE_ID_CIRCLE, + TYPE_ID_CSTRING, + TYPE_ID_DATE, + TYPE_ID_DOMAIN, + TYPE_ID_FLOAT4, + TYPE_ID_FLOAT8, + TYPE_ID_INET, + TYPE_ID_INT2, + TYPE_ID_INT2VECTOR, + TYPE_ID_INT4, + TYPE_ID_INT8, + TYPE_ID_INTERVAL, + TYPE_ID_JSON, + TYPE_ID_JSONB, + TYPE_ID_JSONPATH, + TYPE_ID_LINE, + TYPE_ID_LSEG, + TYPE_ID_MACADDR, + TYPE_ID_MACADDR8, + TYPE_ID_MULTIRANGE, + TYPE_ID_NAME, + TYPE_ID_NUMERIC, + TYPE_ID_OID, + TYPE_ID_OIDVECTOR, + TYPE_ID_PATH, + TYPE_ID_PG_DDL_COMMAND, + TYPE_ID_PG_DEPENDENCIES, + TYPE_ID_PG_LSN, + TYPE_ID_PG_MCV_LIST, + TYPE_ID_PG_NDISTINCT, + TYPE_ID_PG_NODE_TREE, + TYPE_ID_PG_SNAPSHOT, + TYPE_ID_POINT, + TYPE_ID_POLY, + TYPE_ID_RANGE, + TYPE_ID_RECORD, + TYPE_ID_REGCLASS, + TYPE_ID_REGCOLLATION, + TYPE_ID_REGCONFIG, + TYPE_ID_REGDICTIONARY, + TYPE_ID_REGNAMESPACE, + TYPE_ID_REGOPERATOR, + TYPE_ID_REGOPER, + TYPE_ID_REGPROCEDURE, + TYPE_ID_REGPROC, + TYPE_ID_REGROLE, + TYPE_ID_REGTYPE, + TYPE_ID_TEXT, + TYPE_ID_TID, + TYPE_ID_TIME, + TYPE_ID_TIMESTAMP, + TYPE_ID_TIMESTAMPTZ, + TYPE_ID_TIMETZ, + TYPE_ID_TSQUERY, + TYPE_ID_TSVECTOR, + TYPE_ID_TXID_SNAPSHOT, + TYPE_ID_UNKNOWN, + TYPE_ID_UUID, + TYPE_ID_VARBIT, + TYPE_ID_VARCHAR, + TYPE_ID_VOID, + TYPE_ID_XID8, + TYPE_ID_XID, + TYPE_ID_XML }; // Returns the receive function name as defined in the typrecieve column @@ -122,8 +122,8 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id); // in the pg_type typname column. static inline const char* PostgresTypname(PostgresTypeId type_id); -// A vector of all type IDs, optionally including the nested types PG_TYPE_ARRAY, -// PG_TYPE_DOMAIN, PG_TYPE_RECORD, and PG_TYPE_RANGE. 
+// A vector of all type IDs, optionally including the nested types TYPE_ID_ARRAY, +// TYPE_ID_DOMAIN, TYPE_ID_RECORD, and TYPE_ID_RANGE. static inline std::vector PostgresTypeIdAll(bool nested = true); // An abstraction of a (potentially nested and/or parameterized) Postgres @@ -133,7 +133,7 @@ class PostgresType { public: explicit PostgresType(PostgresTypeId type_id) : oid_(0), type_id_(type_id) {} - PostgresType() : PostgresType(PG_TYPE_UNINITIALIZED) {} + PostgresType() : PostgresType(TYPE_ID_UNINITIALIZED) {} void AppendChild(const std::string& field_name, const PostgresType& type) { PostgresType child(type); @@ -154,7 +154,7 @@ class PostgresType { } PostgresType Array(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(PG_TYPE_ARRAY); + PostgresType out(TYPE_ID_ARRAY); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; @@ -166,7 +166,7 @@ class PostgresType { } PostgresType Range(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(PG_TYPE_RANGE); + PostgresType out(TYPE_ID_RANGE); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; @@ -189,42 +189,42 @@ class PostgresType { // binary COPY representation in the output. 
ArrowErrorCode SetSchema(ArrowSchema* schema) const { switch (type_id_) { - case PG_TYPE_BOOL: + case TYPE_ID_BOOL: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; - case PG_TYPE_INT2: + case TYPE_ID_INT2: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); break; - case PG_TYPE_INT4: + case TYPE_ID_INT4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); break; - case PG_TYPE_INT8: + case TYPE_ID_INT8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); break; - case PG_TYPE_FLOAT4: + case TYPE_ID_FLOAT4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); break; - case PG_TYPE_FLOAT8: + case TYPE_ID_FLOAT8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; - case PG_TYPE_CHAR: - case PG_TYPE_BPCHAR: - case PG_TYPE_VARCHAR: - case PG_TYPE_TEXT: + case TYPE_ID_CHAR: + case TYPE_ID_BPCHAR: + case TYPE_ID_VARCHAR: + case TYPE_ID_TEXT: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; - case PG_TYPE_BYTEA: + case TYPE_ID_BYTEA: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; - case PG_TYPE_RECORD: + case TYPE_ID_RECORD: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); for (int64_t i = 0; i < n_children(); i++) { NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); } break; - case PG_TYPE_ARRAY: + case TYPE_ID_ARRAY: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); break; @@ -330,7 +330,7 @@ class PostgresTypeResolver { PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); switch (base.type_id()) { - case PG_TYPE_ARRAY: { + case TYPE_ID_ARRAY: { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); @@ -339,11 
+339,11 @@ class PostgresTypeResolver { break; } - case PG_TYPE_RECORD: { + case TYPE_ID_RECORD: { std::vector> child_desc; NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); - PostgresType out(PG_TYPE_RECORD); + PostgresType out(TYPE_ID_RECORD); for (const auto& child_item : child_desc) { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); @@ -355,7 +355,7 @@ class PostgresTypeResolver { break; } - case PG_TYPE_DOMAIN: { + case TYPE_ID_DOMAIN: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); @@ -363,7 +363,7 @@ class PostgresTypeResolver { break; } - case PG_TYPE_RANGE: { + case TYPE_ID_RANGE: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); @@ -436,26 +436,26 @@ static inline ArrowErrorCode PostgresTypeFromSchema(const PostgresTypeResolver& switch (schema_view.type) { case NANOARROW_TYPE_BOOL: - return resolver.Find(resolver.GetOID(PG_TYPE_BOOL), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_BOOL), out, error); case NANOARROW_TYPE_INT8: case NANOARROW_TYPE_UINT8: case NANOARROW_TYPE_INT16: - return resolver.Find(resolver.GetOID(PG_TYPE_INT2), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_INT2), out, error); case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_INT32: - return resolver.Find(resolver.GetOID(PG_TYPE_INT4), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_INT4), out, error); case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT64: - return resolver.Find(resolver.GetOID(PG_TYPE_INT8), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_INT8), out, error); case NANOARROW_TYPE_FLOAT: - return resolver.Find(resolver.GetOID(PG_TYPE_FLOAT4), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_FLOAT4), out, error); case 
NANOARROW_TYPE_DOUBLE: - return resolver.Find(resolver.GetOID(PG_TYPE_FLOAT8), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_FLOAT8), out, error); case NANOARROW_TYPE_STRING: - return resolver.Find(resolver.GetOID(PG_TYPE_TEXT), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_TEXT), out, error); case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return resolver.Find(resolver.GetOID(PG_TYPE_BYTEA), out, error); + return resolver.Find(resolver.GetOID(TYPE_ID_BYTEA), out, error); case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: { @@ -474,163 +474,163 @@ static inline ArrowErrorCode PostgresTypeFromSchema(const PostgresTypeResolver& static inline const char* PostgresTyprecv(PostgresTypeId type_id) { switch (type_id) { - case PG_TYPE_ACLITEM: + case TYPE_ID_ACLITEM: return "aclitem_recv"; - case PG_TYPE_ANYARRAY: + case TYPE_ID_ANYARRAY: return "anyarray_recv"; - case PG_TYPE_ANYCOMPATIBLEARRAY: + case TYPE_ID_ANYCOMPATIBLEARRAY: return "anycompatiblearray_recv"; - case PG_TYPE_ARRAY: + case TYPE_ID_ARRAY: return "array_recv"; - case PG_TYPE_BIT: + case TYPE_ID_BIT: return "bit_recv"; - case PG_TYPE_BOOL: + case TYPE_ID_BOOL: return "boolrecv"; - case PG_TYPE_BOX: + case TYPE_ID_BOX: return "box_recv"; - case PG_TYPE_BPCHAR: + case TYPE_ID_BPCHAR: return "bpcharrecv"; - case PG_TYPE_BRIN_BLOOM_SUMMARY: + case TYPE_ID_BRIN_BLOOM_SUMMARY: return "brin_bloom_summary_recv"; - case PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY: + case TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY: return "brin_minmax_multi_summary_recv"; - case PG_TYPE_BYTEA: + case TYPE_ID_BYTEA: return "bytearecv"; - case PG_TYPE_CASH: + case TYPE_ID_CASH: return "cash_recv"; - case PG_TYPE_CHAR: + case TYPE_ID_CHAR: return "charrecv"; - case PG_TYPE_CIDR: + case TYPE_ID_CIDR: return "cidr_recv"; - case PG_TYPE_CID: + case TYPE_ID_CID: return "cidrecv"; - case PG_TYPE_CIRCLE: + case TYPE_ID_CIRCLE: return "circle_recv"; - case 
PG_TYPE_CSTRING: + case TYPE_ID_CSTRING: return "cstring_recv"; - case PG_TYPE_DATE: + case TYPE_ID_DATE: return "date_recv"; - case PG_TYPE_DOMAIN: + case TYPE_ID_DOMAIN: return "domain_recv"; - case PG_TYPE_FLOAT4: + case TYPE_ID_FLOAT4: return "float4recv"; - case PG_TYPE_FLOAT8: + case TYPE_ID_FLOAT8: return "float8recv"; - case PG_TYPE_INET: + case TYPE_ID_INET: return "inet_recv"; - case PG_TYPE_INT2: + case TYPE_ID_INT2: return "int2recv"; - case PG_TYPE_INT2VECTOR: + case TYPE_ID_INT2VECTOR: return "int2vectorrecv"; - case PG_TYPE_INT4: + case TYPE_ID_INT4: return "int4recv"; - case PG_TYPE_INT8: + case TYPE_ID_INT8: return "int8recv"; - case PG_TYPE_INTERVAL: + case TYPE_ID_INTERVAL: return "interval_recv"; - case PG_TYPE_JSON: + case TYPE_ID_JSON: return "json_recv"; - case PG_TYPE_JSONB: + case TYPE_ID_JSONB: return "jsonb_recv"; - case PG_TYPE_JSONPATH: + case TYPE_ID_JSONPATH: return "jsonpath_recv"; - case PG_TYPE_LINE: + case TYPE_ID_LINE: return "line_recv"; - case PG_TYPE_LSEG: + case TYPE_ID_LSEG: return "lseg_recv"; - case PG_TYPE_MACADDR: + case TYPE_ID_MACADDR: return "macaddr_recv"; - case PG_TYPE_MACADDR8: + case TYPE_ID_MACADDR8: return "macaddr8_recv"; - case PG_TYPE_MULTIRANGE: + case TYPE_ID_MULTIRANGE: return "multirange_recv"; - case PG_TYPE_NAME: + case TYPE_ID_NAME: return "namerecv"; - case PG_TYPE_NUMERIC: + case TYPE_ID_NUMERIC: return "numeric_recv"; - case PG_TYPE_OID: + case TYPE_ID_OID: return "oidrecv"; - case PG_TYPE_OIDVECTOR: + case TYPE_ID_OIDVECTOR: return "oidvectorrecv"; - case PG_TYPE_PATH: + case TYPE_ID_PATH: return "path_recv"; - case PG_TYPE_PG_NODE_TREE: + case TYPE_ID_PG_NODE_TREE: return "pg_node_tree_recv"; - case PG_TYPE_PG_NDISTINCT: + case TYPE_ID_PG_NDISTINCT: return "pg_ndistinct_recv"; - case PG_TYPE_PG_DEPENDENCIES: + case TYPE_ID_PG_DEPENDENCIES: return "pg_dependencies_recv"; - case PG_TYPE_PG_LSN: + case TYPE_ID_PG_LSN: return "pg_lsn_recv"; - case PG_TYPE_PG_MCV_LIST: + case TYPE_ID_PG_MCV_LIST: 
return "pg_mcv_list_recv"; - case PG_TYPE_PG_DDL_COMMAND: + case TYPE_ID_PG_DDL_COMMAND: return "pg_ddl_command_recv"; - case PG_TYPE_PG_SNAPSHOT: + case TYPE_ID_PG_SNAPSHOT: return "pg_snapshot_recv"; - case PG_TYPE_POINT: + case TYPE_ID_POINT: return "point_recv"; - case PG_TYPE_POLY: + case TYPE_ID_POLY: return "poly_recv"; - case PG_TYPE_RANGE: + case TYPE_ID_RANGE: return "range_recv"; - case PG_TYPE_RECORD: + case TYPE_ID_RECORD: return "record_recv"; - case PG_TYPE_REGCLASS: + case TYPE_ID_REGCLASS: return "regclassrecv"; - case PG_TYPE_REGCOLLATION: + case TYPE_ID_REGCOLLATION: return "regcollationrecv"; - case PG_TYPE_REGCONFIG: + case TYPE_ID_REGCONFIG: return "regconfigrecv"; - case PG_TYPE_REGDICTIONARY: + case TYPE_ID_REGDICTIONARY: return "regdictionaryrecv"; - case PG_TYPE_REGNAMESPACE: + case TYPE_ID_REGNAMESPACE: return "regnamespacerecv"; - case PG_TYPE_REGOPERATOR: + case TYPE_ID_REGOPERATOR: return "regoperatorrecv"; - case PG_TYPE_REGOPER: + case TYPE_ID_REGOPER: return "regoperrecv"; - case PG_TYPE_REGPROCEDURE: + case TYPE_ID_REGPROCEDURE: return "regprocedurerecv"; - case PG_TYPE_REGPROC: + case TYPE_ID_REGPROC: return "regprocrecv"; - case PG_TYPE_REGROLE: + case TYPE_ID_REGROLE: return "regrolerecv"; - case PG_TYPE_REGTYPE: + case TYPE_ID_REGTYPE: return "regtyperecv"; - case PG_TYPE_TEXT: + case TYPE_ID_TEXT: return "textrecv"; - case PG_TYPE_TID: + case TYPE_ID_TID: return "tidrecv"; - case PG_TYPE_TIME: + case TYPE_ID_TIME: return "time_recv"; - case PG_TYPE_TIMESTAMP: + case TYPE_ID_TIMESTAMP: return "timestamp_recv"; - case PG_TYPE_TIMESTAMPTZ: + case TYPE_ID_TIMESTAMPTZ: return "timestamptz_recv"; - case PG_TYPE_TIMETZ: + case TYPE_ID_TIMETZ: return "timetz_recv"; - case PG_TYPE_TSQUERY: + case TYPE_ID_TSQUERY: return "tsqueryrecv"; - case PG_TYPE_TSVECTOR: + case TYPE_ID_TSVECTOR: return "tsvectorrecv"; - case PG_TYPE_TXID_SNAPSHOT: + case TYPE_ID_TXID_SNAPSHOT: return "txid_snapshot_recv"; - case PG_TYPE_UNKNOWN: + case 
TYPE_ID_UNKNOWN: return "unknownrecv"; - case PG_TYPE_UUID: + case TYPE_ID_UUID: return "uuid_recv"; - case PG_TYPE_VARBIT: + case TYPE_ID_VARBIT: return "varbit_recv"; - case PG_TYPE_VARCHAR: + case TYPE_ID_VARCHAR: return "varcharrecv"; - case PG_TYPE_VOID: + case TYPE_ID_VOID: return "void_recv"; - case PG_TYPE_XID8: + case TYPE_ID_XID8: return "xid8recv"; - case PG_TYPE_XID: + case TYPE_ID_XID: return "xidrecv"; - case PG_TYPE_XML: + case TYPE_ID_XML: return "xml_recv"; default: return ""; @@ -639,163 +639,163 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id) { static inline const char* PostgresTypname(PostgresTypeId type_id) { switch (type_id) { - case PG_TYPE_ACLITEM: + case TYPE_ID_ACLITEM: return "aclitem"; - case PG_TYPE_ANYARRAY: + case TYPE_ID_ANYARRAY: return "anyarray"; - case PG_TYPE_ANYCOMPATIBLEARRAY: + case TYPE_ID_ANYCOMPATIBLEARRAY: return "anycompatiblearray"; - case PG_TYPE_ARRAY: + case TYPE_ID_ARRAY: return "array"; - case PG_TYPE_BIT: + case TYPE_ID_BIT: return "bit"; - case PG_TYPE_BOOL: + case TYPE_ID_BOOL: return "bool"; - case PG_TYPE_BOX: + case TYPE_ID_BOX: return "box"; - case PG_TYPE_BPCHAR: + case TYPE_ID_BPCHAR: return "bpchar"; - case PG_TYPE_BRIN_BLOOM_SUMMARY: + case TYPE_ID_BRIN_BLOOM_SUMMARY: return "brin_bloom_summary"; - case PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY: + case TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY: return "brin_minmax_multi_summary"; - case PG_TYPE_BYTEA: + case TYPE_ID_BYTEA: return "bytea"; - case PG_TYPE_CASH: + case TYPE_ID_CASH: return "cash"; - case PG_TYPE_CHAR: + case TYPE_ID_CHAR: return "char"; - case PG_TYPE_CIDR: + case TYPE_ID_CIDR: return "cidr"; - case PG_TYPE_CID: + case TYPE_ID_CID: return "cid"; - case PG_TYPE_CIRCLE: + case TYPE_ID_CIRCLE: return "circle"; - case PG_TYPE_CSTRING: + case TYPE_ID_CSTRING: return "cstring"; - case PG_TYPE_DATE: + case TYPE_ID_DATE: return "date"; - case PG_TYPE_DOMAIN: + case TYPE_ID_DOMAIN: return "domain"; - case PG_TYPE_FLOAT4: + case TYPE_ID_FLOAT4: 
return "float4"; - case PG_TYPE_FLOAT8: + case TYPE_ID_FLOAT8: return "float8"; - case PG_TYPE_INET: + case TYPE_ID_INET: return "inet"; - case PG_TYPE_INT2: + case TYPE_ID_INT2: return "int2"; - case PG_TYPE_INT2VECTOR: + case TYPE_ID_INT2VECTOR: return "int2vector"; - case PG_TYPE_INT4: + case TYPE_ID_INT4: return "int4"; - case PG_TYPE_INT8: + case TYPE_ID_INT8: return "int8"; - case PG_TYPE_INTERVAL: + case TYPE_ID_INTERVAL: return "interval"; - case PG_TYPE_JSON: + case TYPE_ID_JSON: return "json"; - case PG_TYPE_JSONB: + case TYPE_ID_JSONB: return "jsonb"; - case PG_TYPE_JSONPATH: + case TYPE_ID_JSONPATH: return "jsonpath"; - case PG_TYPE_LINE: + case TYPE_ID_LINE: return "line"; - case PG_TYPE_LSEG: + case TYPE_ID_LSEG: return "lseg"; - case PG_TYPE_MACADDR: + case TYPE_ID_MACADDR: return "macaddr"; - case PG_TYPE_MACADDR8: + case TYPE_ID_MACADDR8: return "macaddr8"; - case PG_TYPE_MULTIRANGE: + case TYPE_ID_MULTIRANGE: return "multirange"; - case PG_TYPE_NAME: + case TYPE_ID_NAME: return "name"; - case PG_TYPE_NUMERIC: + case TYPE_ID_NUMERIC: return "numeric"; - case PG_TYPE_OID: + case TYPE_ID_OID: return "oid"; - case PG_TYPE_OIDVECTOR: + case TYPE_ID_OIDVECTOR: return "oidvector"; - case PG_TYPE_PATH: + case TYPE_ID_PATH: return "path"; - case PG_TYPE_PG_NODE_TREE: + case TYPE_ID_PG_NODE_TREE: return "pg_node_tree"; - case PG_TYPE_PG_NDISTINCT: + case TYPE_ID_PG_NDISTINCT: return "pg_ndistinct"; - case PG_TYPE_PG_DEPENDENCIES: + case TYPE_ID_PG_DEPENDENCIES: return "pg_dependencies"; - case PG_TYPE_PG_LSN: + case TYPE_ID_PG_LSN: return "pg_lsn"; - case PG_TYPE_PG_MCV_LIST: + case TYPE_ID_PG_MCV_LIST: return "pg_mcv_list"; - case PG_TYPE_PG_DDL_COMMAND: + case TYPE_ID_PG_DDL_COMMAND: return "pg_ddl_command"; - case PG_TYPE_PG_SNAPSHOT: + case TYPE_ID_PG_SNAPSHOT: return "pg_snapshot"; - case PG_TYPE_POINT: + case TYPE_ID_POINT: return "point"; - case PG_TYPE_POLY: + case TYPE_ID_POLY: return "poly"; - case PG_TYPE_RANGE: + case TYPE_ID_RANGE: return 
"range"; - case PG_TYPE_RECORD: + case TYPE_ID_RECORD: return "record"; - case PG_TYPE_REGCLASS: + case TYPE_ID_REGCLASS: return "regclass"; - case PG_TYPE_REGCOLLATION: + case TYPE_ID_REGCOLLATION: return "regcollation"; - case PG_TYPE_REGCONFIG: + case TYPE_ID_REGCONFIG: return "regconfig"; - case PG_TYPE_REGDICTIONARY: + case TYPE_ID_REGDICTIONARY: return "regdictionary"; - case PG_TYPE_REGNAMESPACE: + case TYPE_ID_REGNAMESPACE: return "regnamespace"; - case PG_TYPE_REGOPERATOR: + case TYPE_ID_REGOPERATOR: return "regoperator"; - case PG_TYPE_REGOPER: + case TYPE_ID_REGOPER: return "regoper"; - case PG_TYPE_REGPROCEDURE: + case TYPE_ID_REGPROCEDURE: return "regprocedure"; - case PG_TYPE_REGPROC: + case TYPE_ID_REGPROC: return "regproc"; - case PG_TYPE_REGROLE: + case TYPE_ID_REGROLE: return "regrole"; - case PG_TYPE_REGTYPE: + case TYPE_ID_REGTYPE: return "regtype"; - case PG_TYPE_TEXT: + case TYPE_ID_TEXT: return "text"; - case PG_TYPE_TID: + case TYPE_ID_TID: return "tid"; - case PG_TYPE_TIME: + case TYPE_ID_TIME: return "time"; - case PG_TYPE_TIMESTAMP: + case TYPE_ID_TIMESTAMP: return "timestamp"; - case PG_TYPE_TIMESTAMPTZ: + case TYPE_ID_TIMESTAMPTZ: return "timestamptz"; - case PG_TYPE_TIMETZ: + case TYPE_ID_TIMETZ: return "timetz"; - case PG_TYPE_TSQUERY: + case TYPE_ID_TSQUERY: return "tsquery"; - case PG_TYPE_TSVECTOR: + case TYPE_ID_TSVECTOR: return "tsvector"; - case PG_TYPE_TXID_SNAPSHOT: + case TYPE_ID_TXID_SNAPSHOT: return "txid_snapshot"; - case PG_TYPE_UNKNOWN: + case TYPE_ID_UNKNOWN: return "unknown"; - case PG_TYPE_UUID: + case TYPE_ID_UUID: return "uuid"; - case PG_TYPE_VARBIT: + case TYPE_ID_VARBIT: return "varbit"; - case PG_TYPE_VARCHAR: + case TYPE_ID_VARCHAR: return "varchar"; - case PG_TYPE_VOID: + case TYPE_ID_VOID: return "void"; - case PG_TYPE_XID8: + case TYPE_ID_XID8: return "xid8"; - case PG_TYPE_XID: + case TYPE_ID_XID: return "xid"; - case PG_TYPE_XML: + case TYPE_ID_XML: return "xml"; default: return ""; @@ -803,87 +803,87 @@ 
static inline const char* PostgresTypname(PostgresTypeId type_id) { } static inline std::vector PostgresTypeIdAll(bool nested) { - std::vector base = {PG_TYPE_ACLITEM, - PG_TYPE_ANYARRAY, - PG_TYPE_ANYCOMPATIBLEARRAY, - PG_TYPE_BIT, - PG_TYPE_BOOL, - PG_TYPE_BOX, - PG_TYPE_BPCHAR, - PG_TYPE_BRIN_BLOOM_SUMMARY, - PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY, - PG_TYPE_BYTEA, - PG_TYPE_CASH, - PG_TYPE_CHAR, - PG_TYPE_CIDR, - PG_TYPE_CID, - PG_TYPE_CIRCLE, - PG_TYPE_CSTRING, - PG_TYPE_DATE, - PG_TYPE_FLOAT4, - PG_TYPE_FLOAT8, - PG_TYPE_INET, - PG_TYPE_INT2, - PG_TYPE_INT2VECTOR, - PG_TYPE_INT4, - PG_TYPE_INT8, - PG_TYPE_INTERVAL, - PG_TYPE_JSON, - PG_TYPE_JSONB, - PG_TYPE_JSONPATH, - PG_TYPE_LINE, - PG_TYPE_LSEG, - PG_TYPE_MACADDR, - PG_TYPE_MACADDR8, - PG_TYPE_MULTIRANGE, - PG_TYPE_NAME, - PG_TYPE_NUMERIC, - PG_TYPE_OID, - PG_TYPE_OIDVECTOR, - PG_TYPE_PATH, - PG_TYPE_PG_NODE_TREE, - PG_TYPE_PG_NDISTINCT, - PG_TYPE_PG_DEPENDENCIES, - PG_TYPE_PG_LSN, - PG_TYPE_PG_MCV_LIST, - PG_TYPE_PG_DDL_COMMAND, - PG_TYPE_PG_SNAPSHOT, - PG_TYPE_POINT, - PG_TYPE_POLY, - PG_TYPE_REGCLASS, - PG_TYPE_REGCOLLATION, - PG_TYPE_REGCONFIG, - PG_TYPE_REGDICTIONARY, - PG_TYPE_REGNAMESPACE, - PG_TYPE_REGOPERATOR, - PG_TYPE_REGOPER, - PG_TYPE_REGPROCEDURE, - PG_TYPE_REGPROC, - PG_TYPE_REGROLE, - PG_TYPE_REGTYPE, - PG_TYPE_TEXT, - PG_TYPE_TID, - PG_TYPE_TIME, - PG_TYPE_TIMESTAMP, - PG_TYPE_TIMESTAMPTZ, - PG_TYPE_TIMETZ, - PG_TYPE_TSQUERY, - PG_TYPE_TSVECTOR, - PG_TYPE_TXID_SNAPSHOT, - PG_TYPE_UNKNOWN, - PG_TYPE_UUID, - PG_TYPE_VARBIT, - PG_TYPE_VARCHAR, - PG_TYPE_VOID, - PG_TYPE_XID8, - PG_TYPE_XID, - PG_TYPE_XML}; + std::vector base = {TYPE_ID_ACLITEM, + TYPE_ID_ANYARRAY, + TYPE_ID_ANYCOMPATIBLEARRAY, + TYPE_ID_BIT, + TYPE_ID_BOOL, + TYPE_ID_BOX, + TYPE_ID_BPCHAR, + TYPE_ID_BRIN_BLOOM_SUMMARY, + TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY, + TYPE_ID_BYTEA, + TYPE_ID_CASH, + TYPE_ID_CHAR, + TYPE_ID_CIDR, + TYPE_ID_CID, + TYPE_ID_CIRCLE, + TYPE_ID_CSTRING, + TYPE_ID_DATE, + TYPE_ID_FLOAT4, + TYPE_ID_FLOAT8, + 
TYPE_ID_INET, + TYPE_ID_INT2, + TYPE_ID_INT2VECTOR, + TYPE_ID_INT4, + TYPE_ID_INT8, + TYPE_ID_INTERVAL, + TYPE_ID_JSON, + TYPE_ID_JSONB, + TYPE_ID_JSONPATH, + TYPE_ID_LINE, + TYPE_ID_LSEG, + TYPE_ID_MACADDR, + TYPE_ID_MACADDR8, + TYPE_ID_MULTIRANGE, + TYPE_ID_NAME, + TYPE_ID_NUMERIC, + TYPE_ID_OID, + TYPE_ID_OIDVECTOR, + TYPE_ID_PATH, + TYPE_ID_PG_NODE_TREE, + TYPE_ID_PG_NDISTINCT, + TYPE_ID_PG_DEPENDENCIES, + TYPE_ID_PG_LSN, + TYPE_ID_PG_MCV_LIST, + TYPE_ID_PG_DDL_COMMAND, + TYPE_ID_PG_SNAPSHOT, + TYPE_ID_POINT, + TYPE_ID_POLY, + TYPE_ID_REGCLASS, + TYPE_ID_REGCOLLATION, + TYPE_ID_REGCONFIG, + TYPE_ID_REGDICTIONARY, + TYPE_ID_REGNAMESPACE, + TYPE_ID_REGOPERATOR, + TYPE_ID_REGOPER, + TYPE_ID_REGPROCEDURE, + TYPE_ID_REGPROC, + TYPE_ID_REGROLE, + TYPE_ID_REGTYPE, + TYPE_ID_TEXT, + TYPE_ID_TID, + TYPE_ID_TIME, + TYPE_ID_TIMESTAMP, + TYPE_ID_TIMESTAMPTZ, + TYPE_ID_TIMETZ, + TYPE_ID_TSQUERY, + TYPE_ID_TSVECTOR, + TYPE_ID_TXID_SNAPSHOT, + TYPE_ID_UNKNOWN, + TYPE_ID_UUID, + TYPE_ID_VARBIT, + TYPE_ID_VARCHAR, + TYPE_ID_VOID, + TYPE_ID_XID8, + TYPE_ID_XID, + TYPE_ID_XML}; if (nested) { - base.push_back(PG_TYPE_ARRAY); - base.push_back(PG_TYPE_RECORD); - base.push_back(PG_TYPE_RANGE); - base.push_back(PG_TYPE_DOMAIN); + base.push_back(TYPE_ID_ARRAY); + base.push_back(TYPE_ID_RECORD); + base.push_back(TYPE_ID_RANGE); + base.push_back(TYPE_ID_DOMAIN); } return base; diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 3e32be478a..deccdf361b 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -47,26 +47,26 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; item.typname = "_bool"; item.typreceive = "array_recv"; - item.child_oid = GetOID(PostgresTypeId::PG_TYPE_BOOL); + item.child_oid = GetOID(PostgresTypeId::TYPE_ID_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "boolrange"; item.typreceive = "range_recv"; - 
item.base_oid = GetOID(PostgresTypeId::PG_TYPE_BOOL); + item.base_oid = GetOID(PostgresTypeId::TYPE_ID_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "custombool"; item.typreceive = "domain_recv"; - item.base_oid = GetOID(PostgresTypeId::PG_TYPE_BOOL); + item.base_oid = GetOID(PostgresTypeId::TYPE_ID_BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; uint32_t class_oid = item.oid; std::vector> record_fields = { - {"int4_col", GetOID(PostgresTypeId::PG_TYPE_INT4)}, - {"text_col", GetOID(PostgresTypeId::PG_TYPE_TEXT)}}; + {"int4_col", GetOID(PostgresTypeId::TYPE_ID_INT4)}, + {"text_col", GetOID(PostgresTypeId::TYPE_ID_TEXT)}}; InsertClass(class_oid, std::move(record_fields)); item.oid++; @@ -80,10 +80,10 @@ class MockTypeResolver : public PostgresTypeResolver { }; TEST(PostgresTypeTest, PostgresTypeBasic) { - PostgresType type(PostgresTypeId::PG_TYPE_BOOL); + PostgresType type(PostgresTypeId::TYPE_ID_BOOL); EXPECT_EQ(type.field_name(), ""); EXPECT_EQ(type.typname(), ""); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); EXPECT_EQ(type.oid(), 0); EXPECT_EQ(type.n_children(), 0); @@ -116,9 +116,9 @@ TEST(PostgresTypeTest, PostgresTypeBasic) { EXPECT_EQ(domain.typname(), "domain type name"); EXPECT_EQ(domain.type_id(), type.type_id()); - PostgresType record(PostgresTypeId::PG_TYPE_RECORD); + PostgresType record(PostgresTypeId::TYPE_ID_RECORD); record.AppendChild("col1", type); - EXPECT_EQ(record.type_id(), PostgresTypeId::PG_TYPE_RECORD); + EXPECT_EQ(record.type_id(), PostgresTypeId::TYPE_ID_RECORD); EXPECT_EQ(record.n_children(), 1); EXPECT_EQ(record.child(0).type_id(), type.type_id()); EXPECT_EQ(record.child(0).field_name(), "col1"); @@ -128,64 +128,64 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { ArrowSchema schema; ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_BOOL).SetSchema(&schema), NANOARROW_OK); + 
EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_BOOL).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_INT2).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_INT2).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "s"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_INT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_INT4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "i"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_INT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_INT8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "l"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_FLOAT4).SetSchema(&schema), + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_FLOAT4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "f"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_FLOAT8).SetSchema(&schema), + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_FLOAT8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "g"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_TEXT).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_TEXT).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "u"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_BYTEA).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_BYTEA).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); schema.release(&schema); 
ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::PG_TYPE_BOOL).Array().SetSchema(&schema), + EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_BOOL).Array().SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+l"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType record(PostgresTypeId::PG_TYPE_RECORD); - record.AppendChild("col1", PostgresType(PostgresTypeId::PG_TYPE_BOOL)); + PostgresType record(PostgresTypeId::TYPE_ID_RECORD); + record.AppendChild("col1", PostgresType(PostgresTypeId::TYPE_ID_BOOL)); EXPECT_EQ(record.SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+s"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType unknown(PostgresTypeId::PG_TYPE_BRIN_MINMAX_MULTI_SUMMARY); + PostgresType unknown(PostgresTypeId::TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY); EXPECT_EQ(unknown.WithPgTypeInfo(0, "some_name").SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); @@ -204,73 +204,73 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BOOL), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT8), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT8), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT2); + 
EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT16), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT16), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT32), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT32), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT64), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_INT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_FLOAT), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_FLOAT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_FLOAT4); schema.release(&schema); 
ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_DOUBLE), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_FLOAT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_FLOAT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BINARY), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BYTEA); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BYTEA); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_STRING), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_TEXT); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_TEXT); schema.release(&schema); ArrowSchemaInit(&schema); @@ -278,8 +278,8 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_BOOL), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_ARRAY); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_ARRAY); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_BOOL); schema.release(&schema); ArrowError error; @@ -346,7 +346,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(10, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 10); EXPECT_EQ(type.typname(), "some_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); // Check insert/resolve of array type item.oid = 11; @@ -357,9 +357,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(11, &type, 
&error), NANOARROW_OK); EXPECT_EQ(type.oid(), 11); EXPECT_EQ(type.typname(), "some_array_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_ARRAY); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_ARRAY); EXPECT_EQ(type.child(0).oid(), 10); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_BOOL); // Check reverse lookup of array type from item type EXPECT_EQ(resolver.FindArray(10, &type, &error), NANOARROW_OK); @@ -374,9 +374,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(12, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 12); EXPECT_EQ(type.typname(), "some_range_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_RANGE); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_RANGE); EXPECT_EQ(type.child(0).oid(), 10); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_BOOL); // Check insert/resolve of domain type item.oid = 13; @@ -387,7 +387,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(13, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 13); EXPECT_EQ(type.typname(), "some_domain_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::PG_TYPE_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); } TEST(PostgresTypeTest, PostgresTypeResolveRecord) { @@ -397,12 +397,12 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { PostgresType type; EXPECT_EQ( - resolver.Find(resolver.GetOID(PostgresTypeId::PG_TYPE_RECORD), &type, nullptr), + resolver.Find(resolver.GetOID(PostgresTypeId::TYPE_ID_RECORD), &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::PG_TYPE_RECORD)); + EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::TYPE_ID_RECORD)); EXPECT_EQ(type.n_children(), 2); EXPECT_EQ(type.child(0).field_name(), "int4_col"); - EXPECT_EQ(type.child(0).type_id(), 
PostgresTypeId::PG_TYPE_INT4); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_INT4); EXPECT_EQ(type.child(1).field_name(), "text_col"); - EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::PG_TYPE_TEXT); + EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::TYPE_ID_TEXT); } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 66dc661a63..ef738a3319 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -184,23 +184,23 @@ struct BindStream { PostgresTypeId type_id; switch (bind_schema_fields[i].type) { case ArrowType::NANOARROW_TYPE_INT16: - type_id = PG_TYPE_INT2; + type_id = TYPE_ID_INT2; param_lengths[i] = 2; break; case ArrowType::NANOARROW_TYPE_INT32: - type_id = PG_TYPE_INT4; + type_id = TYPE_ID_INT4; param_lengths[i] = 4; break; case ArrowType::NANOARROW_TYPE_INT64: - type_id = PG_TYPE_INT8; + type_id = TYPE_ID_INT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_DOUBLE: - type_id = PG_TYPE_FLOAT8; + type_id = TYPE_ID_FLOAT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_STRING: - type_id = PG_TYPE_TEXT; + type_id = TYPE_ID_TEXT; param_lengths[i] = 0; break; default: From f071c67fcfb0ce0e04d3f0e442b49dbf0e608197 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:20:24 -0300 Subject: [PATCH 76/90] use enum class --- c/driver/postgresql/postgres_type.h | 698 +++++++++++----------- c/driver/postgresql/postgres_type_test.cc | 96 ++- c/driver/postgresql/statement.cc | 10 +- 3 files changed, 400 insertions(+), 404 deletions(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index b08897daa5..c24e5bbe1f 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -29,87 +29,87 @@ namespace adbcpq { // An enum of the types available in most Postgres pg_type tables -enum PostgresTypeId { - TYPE_ID_UNINITIALIZED, - TYPE_ID_ACLITEM, - TYPE_ID_ANYARRAY, - 
TYPE_ID_ANYCOMPATIBLEARRAY, - TYPE_ID_ARRAY, - TYPE_ID_BIT, - TYPE_ID_BOOL, - TYPE_ID_BOX, - TYPE_ID_BPCHAR, - TYPE_ID_BRIN_BLOOM_SUMMARY, - TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY, - TYPE_ID_BYTEA, - TYPE_ID_CASH, - TYPE_ID_CHAR, - TYPE_ID_CIDR, - TYPE_ID_CID, - TYPE_ID_CIRCLE, - TYPE_ID_CSTRING, - TYPE_ID_DATE, - TYPE_ID_DOMAIN, - TYPE_ID_FLOAT4, - TYPE_ID_FLOAT8, - TYPE_ID_INET, - TYPE_ID_INT2, - TYPE_ID_INT2VECTOR, - TYPE_ID_INT4, - TYPE_ID_INT8, - TYPE_ID_INTERVAL, - TYPE_ID_JSON, - TYPE_ID_JSONB, - TYPE_ID_JSONPATH, - TYPE_ID_LINE, - TYPE_ID_LSEG, - TYPE_ID_MACADDR, - TYPE_ID_MACADDR8, - TYPE_ID_MULTIRANGE, - TYPE_ID_NAME, - TYPE_ID_NUMERIC, - TYPE_ID_OID, - TYPE_ID_OIDVECTOR, - TYPE_ID_PATH, - TYPE_ID_PG_DDL_COMMAND, - TYPE_ID_PG_DEPENDENCIES, - TYPE_ID_PG_LSN, - TYPE_ID_PG_MCV_LIST, - TYPE_ID_PG_NDISTINCT, - TYPE_ID_PG_NODE_TREE, - TYPE_ID_PG_SNAPSHOT, - TYPE_ID_POINT, - TYPE_ID_POLY, - TYPE_ID_RANGE, - TYPE_ID_RECORD, - TYPE_ID_REGCLASS, - TYPE_ID_REGCOLLATION, - TYPE_ID_REGCONFIG, - TYPE_ID_REGDICTIONARY, - TYPE_ID_REGNAMESPACE, - TYPE_ID_REGOPERATOR, - TYPE_ID_REGOPER, - TYPE_ID_REGPROCEDURE, - TYPE_ID_REGPROC, - TYPE_ID_REGROLE, - TYPE_ID_REGTYPE, - TYPE_ID_TEXT, - TYPE_ID_TID, - TYPE_ID_TIME, - TYPE_ID_TIMESTAMP, - TYPE_ID_TIMESTAMPTZ, - TYPE_ID_TIMETZ, - TYPE_ID_TSQUERY, - TYPE_ID_TSVECTOR, - TYPE_ID_TXID_SNAPSHOT, - TYPE_ID_UNKNOWN, - TYPE_ID_UUID, - TYPE_ID_VARBIT, - TYPE_ID_VARCHAR, - TYPE_ID_VOID, - TYPE_ID_XID8, - TYPE_ID_XID, - TYPE_ID_XML +enum class PostgresTypeId { + UNINITIALIZED, + ACLITEM, + ANYARRAY, + ANYCOMPATIBLEARRAY, + ARRAY, + BIT, + BOOL, + BOX, + BPCHAR, + BRIN_BLOOM_SUMMARY, + BRIN_MINMAX_MULTI_SUMMARY, + BYTEA, + CASH, + CHAR, + CIDR, + CID, + CIRCLE, + CSTRING, + DATE, + DOMAIN_, + FLOAT4, + FLOAT8, + INET, + INT2, + INT2VECTOR, + INT4, + INT8, + INTERVAL, + JSON, + JSONB, + JSONPATH, + LINE, + LSEG, + MACADDR, + MACADDR8, + MULTIRANGE, + NAME, + NUMERIC, + OID, + OIDVECTOR, + PATH, + PG_DDL_COMMAND, + PG_DEPENDENCIES, + PG_LSN, + 
PG_MCV_LIST, + PG_NDISTINCT, + PG_NODE_TREE, + PG_SNAPSHOT, + POINT, + POLY, + RANGE, + RECORD, + REGCLASS, + REGCOLLATION, + REGCONFIG, + REGDICTIONARY, + REGNAMESPACE, + REGOPERATOR, + REGOPER, + REGPROCEDURE, + REGPROC, + REGROLE, + REGTYPE, + TEXT, + TID, + TIME, + TIMESTAMP, + TIMESTAMPTZ, + TIMETZ, + TSQUERY, + TSVECTOR, + TXID_SNAPSHOT, + UNKNOWN, + UUID, + VARBIT, + VARCHAR, + VOID, + XID8, + XID, + XML }; // Returns the receive function name as defined in the typrecieve column @@ -122,8 +122,8 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id); // in the pg_type typname column. static inline const char* PostgresTypname(PostgresTypeId type_id); -// A vector of all type IDs, optionally including the nested types TYPE_ID_ARRAY, -// TYPE_ID_DOMAIN, TYPE_ID_RECORD, and TYPE_ID_RANGE. +// A vector of all type IDs, optionally including the nested types PostgresTypeId::ARRAY, +// PostgresTypeId::DOMAIN_, PostgresTypeId::RECORD, and PostgresTypeId::RANGE. static inline std::vector PostgresTypeIdAll(bool nested = true); // An abstraction of a (potentially nested and/or parameterized) Postgres @@ -133,7 +133,7 @@ class PostgresType { public: explicit PostgresType(PostgresTypeId type_id) : oid_(0), type_id_(type_id) {} - PostgresType() : PostgresType(TYPE_ID_UNINITIALIZED) {} + PostgresType() : PostgresType(PostgresTypeId::UNINITIALIZED) {} void AppendChild(const std::string& field_name, const PostgresType& type) { PostgresType child(type); @@ -154,7 +154,7 @@ class PostgresType { } PostgresType Array(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(TYPE_ID_ARRAY); + PostgresType out(PostgresTypeId::ARRAY); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; @@ -166,7 +166,7 @@ class PostgresType { } PostgresType Range(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(TYPE_ID_RANGE); + PostgresType out(PostgresTypeId::RANGE); out.AppendChild("item", *this); out.oid_ = oid; 
out.typname_ = typname; @@ -189,42 +189,42 @@ class PostgresType { // binary COPY representation in the output. ArrowErrorCode SetSchema(ArrowSchema* schema) const { switch (type_id_) { - case TYPE_ID_BOOL: + case PostgresTypeId::BOOL: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; - case TYPE_ID_INT2: + case PostgresTypeId::INT2: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); break; - case TYPE_ID_INT4: + case PostgresTypeId::INT4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); break; - case TYPE_ID_INT8: + case PostgresTypeId::INT8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); break; - case TYPE_ID_FLOAT4: + case PostgresTypeId::FLOAT4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); break; - case TYPE_ID_FLOAT8: + case PostgresTypeId::FLOAT8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; - case TYPE_ID_CHAR: - case TYPE_ID_BPCHAR: - case TYPE_ID_VARCHAR: - case TYPE_ID_TEXT: + case PostgresTypeId::CHAR: + case PostgresTypeId::BPCHAR: + case PostgresTypeId::VARCHAR: + case PostgresTypeId::TEXT: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; - case TYPE_ID_BYTEA: + case PostgresTypeId::BYTEA: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; - case TYPE_ID_RECORD: + case PostgresTypeId::RECORD: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); for (int64_t i = 0; i < n_children(); i++) { NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); } break; - case TYPE_ID_ARRAY: + case PostgresTypeId::ARRAY: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); break; @@ -330,7 +330,7 @@ class PostgresTypeResolver { PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); switch 
(base.type_id()) { - case TYPE_ID_ARRAY: { + case PostgresTypeId::ARRAY: { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); @@ -339,11 +339,11 @@ class PostgresTypeResolver { break; } - case TYPE_ID_RECORD: { + case PostgresTypeId::RECORD: { std::vector> child_desc; NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); - PostgresType out(TYPE_ID_RECORD); + PostgresType out(PostgresTypeId::RECORD); for (const auto& child_item : child_desc) { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); @@ -355,7 +355,7 @@ class PostgresTypeResolver { break; } - case TYPE_ID_DOMAIN: { + case PostgresTypeId::DOMAIN_: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); @@ -363,7 +363,7 @@ class PostgresTypeResolver { break; } - case TYPE_ID_RANGE: { + case PostgresTypeId::RANGE: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); @@ -436,26 +436,26 @@ static inline ArrowErrorCode PostgresTypeFromSchema(const PostgresTypeResolver& switch (schema_view.type) { case NANOARROW_TYPE_BOOL: - return resolver.Find(resolver.GetOID(TYPE_ID_BOOL), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::BOOL), out, error); case NANOARROW_TYPE_INT8: case NANOARROW_TYPE_UINT8: case NANOARROW_TYPE_INT16: - return resolver.Find(resolver.GetOID(TYPE_ID_INT2), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::INT2), out, error); case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_INT32: - return resolver.Find(resolver.GetOID(TYPE_ID_INT4), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::INT4), out, error); case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT64: - return 
resolver.Find(resolver.GetOID(TYPE_ID_INT8), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::INT8), out, error); case NANOARROW_TYPE_FLOAT: - return resolver.Find(resolver.GetOID(TYPE_ID_FLOAT4), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT4), out, error); case NANOARROW_TYPE_DOUBLE: - return resolver.Find(resolver.GetOID(TYPE_ID_FLOAT8), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT8), out, error); case NANOARROW_TYPE_STRING: - return resolver.Find(resolver.GetOID(TYPE_ID_TEXT), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::TEXT), out, error); case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return resolver.Find(resolver.GetOID(TYPE_ID_BYTEA), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::BYTEA), out, error); case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: { @@ -474,163 +474,163 @@ static inline ArrowErrorCode PostgresTypeFromSchema(const PostgresTypeResolver& static inline const char* PostgresTyprecv(PostgresTypeId type_id) { switch (type_id) { - case TYPE_ID_ACLITEM: + case PostgresTypeId::ACLITEM: return "aclitem_recv"; - case TYPE_ID_ANYARRAY: + case PostgresTypeId::ANYARRAY: return "anyarray_recv"; - case TYPE_ID_ANYCOMPATIBLEARRAY: + case PostgresTypeId::ANYCOMPATIBLEARRAY: return "anycompatiblearray_recv"; - case TYPE_ID_ARRAY: + case PostgresTypeId::ARRAY: return "array_recv"; - case TYPE_ID_BIT: + case PostgresTypeId::BIT: return "bit_recv"; - case TYPE_ID_BOOL: + case PostgresTypeId::BOOL: return "boolrecv"; - case TYPE_ID_BOX: + case PostgresTypeId::BOX: return "box_recv"; - case TYPE_ID_BPCHAR: + case PostgresTypeId::BPCHAR: return "bpcharrecv"; - case TYPE_ID_BRIN_BLOOM_SUMMARY: + case PostgresTypeId::BRIN_BLOOM_SUMMARY: return "brin_bloom_summary_recv"; - case TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY: + case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: return 
"brin_minmax_multi_summary_recv"; - case TYPE_ID_BYTEA: + case PostgresTypeId::BYTEA: return "bytearecv"; - case TYPE_ID_CASH: + case PostgresTypeId::CASH: return "cash_recv"; - case TYPE_ID_CHAR: + case PostgresTypeId::CHAR: return "charrecv"; - case TYPE_ID_CIDR: + case PostgresTypeId::CIDR: return "cidr_recv"; - case TYPE_ID_CID: + case PostgresTypeId::CID: return "cidrecv"; - case TYPE_ID_CIRCLE: + case PostgresTypeId::CIRCLE: return "circle_recv"; - case TYPE_ID_CSTRING: + case PostgresTypeId::CSTRING: return "cstring_recv"; - case TYPE_ID_DATE: + case PostgresTypeId::DATE: return "date_recv"; - case TYPE_ID_DOMAIN: + case PostgresTypeId::DOMAIN_: return "domain_recv"; - case TYPE_ID_FLOAT4: + case PostgresTypeId::FLOAT4: return "float4recv"; - case TYPE_ID_FLOAT8: + case PostgresTypeId::FLOAT8: return "float8recv"; - case TYPE_ID_INET: + case PostgresTypeId::INET: return "inet_recv"; - case TYPE_ID_INT2: + case PostgresTypeId::INT2: return "int2recv"; - case TYPE_ID_INT2VECTOR: + case PostgresTypeId::INT2VECTOR: return "int2vectorrecv"; - case TYPE_ID_INT4: + case PostgresTypeId::INT4: return "int4recv"; - case TYPE_ID_INT8: + case PostgresTypeId::INT8: return "int8recv"; - case TYPE_ID_INTERVAL: + case PostgresTypeId::INTERVAL: return "interval_recv"; - case TYPE_ID_JSON: + case PostgresTypeId::JSON: return "json_recv"; - case TYPE_ID_JSONB: + case PostgresTypeId::JSONB: return "jsonb_recv"; - case TYPE_ID_JSONPATH: + case PostgresTypeId::JSONPATH: return "jsonpath_recv"; - case TYPE_ID_LINE: + case PostgresTypeId::LINE: return "line_recv"; - case TYPE_ID_LSEG: + case PostgresTypeId::LSEG: return "lseg_recv"; - case TYPE_ID_MACADDR: + case PostgresTypeId::MACADDR: return "macaddr_recv"; - case TYPE_ID_MACADDR8: + case PostgresTypeId::MACADDR8: return "macaddr8_recv"; - case TYPE_ID_MULTIRANGE: + case PostgresTypeId::MULTIRANGE: return "multirange_recv"; - case TYPE_ID_NAME: + case PostgresTypeId::NAME: return "namerecv"; - case TYPE_ID_NUMERIC: + case 
PostgresTypeId::NUMERIC: return "numeric_recv"; - case TYPE_ID_OID: + case PostgresTypeId::OID: return "oidrecv"; - case TYPE_ID_OIDVECTOR: + case PostgresTypeId::OIDVECTOR: return "oidvectorrecv"; - case TYPE_ID_PATH: + case PostgresTypeId::PATH: return "path_recv"; - case TYPE_ID_PG_NODE_TREE: + case PostgresTypeId::PG_NODE_TREE: return "pg_node_tree_recv"; - case TYPE_ID_PG_NDISTINCT: + case PostgresTypeId::PG_NDISTINCT: return "pg_ndistinct_recv"; - case TYPE_ID_PG_DEPENDENCIES: + case PostgresTypeId::PG_DEPENDENCIES: return "pg_dependencies_recv"; - case TYPE_ID_PG_LSN: + case PostgresTypeId::PG_LSN: return "pg_lsn_recv"; - case TYPE_ID_PG_MCV_LIST: + case PostgresTypeId::PG_MCV_LIST: return "pg_mcv_list_recv"; - case TYPE_ID_PG_DDL_COMMAND: + case PostgresTypeId::PG_DDL_COMMAND: return "pg_ddl_command_recv"; - case TYPE_ID_PG_SNAPSHOT: + case PostgresTypeId::PG_SNAPSHOT: return "pg_snapshot_recv"; - case TYPE_ID_POINT: + case PostgresTypeId::POINT: return "point_recv"; - case TYPE_ID_POLY: + case PostgresTypeId::POLY: return "poly_recv"; - case TYPE_ID_RANGE: + case PostgresTypeId::RANGE: return "range_recv"; - case TYPE_ID_RECORD: + case PostgresTypeId::RECORD: return "record_recv"; - case TYPE_ID_REGCLASS: + case PostgresTypeId::REGCLASS: return "regclassrecv"; - case TYPE_ID_REGCOLLATION: + case PostgresTypeId::REGCOLLATION: return "regcollationrecv"; - case TYPE_ID_REGCONFIG: + case PostgresTypeId::REGCONFIG: return "regconfigrecv"; - case TYPE_ID_REGDICTIONARY: + case PostgresTypeId::REGDICTIONARY: return "regdictionaryrecv"; - case TYPE_ID_REGNAMESPACE: + case PostgresTypeId::REGNAMESPACE: return "regnamespacerecv"; - case TYPE_ID_REGOPERATOR: + case PostgresTypeId::REGOPERATOR: return "regoperatorrecv"; - case TYPE_ID_REGOPER: + case PostgresTypeId::REGOPER: return "regoperrecv"; - case TYPE_ID_REGPROCEDURE: + case PostgresTypeId::REGPROCEDURE: return "regprocedurerecv"; - case TYPE_ID_REGPROC: + case PostgresTypeId::REGPROC: return "regprocrecv"; - 
case TYPE_ID_REGROLE: + case PostgresTypeId::REGROLE: return "regrolerecv"; - case TYPE_ID_REGTYPE: + case PostgresTypeId::REGTYPE: return "regtyperecv"; - case TYPE_ID_TEXT: + case PostgresTypeId::TEXT: return "textrecv"; - case TYPE_ID_TID: + case PostgresTypeId::TID: return "tidrecv"; - case TYPE_ID_TIME: + case PostgresTypeId::TIME: return "time_recv"; - case TYPE_ID_TIMESTAMP: + case PostgresTypeId::TIMESTAMP: return "timestamp_recv"; - case TYPE_ID_TIMESTAMPTZ: + case PostgresTypeId::TIMESTAMPTZ: return "timestamptz_recv"; - case TYPE_ID_TIMETZ: + case PostgresTypeId::TIMETZ: return "timetz_recv"; - case TYPE_ID_TSQUERY: + case PostgresTypeId::TSQUERY: return "tsqueryrecv"; - case TYPE_ID_TSVECTOR: + case PostgresTypeId::TSVECTOR: return "tsvectorrecv"; - case TYPE_ID_TXID_SNAPSHOT: + case PostgresTypeId::TXID_SNAPSHOT: return "txid_snapshot_recv"; - case TYPE_ID_UNKNOWN: + case PostgresTypeId::UNKNOWN: return "unknownrecv"; - case TYPE_ID_UUID: + case PostgresTypeId::UUID: return "uuid_recv"; - case TYPE_ID_VARBIT: + case PostgresTypeId::VARBIT: return "varbit_recv"; - case TYPE_ID_VARCHAR: + case PostgresTypeId::VARCHAR: return "varcharrecv"; - case TYPE_ID_VOID: + case PostgresTypeId::VOID: return "void_recv"; - case TYPE_ID_XID8: + case PostgresTypeId::XID8: return "xid8recv"; - case TYPE_ID_XID: + case PostgresTypeId::XID: return "xidrecv"; - case TYPE_ID_XML: + case PostgresTypeId::XML: return "xml_recv"; default: return ""; @@ -639,163 +639,163 @@ static inline const char* PostgresTyprecv(PostgresTypeId type_id) { static inline const char* PostgresTypname(PostgresTypeId type_id) { switch (type_id) { - case TYPE_ID_ACLITEM: + case PostgresTypeId::ACLITEM: return "aclitem"; - case TYPE_ID_ANYARRAY: + case PostgresTypeId::ANYARRAY: return "anyarray"; - case TYPE_ID_ANYCOMPATIBLEARRAY: + case PostgresTypeId::ANYCOMPATIBLEARRAY: return "anycompatiblearray"; - case TYPE_ID_ARRAY: + case PostgresTypeId::ARRAY: return "array"; - case TYPE_ID_BIT: + case 
PostgresTypeId::BIT: return "bit"; - case TYPE_ID_BOOL: + case PostgresTypeId::BOOL: return "bool"; - case TYPE_ID_BOX: + case PostgresTypeId::BOX: return "box"; - case TYPE_ID_BPCHAR: + case PostgresTypeId::BPCHAR: return "bpchar"; - case TYPE_ID_BRIN_BLOOM_SUMMARY: + case PostgresTypeId::BRIN_BLOOM_SUMMARY: return "brin_bloom_summary"; - case TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY: + case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: return "brin_minmax_multi_summary"; - case TYPE_ID_BYTEA: + case PostgresTypeId::BYTEA: return "bytea"; - case TYPE_ID_CASH: + case PostgresTypeId::CASH: return "cash"; - case TYPE_ID_CHAR: + case PostgresTypeId::CHAR: return "char"; - case TYPE_ID_CIDR: + case PostgresTypeId::CIDR: return "cidr"; - case TYPE_ID_CID: + case PostgresTypeId::CID: return "cid"; - case TYPE_ID_CIRCLE: + case PostgresTypeId::CIRCLE: return "circle"; - case TYPE_ID_CSTRING: + case PostgresTypeId::CSTRING: return "cstring"; - case TYPE_ID_DATE: + case PostgresTypeId::DATE: return "date"; - case TYPE_ID_DOMAIN: + case PostgresTypeId::DOMAIN_: return "domain"; - case TYPE_ID_FLOAT4: + case PostgresTypeId::FLOAT4: return "float4"; - case TYPE_ID_FLOAT8: + case PostgresTypeId::FLOAT8: return "float8"; - case TYPE_ID_INET: + case PostgresTypeId::INET: return "inet"; - case TYPE_ID_INT2: + case PostgresTypeId::INT2: return "int2"; - case TYPE_ID_INT2VECTOR: + case PostgresTypeId::INT2VECTOR: return "int2vector"; - case TYPE_ID_INT4: + case PostgresTypeId::INT4: return "int4"; - case TYPE_ID_INT8: + case PostgresTypeId::INT8: return "int8"; - case TYPE_ID_INTERVAL: + case PostgresTypeId::INTERVAL: return "interval"; - case TYPE_ID_JSON: + case PostgresTypeId::JSON: return "json"; - case TYPE_ID_JSONB: + case PostgresTypeId::JSONB: return "jsonb"; - case TYPE_ID_JSONPATH: + case PostgresTypeId::JSONPATH: return "jsonpath"; - case TYPE_ID_LINE: + case PostgresTypeId::LINE: return "line"; - case TYPE_ID_LSEG: + case PostgresTypeId::LSEG: return "lseg"; - case 
TYPE_ID_MACADDR: + case PostgresTypeId::MACADDR: return "macaddr"; - case TYPE_ID_MACADDR8: + case PostgresTypeId::MACADDR8: return "macaddr8"; - case TYPE_ID_MULTIRANGE: + case PostgresTypeId::MULTIRANGE: return "multirange"; - case TYPE_ID_NAME: + case PostgresTypeId::NAME: return "name"; - case TYPE_ID_NUMERIC: + case PostgresTypeId::NUMERIC: return "numeric"; - case TYPE_ID_OID: + case PostgresTypeId::OID: return "oid"; - case TYPE_ID_OIDVECTOR: + case PostgresTypeId::OIDVECTOR: return "oidvector"; - case TYPE_ID_PATH: + case PostgresTypeId::PATH: return "path"; - case TYPE_ID_PG_NODE_TREE: + case PostgresTypeId::PG_NODE_TREE: return "pg_node_tree"; - case TYPE_ID_PG_NDISTINCT: + case PostgresTypeId::PG_NDISTINCT: return "pg_ndistinct"; - case TYPE_ID_PG_DEPENDENCIES: + case PostgresTypeId::PG_DEPENDENCIES: return "pg_dependencies"; - case TYPE_ID_PG_LSN: + case PostgresTypeId::PG_LSN: return "pg_lsn"; - case TYPE_ID_PG_MCV_LIST: + case PostgresTypeId::PG_MCV_LIST: return "pg_mcv_list"; - case TYPE_ID_PG_DDL_COMMAND: + case PostgresTypeId::PG_DDL_COMMAND: return "pg_ddl_command"; - case TYPE_ID_PG_SNAPSHOT: + case PostgresTypeId::PG_SNAPSHOT: return "pg_snapshot"; - case TYPE_ID_POINT: + case PostgresTypeId::POINT: return "point"; - case TYPE_ID_POLY: + case PostgresTypeId::POLY: return "poly"; - case TYPE_ID_RANGE: + case PostgresTypeId::RANGE: return "range"; - case TYPE_ID_RECORD: + case PostgresTypeId::RECORD: return "record"; - case TYPE_ID_REGCLASS: + case PostgresTypeId::REGCLASS: return "regclass"; - case TYPE_ID_REGCOLLATION: + case PostgresTypeId::REGCOLLATION: return "regcollation"; - case TYPE_ID_REGCONFIG: + case PostgresTypeId::REGCONFIG: return "regconfig"; - case TYPE_ID_REGDICTIONARY: + case PostgresTypeId::REGDICTIONARY: return "regdictionary"; - case TYPE_ID_REGNAMESPACE: + case PostgresTypeId::REGNAMESPACE: return "regnamespace"; - case TYPE_ID_REGOPERATOR: + case PostgresTypeId::REGOPERATOR: return "regoperator"; - case TYPE_ID_REGOPER: + 
case PostgresTypeId::REGOPER: return "regoper"; - case TYPE_ID_REGPROCEDURE: + case PostgresTypeId::REGPROCEDURE: return "regprocedure"; - case TYPE_ID_REGPROC: + case PostgresTypeId::REGPROC: return "regproc"; - case TYPE_ID_REGROLE: + case PostgresTypeId::REGROLE: return "regrole"; - case TYPE_ID_REGTYPE: + case PostgresTypeId::REGTYPE: return "regtype"; - case TYPE_ID_TEXT: + case PostgresTypeId::TEXT: return "text"; - case TYPE_ID_TID: + case PostgresTypeId::TID: return "tid"; - case TYPE_ID_TIME: + case PostgresTypeId::TIME: return "time"; - case TYPE_ID_TIMESTAMP: + case PostgresTypeId::TIMESTAMP: return "timestamp"; - case TYPE_ID_TIMESTAMPTZ: + case PostgresTypeId::TIMESTAMPTZ: return "timestamptz"; - case TYPE_ID_TIMETZ: + case PostgresTypeId::TIMETZ: return "timetz"; - case TYPE_ID_TSQUERY: + case PostgresTypeId::TSQUERY: return "tsquery"; - case TYPE_ID_TSVECTOR: + case PostgresTypeId::TSVECTOR: return "tsvector"; - case TYPE_ID_TXID_SNAPSHOT: + case PostgresTypeId::TXID_SNAPSHOT: return "txid_snapshot"; - case TYPE_ID_UNKNOWN: + case PostgresTypeId::UNKNOWN: return "unknown"; - case TYPE_ID_UUID: + case PostgresTypeId::UUID: return "uuid"; - case TYPE_ID_VARBIT: + case PostgresTypeId::VARBIT: return "varbit"; - case TYPE_ID_VARCHAR: + case PostgresTypeId::VARCHAR: return "varchar"; - case TYPE_ID_VOID: + case PostgresTypeId::VOID: return "void"; - case TYPE_ID_XID8: + case PostgresTypeId::XID8: return "xid8"; - case TYPE_ID_XID: + case PostgresTypeId::XID: return "xid"; - case TYPE_ID_XML: + case PostgresTypeId::XML: return "xml"; default: return ""; @@ -803,87 +803,87 @@ static inline const char* PostgresTypname(PostgresTypeId type_id) { } static inline std::vector PostgresTypeIdAll(bool nested) { - std::vector base = {TYPE_ID_ACLITEM, - TYPE_ID_ANYARRAY, - TYPE_ID_ANYCOMPATIBLEARRAY, - TYPE_ID_BIT, - TYPE_ID_BOOL, - TYPE_ID_BOX, - TYPE_ID_BPCHAR, - TYPE_ID_BRIN_BLOOM_SUMMARY, - TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY, - TYPE_ID_BYTEA, - TYPE_ID_CASH, - 
TYPE_ID_CHAR, - TYPE_ID_CIDR, - TYPE_ID_CID, - TYPE_ID_CIRCLE, - TYPE_ID_CSTRING, - TYPE_ID_DATE, - TYPE_ID_FLOAT4, - TYPE_ID_FLOAT8, - TYPE_ID_INET, - TYPE_ID_INT2, - TYPE_ID_INT2VECTOR, - TYPE_ID_INT4, - TYPE_ID_INT8, - TYPE_ID_INTERVAL, - TYPE_ID_JSON, - TYPE_ID_JSONB, - TYPE_ID_JSONPATH, - TYPE_ID_LINE, - TYPE_ID_LSEG, - TYPE_ID_MACADDR, - TYPE_ID_MACADDR8, - TYPE_ID_MULTIRANGE, - TYPE_ID_NAME, - TYPE_ID_NUMERIC, - TYPE_ID_OID, - TYPE_ID_OIDVECTOR, - TYPE_ID_PATH, - TYPE_ID_PG_NODE_TREE, - TYPE_ID_PG_NDISTINCT, - TYPE_ID_PG_DEPENDENCIES, - TYPE_ID_PG_LSN, - TYPE_ID_PG_MCV_LIST, - TYPE_ID_PG_DDL_COMMAND, - TYPE_ID_PG_SNAPSHOT, - TYPE_ID_POINT, - TYPE_ID_POLY, - TYPE_ID_REGCLASS, - TYPE_ID_REGCOLLATION, - TYPE_ID_REGCONFIG, - TYPE_ID_REGDICTIONARY, - TYPE_ID_REGNAMESPACE, - TYPE_ID_REGOPERATOR, - TYPE_ID_REGOPER, - TYPE_ID_REGPROCEDURE, - TYPE_ID_REGPROC, - TYPE_ID_REGROLE, - TYPE_ID_REGTYPE, - TYPE_ID_TEXT, - TYPE_ID_TID, - TYPE_ID_TIME, - TYPE_ID_TIMESTAMP, - TYPE_ID_TIMESTAMPTZ, - TYPE_ID_TIMETZ, - TYPE_ID_TSQUERY, - TYPE_ID_TSVECTOR, - TYPE_ID_TXID_SNAPSHOT, - TYPE_ID_UNKNOWN, - TYPE_ID_UUID, - TYPE_ID_VARBIT, - TYPE_ID_VARCHAR, - TYPE_ID_VOID, - TYPE_ID_XID8, - TYPE_ID_XID, - TYPE_ID_XML}; + std::vector base = {PostgresTypeId::ACLITEM, + PostgresTypeId::ANYARRAY, + PostgresTypeId::ANYCOMPATIBLEARRAY, + PostgresTypeId::BIT, + PostgresTypeId::BOOL, + PostgresTypeId::BOX, + PostgresTypeId::BPCHAR, + PostgresTypeId::BRIN_BLOOM_SUMMARY, + PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY, + PostgresTypeId::BYTEA, + PostgresTypeId::CASH, + PostgresTypeId::CHAR, + PostgresTypeId::CIDR, + PostgresTypeId::CID, + PostgresTypeId::CIRCLE, + PostgresTypeId::CSTRING, + PostgresTypeId::DATE, + PostgresTypeId::FLOAT4, + PostgresTypeId::FLOAT8, + PostgresTypeId::INET, + PostgresTypeId::INT2, + PostgresTypeId::INT2VECTOR, + PostgresTypeId::INT4, + PostgresTypeId::INT8, + PostgresTypeId::INTERVAL, + PostgresTypeId::JSON, + PostgresTypeId::JSONB, + PostgresTypeId::JSONPATH, + 
PostgresTypeId::LINE, + PostgresTypeId::LSEG, + PostgresTypeId::MACADDR, + PostgresTypeId::MACADDR8, + PostgresTypeId::MULTIRANGE, + PostgresTypeId::NAME, + PostgresTypeId::NUMERIC, + PostgresTypeId::OID, + PostgresTypeId::OIDVECTOR, + PostgresTypeId::PATH, + PostgresTypeId::PG_NODE_TREE, + PostgresTypeId::PG_NDISTINCT, + PostgresTypeId::PG_DEPENDENCIES, + PostgresTypeId::PG_LSN, + PostgresTypeId::PG_MCV_LIST, + PostgresTypeId::PG_DDL_COMMAND, + PostgresTypeId::PG_SNAPSHOT, + PostgresTypeId::POINT, + PostgresTypeId::POLY, + PostgresTypeId::REGCLASS, + PostgresTypeId::REGCOLLATION, + PostgresTypeId::REGCONFIG, + PostgresTypeId::REGDICTIONARY, + PostgresTypeId::REGNAMESPACE, + PostgresTypeId::REGOPERATOR, + PostgresTypeId::REGOPER, + PostgresTypeId::REGPROCEDURE, + PostgresTypeId::REGPROC, + PostgresTypeId::REGROLE, + PostgresTypeId::REGTYPE, + PostgresTypeId::TEXT, + PostgresTypeId::TID, + PostgresTypeId::TIME, + PostgresTypeId::TIMESTAMP, + PostgresTypeId::TIMESTAMPTZ, + PostgresTypeId::TIMETZ, + PostgresTypeId::TSQUERY, + PostgresTypeId::TSVECTOR, + PostgresTypeId::TXID_SNAPSHOT, + PostgresTypeId::UNKNOWN, + PostgresTypeId::UUID, + PostgresTypeId::VARBIT, + PostgresTypeId::VARCHAR, + PostgresTypeId::VOID, + PostgresTypeId::XID8, + PostgresTypeId::XID, + PostgresTypeId::XML}; if (nested) { - base.push_back(TYPE_ID_ARRAY); - base.push_back(TYPE_ID_RECORD); - base.push_back(TYPE_ID_RANGE); - base.push_back(TYPE_ID_DOMAIN); + base.push_back(PostgresTypeId::ARRAY); + base.push_back(PostgresTypeId::RECORD); + base.push_back(PostgresTypeId::RANGE); + base.push_back(PostgresTypeId::DOMAIN_); } return base; diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index deccdf361b..e4bc039f28 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -47,26 +47,26 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; item.typname = "_bool"; item.typreceive = 
"array_recv"; - item.child_oid = GetOID(PostgresTypeId::TYPE_ID_BOOL); + item.child_oid = GetOID(PostgresTypeId::BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "boolrange"; item.typreceive = "range_recv"; - item.base_oid = GetOID(PostgresTypeId::TYPE_ID_BOOL); + item.base_oid = GetOID(PostgresTypeId::BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "custombool"; item.typreceive = "domain_recv"; - item.base_oid = GetOID(PostgresTypeId::TYPE_ID_BOOL); + item.base_oid = GetOID(PostgresTypeId::BOOL); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; uint32_t class_oid = item.oid; std::vector> record_fields = { - {"int4_col", GetOID(PostgresTypeId::TYPE_ID_INT4)}, - {"text_col", GetOID(PostgresTypeId::TYPE_ID_TEXT)}}; + {"int4_col", GetOID(PostgresTypeId::INT4)}, + {"text_col", GetOID(PostgresTypeId::TEXT)}}; InsertClass(class_oid, std::move(record_fields)); item.oid++; @@ -80,10 +80,10 @@ class MockTypeResolver : public PostgresTypeResolver { }; TEST(PostgresTypeTest, PostgresTypeBasic) { - PostgresType type(PostgresTypeId::TYPE_ID_BOOL); + PostgresType type(PostgresTypeId::BOOL); EXPECT_EQ(type.field_name(), ""); EXPECT_EQ(type.typname(), ""); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); EXPECT_EQ(type.oid(), 0); EXPECT_EQ(type.n_children(), 0); @@ -116,9 +116,9 @@ TEST(PostgresTypeTest, PostgresTypeBasic) { EXPECT_EQ(domain.typname(), "domain type name"); EXPECT_EQ(domain.type_id(), type.type_id()); - PostgresType record(PostgresTypeId::TYPE_ID_RECORD); + PostgresType record(PostgresTypeId::RECORD); record.AppendChild("col1", type); - EXPECT_EQ(record.type_id(), PostgresTypeId::TYPE_ID_RECORD); + EXPECT_EQ(record.type_id(), PostgresTypeId::RECORD); EXPECT_EQ(record.n_children(), 1); EXPECT_EQ(record.child(0).type_id(), type.type_id()); EXPECT_EQ(record.child(0).field_name(), "col1"); @@ -128,64 +128,61 @@ 
TEST(PostgresTypeTest, PostgresTypeSetSchema) { ArrowSchema schema; ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_BOOL).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::BOOL).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_INT2).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::INT2).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "s"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_INT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::INT4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "i"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_INT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::INT8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "l"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_FLOAT4).SetSchema(&schema), - NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::FLOAT4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "f"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_FLOAT8).SetSchema(&schema), - NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::FLOAT8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "g"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_TEXT).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::TEXT).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "u"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_BYTEA).SetSchema(&schema), NANOARROW_OK); + 
EXPECT_EQ(PostgresType(PostgresTypeId::BYTEA).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TYPE_ID_BOOL).Array().SetSchema(&schema), - NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::BOOL).Array().SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+l"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType record(PostgresTypeId::TYPE_ID_RECORD); - record.AppendChild("col1", PostgresType(PostgresTypeId::TYPE_ID_BOOL)); + PostgresType record(PostgresTypeId::RECORD); + record.AppendChild("col1", PostgresType(PostgresTypeId::BOOL)); EXPECT_EQ(record.SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+s"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType unknown(PostgresTypeId::TYPE_ID_BRIN_MINMAX_MULTI_SUMMARY); + PostgresType unknown(PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY); EXPECT_EQ(unknown.WithPgTypeInfo(0, "some_name").SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); @@ -204,73 +201,73 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BOOL), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT8), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT8), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, 
&schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT16), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT16), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT32), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT32), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT64), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_INT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::INT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_FLOAT), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_FLOAT4); + EXPECT_EQ(type.type_id(), 
PostgresTypeId::FLOAT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_DOUBLE), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_FLOAT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::FLOAT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BINARY), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BYTEA); + EXPECT_EQ(type.type_id(), PostgresTypeId::BYTEA); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_STRING), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_TEXT); + EXPECT_EQ(type.type_id(), PostgresTypeId::TEXT); schema.release(&schema); ArrowSchemaInit(&schema); @@ -278,8 +275,8 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_BOOL), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_ARRAY); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::ARRAY); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::BOOL); schema.release(&schema); ArrowError error; @@ -346,7 +343,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(10, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 10); EXPECT_EQ(type.typname(), "some_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); // Check insert/resolve of array type item.oid = 11; @@ -357,9 +354,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(11, 
&type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 11); EXPECT_EQ(type.typname(), "some_array_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_ARRAY); + EXPECT_EQ(type.type_id(), PostgresTypeId::ARRAY); EXPECT_EQ(type.child(0).oid(), 10); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::BOOL); // Check reverse lookup of array type from item type EXPECT_EQ(resolver.FindArray(10, &type, &error), NANOARROW_OK); @@ -374,9 +371,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(12, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 12); EXPECT_EQ(type.typname(), "some_range_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_RANGE); + EXPECT_EQ(type.type_id(), PostgresTypeId::RANGE); EXPECT_EQ(type.child(0).oid(), 10); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::BOOL); // Check insert/resolve of domain type item.oid = 13; @@ -387,7 +384,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(13, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 13); EXPECT_EQ(type.typname(), "some_domain_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::TYPE_ID_BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); } TEST(PostgresTypeTest, PostgresTypeResolveRecord) { @@ -396,13 +393,12 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { ASSERT_EQ(resolver.Init(), NANOARROW_OK); PostgresType type; - EXPECT_EQ( - resolver.Find(resolver.GetOID(PostgresTypeId::TYPE_ID_RECORD), &type, nullptr), - NANOARROW_OK); - EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::TYPE_ID_RECORD)); + EXPECT_EQ(resolver.Find(resolver.GetOID(PostgresTypeId::RECORD), &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::RECORD)); EXPECT_EQ(type.n_children(), 2); EXPECT_EQ(type.child(0).field_name(), "int4_col"); - 
EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::TYPE_ID_INT4); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::INT4); EXPECT_EQ(type.child(1).field_name(), "text_col"); - EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::TYPE_ID_TEXT); + EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::TEXT); } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index ef738a3319..f86a7a5616 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -184,23 +184,23 @@ struct BindStream { PostgresTypeId type_id; switch (bind_schema_fields[i].type) { case ArrowType::NANOARROW_TYPE_INT16: - type_id = TYPE_ID_INT2; + type_id = PostgresTypeId::INT2; param_lengths[i] = 2; break; case ArrowType::NANOARROW_TYPE_INT32: - type_id = TYPE_ID_INT4; + type_id = PostgresTypeId::INT4; param_lengths[i] = 4; break; case ArrowType::NANOARROW_TYPE_INT64: - type_id = TYPE_ID_INT8; + type_id = PostgresTypeId::INT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_DOUBLE: - type_id = TYPE_ID_FLOAT8; + type_id = PostgresTypeId::FLOAT8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_STRING: - type_id = TYPE_ID_TEXT; + type_id = PostgresTypeId::TEXT; param_lengths[i] = 0; break; default: From bdc374622b273fb95537d04c82e7436b0dd332a2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:22:20 -0300 Subject: [PATCH 77/90] Update c/driver/postgresql/postgres_type.h Co-authored-by: David Li --- c/driver/postgresql/postgres_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index c24e5bbe1f..55e05d9d4a 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -235,7 +235,7 @@ class PostgresType { nanoarrow::UniqueBuffer buffer; ArrowMetadataBuilderInit(buffer.get(), nullptr); NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( - buffer.get(), 
ArrowCharView("ADBC:posgresql:typname"), + buffer.get(), ArrowCharView("ADBC:postgresql:typname"), ArrowCharView(typname_.c_str()))); NANOARROW_RETURN_NOT_OK( ArrowSchemaSetMetadata(schema, reinterpret_cast(buffer->data))); From 187de415def1b7254bc89b114c9f35d5c2391828 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:37:17 -0300 Subject: [PATCH 78/90] move verbose bits to an implementation file --- c/driver/postgresql/CMakeLists.txt | 1 + c/driver/postgresql/postgres_type.cc | 607 ++++++++++++++++++++++ c/driver/postgresql/postgres_type.h | 595 +-------------------- c/driver/postgresql/postgres_type_test.cc | 31 +- 4 files changed, 635 insertions(+), 599 deletions(-) create mode 100644 c/driver/postgresql/postgres_type.cc diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index e14ea20dcc..5681039596 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -39,6 +39,7 @@ endif() add_arrow_lib(adbc_driver_postgresql SOURCES + postgres_type.cc connection.cc database.cc postgresql.cc diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc new file mode 100644 index 0000000000..1bb8fe5563 --- /dev/null +++ b/c/driver/postgresql/postgres_type.cc @@ -0,0 +1,607 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "nanoarrow/nanoarrow.hpp" + +#include "postgres_type.h" + +namespace adbcpq { + +ArrowErrorCode PostgresType::SetSchema(ArrowSchema* schema) const { + switch (type_id_) { + case PostgresTypeId::BOOL: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); + break; + case PostgresTypeId::INT2: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); + break; + case PostgresTypeId::INT4: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); + break; + case PostgresTypeId::INT8: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); + break; + case PostgresTypeId::FLOAT4: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); + break; + case PostgresTypeId::FLOAT8: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); + break; + case PostgresTypeId::CHAR: + case PostgresTypeId::BPCHAR: + case PostgresTypeId::VARCHAR: + case PostgresTypeId::TEXT: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); + break; + case PostgresTypeId::BYTEA: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + break; + + case PostgresTypeId::RECORD: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); + for (int64_t i = 0; i < n_children(); i++) { + NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); + } + break; + + case PostgresTypeId::ARRAY: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); + 
NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); + break; + default: { + // For any types we don't explicitly know how to deal with, we can still + // return the bytes postgres gives us and attach the type name as metadata + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + nanoarrow::UniqueBuffer buffer; + ArrowMetadataBuilderInit(buffer.get(), nullptr); + NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( + buffer.get(), ArrowCharView("ADBC:postgresql:typname"), + ArrowCharView(typname_.c_str()))); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetMetadata(schema, reinterpret_cast(buffer->data))); + break; + } + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, field_name_.c_str())); + return NANOARROW_OK; +} + +ArrowErrorCode PostgresType::FromSchema(const PostgresTypeResolver& resolver, + ArrowSchema* schema, PostgresType* out, + ArrowError* error) { + ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + return resolver.Find(resolver.GetOID(PostgresTypeId::BOOL), out, error); + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT16: + return resolver.Find(resolver.GetOID(PostgresTypeId::INT2), out, error); + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT32: + return resolver.Find(resolver.GetOID(PostgresTypeId::INT4), out, error); + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT64: + return resolver.Find(resolver.GetOID(PostgresTypeId::INT8), out, error); + case NANOARROW_TYPE_FLOAT: + return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT4), out, error); + case NANOARROW_TYPE_DOUBLE: + return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT8), out, error); + case NANOARROW_TYPE_STRING: + return resolver.Find(resolver.GetOID(PostgresTypeId::TEXT), out, error); + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + return 
resolver.Find(resolver.GetOID(PostgresTypeId::BYTEA), out, error); + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: { + PostgresType child; + NANOARROW_RETURN_NOT_OK( + PostgresType::FromSchema(resolver, schema->children[0], &child, error)); + return resolver.FindArray(child.oid(), out, error); + } + + default: + ArrowErrorSet(error, "Can't map Arrow type '%s' to Postgres type", + ArrowTypeString(schema_view.type)); + return ENOTSUP; + } +} + +ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, ArrowError* error) { + auto result = base_.find(item.typreceive); + if (result == base_.end()) { + ArrowErrorSet(error, "Base type not found for type '%s' with receive function '%s'", + item.typname, item.typreceive); + return ENOTSUP; + } + + const PostgresType& base = result->second; + PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); + + switch (base.type_id()) { + case PostgresTypeId::ARRAY: { + PostgresType child; + NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); + mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + array_mapping_.insert({child.oid(), item.oid}); + break; + } + + case PostgresTypeId::RECORD: { + std::vector> child_desc; + NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); + + PostgresType out(PostgresTypeId::RECORD); + for (const auto& child_item : child_desc) { + PostgresType child; + NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); + out.AppendChild(child_item.first, child); + } + + mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + case PostgresTypeId::DOMAIN_: { + PostgresType base_type; + NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); + mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); + 
reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + case PostgresTypeId::RANGE: { + PostgresType base_type; + NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); + mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + default: + mapping_.insert({item.oid, type}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + return NANOARROW_OK; +} + +const char* PostgresTyprecv(PostgresTypeId type_id) { + switch (type_id) { + case PostgresTypeId::ACLITEM: + return "aclitem_recv"; + case PostgresTypeId::ANYARRAY: + return "anyarray_recv"; + case PostgresTypeId::ANYCOMPATIBLEARRAY: + return "anycompatiblearray_recv"; + case PostgresTypeId::ARRAY: + return "array_recv"; + case PostgresTypeId::BIT: + return "bit_recv"; + case PostgresTypeId::BOOL: + return "boolrecv"; + case PostgresTypeId::BOX: + return "box_recv"; + case PostgresTypeId::BPCHAR: + return "bpcharrecv"; + case PostgresTypeId::BRIN_BLOOM_SUMMARY: + return "brin_bloom_summary_recv"; + case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: + return "brin_minmax_multi_summary_recv"; + case PostgresTypeId::BYTEA: + return "bytearecv"; + case PostgresTypeId::CASH: + return "cash_recv"; + case PostgresTypeId::CHAR: + return "charrecv"; + case PostgresTypeId::CIDR: + return "cidr_recv"; + case PostgresTypeId::CID: + return "cidrecv"; + case PostgresTypeId::CIRCLE: + return "circle_recv"; + case PostgresTypeId::CSTRING: + return "cstring_recv"; + case PostgresTypeId::DATE: + return "date_recv"; + case PostgresTypeId::DOMAIN_: + return "domain_recv"; + case PostgresTypeId::FLOAT4: + return "float4recv"; + case PostgresTypeId::FLOAT8: + return "float8recv"; + case PostgresTypeId::INET: + return "inet_recv"; + case PostgresTypeId::INT2: + return "int2recv"; + case PostgresTypeId::INT2VECTOR: + return "int2vectorrecv"; + case PostgresTypeId::INT4: + return 
"int4recv"; + case PostgresTypeId::INT8: + return "int8recv"; + case PostgresTypeId::INTERVAL: + return "interval_recv"; + case PostgresTypeId::JSON: + return "json_recv"; + case PostgresTypeId::JSONB: + return "jsonb_recv"; + case PostgresTypeId::JSONPATH: + return "jsonpath_recv"; + case PostgresTypeId::LINE: + return "line_recv"; + case PostgresTypeId::LSEG: + return "lseg_recv"; + case PostgresTypeId::MACADDR: + return "macaddr_recv"; + case PostgresTypeId::MACADDR8: + return "macaddr8_recv"; + case PostgresTypeId::MULTIRANGE: + return "multirange_recv"; + case PostgresTypeId::NAME: + return "namerecv"; + case PostgresTypeId::NUMERIC: + return "numeric_recv"; + case PostgresTypeId::OID: + return "oidrecv"; + case PostgresTypeId::OIDVECTOR: + return "oidvectorrecv"; + case PostgresTypeId::PATH: + return "path_recv"; + case PostgresTypeId::PG_NODE_TREE: + return "pg_node_tree_recv"; + case PostgresTypeId::PG_NDISTINCT: + return "pg_ndistinct_recv"; + case PostgresTypeId::PG_DEPENDENCIES: + return "pg_dependencies_recv"; + case PostgresTypeId::PG_LSN: + return "pg_lsn_recv"; + case PostgresTypeId::PG_MCV_LIST: + return "pg_mcv_list_recv"; + case PostgresTypeId::PG_DDL_COMMAND: + return "pg_ddl_command_recv"; + case PostgresTypeId::PG_SNAPSHOT: + return "pg_snapshot_recv"; + case PostgresTypeId::POINT: + return "point_recv"; + case PostgresTypeId::POLY: + return "poly_recv"; + case PostgresTypeId::RANGE: + return "range_recv"; + case PostgresTypeId::RECORD: + return "record_recv"; + case PostgresTypeId::REGCLASS: + return "regclassrecv"; + case PostgresTypeId::REGCOLLATION: + return "regcollationrecv"; + case PostgresTypeId::REGCONFIG: + return "regconfigrecv"; + case PostgresTypeId::REGDICTIONARY: + return "regdictionaryrecv"; + case PostgresTypeId::REGNAMESPACE: + return "regnamespacerecv"; + case PostgresTypeId::REGOPERATOR: + return "regoperatorrecv"; + case PostgresTypeId::REGOPER: + return "regoperrecv"; + case PostgresTypeId::REGPROCEDURE: + return 
"regprocedurerecv"; + case PostgresTypeId::REGPROC: + return "regprocrecv"; + case PostgresTypeId::REGROLE: + return "regrolerecv"; + case PostgresTypeId::REGTYPE: + return "regtyperecv"; + case PostgresTypeId::TEXT: + return "textrecv"; + case PostgresTypeId::TID: + return "tidrecv"; + case PostgresTypeId::TIME: + return "time_recv"; + case PostgresTypeId::TIMESTAMP: + return "timestamp_recv"; + case PostgresTypeId::TIMESTAMPTZ: + return "timestamptz_recv"; + case PostgresTypeId::TIMETZ: + return "timetz_recv"; + case PostgresTypeId::TSQUERY: + return "tsqueryrecv"; + case PostgresTypeId::TSVECTOR: + return "tsvectorrecv"; + case PostgresTypeId::TXID_SNAPSHOT: + return "txid_snapshot_recv"; + case PostgresTypeId::UNKNOWN: + return "unknownrecv"; + case PostgresTypeId::UUID: + return "uuid_recv"; + case PostgresTypeId::VARBIT: + return "varbit_recv"; + case PostgresTypeId::VARCHAR: + return "varcharrecv"; + case PostgresTypeId::VOID: + return "void_recv"; + case PostgresTypeId::XID8: + return "xid8recv"; + case PostgresTypeId::XID: + return "xidrecv"; + case PostgresTypeId::XML: + return "xml_recv"; + default: + return ""; + } +} + +const char* PostgresTypname(PostgresTypeId type_id) { + switch (type_id) { + case PostgresTypeId::ACLITEM: + return "aclitem"; + case PostgresTypeId::ANYARRAY: + return "anyarray"; + case PostgresTypeId::ANYCOMPATIBLEARRAY: + return "anycompatiblearray"; + case PostgresTypeId::ARRAY: + return "array"; + case PostgresTypeId::BIT: + return "bit"; + case PostgresTypeId::BOOL: + return "bool"; + case PostgresTypeId::BOX: + return "box"; + case PostgresTypeId::BPCHAR: + return "bpchar"; + case PostgresTypeId::BRIN_BLOOM_SUMMARY: + return "brin_bloom_summary"; + case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: + return "brin_minmax_multi_summary"; + case PostgresTypeId::BYTEA: + return "bytea"; + case PostgresTypeId::CASH: + return "cash"; + case PostgresTypeId::CHAR: + return "char"; + case PostgresTypeId::CIDR: + return "cidr"; + case 
PostgresTypeId::CID: + return "cid"; + case PostgresTypeId::CIRCLE: + return "circle"; + case PostgresTypeId::CSTRING: + return "cstring"; + case PostgresTypeId::DATE: + return "date"; + case PostgresTypeId::DOMAIN_: + return "domain"; + case PostgresTypeId::FLOAT4: + return "float4"; + case PostgresTypeId::FLOAT8: + return "float8"; + case PostgresTypeId::INET: + return "inet"; + case PostgresTypeId::INT2: + return "int2"; + case PostgresTypeId::INT2VECTOR: + return "int2vector"; + case PostgresTypeId::INT4: + return "int4"; + case PostgresTypeId::INT8: + return "int8"; + case PostgresTypeId::INTERVAL: + return "interval"; + case PostgresTypeId::JSON: + return "json"; + case PostgresTypeId::JSONB: + return "jsonb"; + case PostgresTypeId::JSONPATH: + return "jsonpath"; + case PostgresTypeId::LINE: + return "line"; + case PostgresTypeId::LSEG: + return "lseg"; + case PostgresTypeId::MACADDR: + return "macaddr"; + case PostgresTypeId::MACADDR8: + return "macaddr8"; + case PostgresTypeId::MULTIRANGE: + return "multirange"; + case PostgresTypeId::NAME: + return "name"; + case PostgresTypeId::NUMERIC: + return "numeric"; + case PostgresTypeId::OID: + return "oid"; + case PostgresTypeId::OIDVECTOR: + return "oidvector"; + case PostgresTypeId::PATH: + return "path"; + case PostgresTypeId::PG_NODE_TREE: + return "pg_node_tree"; + case PostgresTypeId::PG_NDISTINCT: + return "pg_ndistinct"; + case PostgresTypeId::PG_DEPENDENCIES: + return "pg_dependencies"; + case PostgresTypeId::PG_LSN: + return "pg_lsn"; + case PostgresTypeId::PG_MCV_LIST: + return "pg_mcv_list"; + case PostgresTypeId::PG_DDL_COMMAND: + return "pg_ddl_command"; + case PostgresTypeId::PG_SNAPSHOT: + return "pg_snapshot"; + case PostgresTypeId::POINT: + return "point"; + case PostgresTypeId::POLY: + return "poly"; + case PostgresTypeId::RANGE: + return "range"; + case PostgresTypeId::RECORD: + return "record"; + case PostgresTypeId::REGCLASS: + return "regclass"; + case PostgresTypeId::REGCOLLATION: + return 
"regcollation"; + case PostgresTypeId::REGCONFIG: + return "regconfig"; + case PostgresTypeId::REGDICTIONARY: + return "regdictionary"; + case PostgresTypeId::REGNAMESPACE: + return "regnamespace"; + case PostgresTypeId::REGOPERATOR: + return "regoperator"; + case PostgresTypeId::REGOPER: + return "regoper"; + case PostgresTypeId::REGPROCEDURE: + return "regprocedure"; + case PostgresTypeId::REGPROC: + return "regproc"; + case PostgresTypeId::REGROLE: + return "regrole"; + case PostgresTypeId::REGTYPE: + return "regtype"; + case PostgresTypeId::TEXT: + return "text"; + case PostgresTypeId::TID: + return "tid"; + case PostgresTypeId::TIME: + return "time"; + case PostgresTypeId::TIMESTAMP: + return "timestamp"; + case PostgresTypeId::TIMESTAMPTZ: + return "timestamptz"; + case PostgresTypeId::TIMETZ: + return "timetz"; + case PostgresTypeId::TSQUERY: + return "tsquery"; + case PostgresTypeId::TSVECTOR: + return "tsvector"; + case PostgresTypeId::TXID_SNAPSHOT: + return "txid_snapshot"; + case PostgresTypeId::UNKNOWN: + return "unknown"; + case PostgresTypeId::UUID: + return "uuid"; + case PostgresTypeId::VARBIT: + return "varbit"; + case PostgresTypeId::VARCHAR: + return "varchar"; + case PostgresTypeId::VOID: + return "void"; + case PostgresTypeId::XID8: + return "xid8"; + case PostgresTypeId::XID: + return "xid"; + case PostgresTypeId::XML: + return "xml"; + default: + return ""; + } +} + +std::vector PostgresTypeIdAll(bool nested) { + std::vector base = {PostgresTypeId::ACLITEM, + PostgresTypeId::ANYARRAY, + PostgresTypeId::ANYCOMPATIBLEARRAY, + PostgresTypeId::BIT, + PostgresTypeId::BOOL, + PostgresTypeId::BOX, + PostgresTypeId::BPCHAR, + PostgresTypeId::BRIN_BLOOM_SUMMARY, + PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY, + PostgresTypeId::BYTEA, + PostgresTypeId::CASH, + PostgresTypeId::CHAR, + PostgresTypeId::CIDR, + PostgresTypeId::CID, + PostgresTypeId::CIRCLE, + PostgresTypeId::CSTRING, + PostgresTypeId::DATE, + PostgresTypeId::FLOAT4, + PostgresTypeId::FLOAT8, 
+ PostgresTypeId::INET, + PostgresTypeId::INT2, + PostgresTypeId::INT2VECTOR, + PostgresTypeId::INT4, + PostgresTypeId::INT8, + PostgresTypeId::INTERVAL, + PostgresTypeId::JSON, + PostgresTypeId::JSONB, + PostgresTypeId::JSONPATH, + PostgresTypeId::LINE, + PostgresTypeId::LSEG, + PostgresTypeId::MACADDR, + PostgresTypeId::MACADDR8, + PostgresTypeId::MULTIRANGE, + PostgresTypeId::NAME, + PostgresTypeId::NUMERIC, + PostgresTypeId::OID, + PostgresTypeId::OIDVECTOR, + PostgresTypeId::PATH, + PostgresTypeId::PG_NODE_TREE, + PostgresTypeId::PG_NDISTINCT, + PostgresTypeId::PG_DEPENDENCIES, + PostgresTypeId::PG_LSN, + PostgresTypeId::PG_MCV_LIST, + PostgresTypeId::PG_DDL_COMMAND, + PostgresTypeId::PG_SNAPSHOT, + PostgresTypeId::POINT, + PostgresTypeId::POLY, + PostgresTypeId::REGCLASS, + PostgresTypeId::REGCOLLATION, + PostgresTypeId::REGCONFIG, + PostgresTypeId::REGDICTIONARY, + PostgresTypeId::REGNAMESPACE, + PostgresTypeId::REGOPERATOR, + PostgresTypeId::REGOPER, + PostgresTypeId::REGPROCEDURE, + PostgresTypeId::REGPROC, + PostgresTypeId::REGROLE, + PostgresTypeId::REGTYPE, + PostgresTypeId::TEXT, + PostgresTypeId::TID, + PostgresTypeId::TIME, + PostgresTypeId::TIMESTAMP, + PostgresTypeId::TIMESTAMPTZ, + PostgresTypeId::TIMETZ, + PostgresTypeId::TSQUERY, + PostgresTypeId::TSVECTOR, + PostgresTypeId::TXID_SNAPSHOT, + PostgresTypeId::UNKNOWN, + PostgresTypeId::UUID, + PostgresTypeId::VARBIT, + PostgresTypeId::VARCHAR, + PostgresTypeId::VOID, + PostgresTypeId::XID8, + PostgresTypeId::XID, + PostgresTypeId::XML}; + + if (nested) { + base.push_back(PostgresTypeId::ARRAY); + base.push_back(PostgresTypeId::RECORD); + base.push_back(PostgresTypeId::RANGE); + base.push_back(PostgresTypeId::DOMAIN_); + } + + return base; +} + +} // namespace adbcpq \ No newline at end of file diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 55e05d9d4a..6b045510c5 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h 
@@ -115,16 +115,19 @@ enum class PostgresTypeId { // Returns the receive function name as defined in the typrecieve column // of the pg_type table. This name is the one that gets used to look up // the PostgresTypeId. -static inline const char* PostgresTyprecv(PostgresTypeId type_id); +const char* PostgresTyprecv(PostgresTypeId type_id); // Returns a likely typname value for a given PostgresTypeId. This is useful // for testing and error messages but may not be the actual value present // in the pg_type typname column. -static inline const char* PostgresTypname(PostgresTypeId type_id); +const char* PostgresTypname(PostgresTypeId type_id); // A vector of all type IDs, optionally including the nested types PostgresTypeId::ARRAY, // PostgresTypeId::DOMAIN_, PostgresTypeId::RECORD, and PostgresTypeId::RANGE. -static inline std::vector PostgresTypeIdAll(bool nested = true); +std::vector PostgresTypeIdAll(bool nested = true); + +// Forward-declare the type resolver for use in PostgresType::FromSchema +class PostgresTypeResolver; // An abstraction of a (potentially nested and/or parameterized) Postgres // data type. This class is where default type conversion to/from Arrow @@ -187,65 +190,11 @@ class PostgresType { // do not have a corresponding Arrow type are returned as Binary with field // metadata ADBC:posgresql:typname. These types can be represented as their // binary COPY representation in the output. 
- ArrowErrorCode SetSchema(ArrowSchema* schema) const { - switch (type_id_) { - case PostgresTypeId::BOOL: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); - break; - case PostgresTypeId::INT2: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); - break; - case PostgresTypeId::INT4: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); - break; - case PostgresTypeId::INT8: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); - break; - case PostgresTypeId::FLOAT4: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); - break; - case PostgresTypeId::FLOAT8: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); - break; - case PostgresTypeId::CHAR: - case PostgresTypeId::BPCHAR: - case PostgresTypeId::VARCHAR: - case PostgresTypeId::TEXT: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); - break; - case PostgresTypeId::BYTEA: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); - break; - - case PostgresTypeId::RECORD: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); - for (int64_t i = 0; i < n_children(); i++) { - NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); - } - break; - - case PostgresTypeId::ARRAY: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); - NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); - break; - default: { - // For any types we don't explicitly know how to deal with, we can still - // return the bytes postgres gives us and attach the type name as metadata - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); - nanoarrow::UniqueBuffer buffer; - ArrowMetadataBuilderInit(buffer.get(), nullptr); - NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( - buffer.get(), ArrowCharView("ADBC:postgresql:typname"), - 
ArrowCharView(typname_.c_str()))); - NANOARROW_RETURN_NOT_OK( - ArrowSchemaSetMetadata(schema, reinterpret_cast(buffer->data))); - break; - } - } + ArrowErrorCode SetSchema(ArrowSchema* schema) const; - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, field_name_.c_str())); - return NANOARROW_OK; - } + static ArrowErrorCode FromSchema(const PostgresTypeResolver& resolver, + ArrowSchema* schema, PostgresType* out, + ArrowError* error); private: uint32_t oid_; @@ -318,67 +267,7 @@ class PostgresTypeResolver { // of Inserts matters: Non-array types must be inserted before the corresponding // array types and class definitions must be inserted before the corresponding // class type using InsertClass(). - ArrowErrorCode Insert(const Item& item, ArrowError* error) { - auto result = base_.find(item.typreceive); - if (result == base_.end()) { - ArrowErrorSet(error, "Base type not found for type '%s' with receive function '%s'", - item.typname, item.typreceive); - return ENOTSUP; - } - - const PostgresType& base = result->second; - PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); - - switch (base.type_id()) { - case PostgresTypeId::ARRAY: { - PostgresType child; - NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); - mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - array_mapping_.insert({child.oid(), item.oid}); - break; - } - - case PostgresTypeId::RECORD: { - std::vector> child_desc; - NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); - - PostgresType out(PostgresTypeId::RECORD); - for (const auto& child_item : child_desc) { - PostgresType child; - NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); - out.AppendChild(child_item.first, child); - } - - mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - case 
PostgresTypeId::DOMAIN_: { - PostgresType base_type; - NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); - mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - case PostgresTypeId::RANGE: { - PostgresType base_type; - NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); - mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - default: - mapping_.insert({item.oid, type}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - return NANOARROW_OK; - } + ArrowErrorCode Insert(const Item& item, ArrowError* error); // Insert a class definition. For the purposes of resolving a PostgresType // instance, this is simply a vector of field_name: oid tuples. The specified @@ -427,466 +316,4 @@ class PostgresTypeResolver { } }; -static inline ArrowErrorCode PostgresTypeFromSchema(const PostgresTypeResolver& resolver, - ArrowSchema* schema, - PostgresType* out, - ArrowError* error) { - ArrowSchemaView schema_view; - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); - - switch (schema_view.type) { - case NANOARROW_TYPE_BOOL: - return resolver.Find(resolver.GetOID(PostgresTypeId::BOOL), out, error); - case NANOARROW_TYPE_INT8: - case NANOARROW_TYPE_UINT8: - case NANOARROW_TYPE_INT16: - return resolver.Find(resolver.GetOID(PostgresTypeId::INT2), out, error); - case NANOARROW_TYPE_UINT16: - case NANOARROW_TYPE_INT32: - return resolver.Find(resolver.GetOID(PostgresTypeId::INT4), out, error); - case NANOARROW_TYPE_UINT32: - case NANOARROW_TYPE_INT64: - return resolver.Find(resolver.GetOID(PostgresTypeId::INT8), out, error); - case NANOARROW_TYPE_FLOAT: - return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT4), out, error); - case NANOARROW_TYPE_DOUBLE: - return 
resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT8), out, error); - case NANOARROW_TYPE_STRING: - return resolver.Find(resolver.GetOID(PostgresTypeId::TEXT), out, error); - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return resolver.Find(resolver.GetOID(PostgresTypeId::BYTEA), out, error); - case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_LARGE_LIST: - case NANOARROW_TYPE_FIXED_SIZE_LIST: { - PostgresType child; - NANOARROW_RETURN_NOT_OK( - PostgresTypeFromSchema(resolver, schema->children[0], &child, error)); - return resolver.FindArray(child.oid(), out, error); - } - - default: - ArrowErrorSet(error, "Can't map Arrow type '%s' to Postgres type", - ArrowTypeString(schema_view.type)); - return ENOTSUP; - } -} - -static inline const char* PostgresTyprecv(PostgresTypeId type_id) { - switch (type_id) { - case PostgresTypeId::ACLITEM: - return "aclitem_recv"; - case PostgresTypeId::ANYARRAY: - return "anyarray_recv"; - case PostgresTypeId::ANYCOMPATIBLEARRAY: - return "anycompatiblearray_recv"; - case PostgresTypeId::ARRAY: - return "array_recv"; - case PostgresTypeId::BIT: - return "bit_recv"; - case PostgresTypeId::BOOL: - return "boolrecv"; - case PostgresTypeId::BOX: - return "box_recv"; - case PostgresTypeId::BPCHAR: - return "bpcharrecv"; - case PostgresTypeId::BRIN_BLOOM_SUMMARY: - return "brin_bloom_summary_recv"; - case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: - return "brin_minmax_multi_summary_recv"; - case PostgresTypeId::BYTEA: - return "bytearecv"; - case PostgresTypeId::CASH: - return "cash_recv"; - case PostgresTypeId::CHAR: - return "charrecv"; - case PostgresTypeId::CIDR: - return "cidr_recv"; - case PostgresTypeId::CID: - return "cidrecv"; - case PostgresTypeId::CIRCLE: - return "circle_recv"; - case PostgresTypeId::CSTRING: - return "cstring_recv"; - case PostgresTypeId::DATE: - return "date_recv"; - case PostgresTypeId::DOMAIN_: - return "domain_recv"; - case PostgresTypeId::FLOAT4: - return "float4recv"; - case 
PostgresTypeId::FLOAT8: - return "float8recv"; - case PostgresTypeId::INET: - return "inet_recv"; - case PostgresTypeId::INT2: - return "int2recv"; - case PostgresTypeId::INT2VECTOR: - return "int2vectorrecv"; - case PostgresTypeId::INT4: - return "int4recv"; - case PostgresTypeId::INT8: - return "int8recv"; - case PostgresTypeId::INTERVAL: - return "interval_recv"; - case PostgresTypeId::JSON: - return "json_recv"; - case PostgresTypeId::JSONB: - return "jsonb_recv"; - case PostgresTypeId::JSONPATH: - return "jsonpath_recv"; - case PostgresTypeId::LINE: - return "line_recv"; - case PostgresTypeId::LSEG: - return "lseg_recv"; - case PostgresTypeId::MACADDR: - return "macaddr_recv"; - case PostgresTypeId::MACADDR8: - return "macaddr8_recv"; - case PostgresTypeId::MULTIRANGE: - return "multirange_recv"; - case PostgresTypeId::NAME: - return "namerecv"; - case PostgresTypeId::NUMERIC: - return "numeric_recv"; - case PostgresTypeId::OID: - return "oidrecv"; - case PostgresTypeId::OIDVECTOR: - return "oidvectorrecv"; - case PostgresTypeId::PATH: - return "path_recv"; - case PostgresTypeId::PG_NODE_TREE: - return "pg_node_tree_recv"; - case PostgresTypeId::PG_NDISTINCT: - return "pg_ndistinct_recv"; - case PostgresTypeId::PG_DEPENDENCIES: - return "pg_dependencies_recv"; - case PostgresTypeId::PG_LSN: - return "pg_lsn_recv"; - case PostgresTypeId::PG_MCV_LIST: - return "pg_mcv_list_recv"; - case PostgresTypeId::PG_DDL_COMMAND: - return "pg_ddl_command_recv"; - case PostgresTypeId::PG_SNAPSHOT: - return "pg_snapshot_recv"; - case PostgresTypeId::POINT: - return "point_recv"; - case PostgresTypeId::POLY: - return "poly_recv"; - case PostgresTypeId::RANGE: - return "range_recv"; - case PostgresTypeId::RECORD: - return "record_recv"; - case PostgresTypeId::REGCLASS: - return "regclassrecv"; - case PostgresTypeId::REGCOLLATION: - return "regcollationrecv"; - case PostgresTypeId::REGCONFIG: - return "regconfigrecv"; - case PostgresTypeId::REGDICTIONARY: - return 
"regdictionaryrecv"; - case PostgresTypeId::REGNAMESPACE: - return "regnamespacerecv"; - case PostgresTypeId::REGOPERATOR: - return "regoperatorrecv"; - case PostgresTypeId::REGOPER: - return "regoperrecv"; - case PostgresTypeId::REGPROCEDURE: - return "regprocedurerecv"; - case PostgresTypeId::REGPROC: - return "regprocrecv"; - case PostgresTypeId::REGROLE: - return "regrolerecv"; - case PostgresTypeId::REGTYPE: - return "regtyperecv"; - case PostgresTypeId::TEXT: - return "textrecv"; - case PostgresTypeId::TID: - return "tidrecv"; - case PostgresTypeId::TIME: - return "time_recv"; - case PostgresTypeId::TIMESTAMP: - return "timestamp_recv"; - case PostgresTypeId::TIMESTAMPTZ: - return "timestamptz_recv"; - case PostgresTypeId::TIMETZ: - return "timetz_recv"; - case PostgresTypeId::TSQUERY: - return "tsqueryrecv"; - case PostgresTypeId::TSVECTOR: - return "tsvectorrecv"; - case PostgresTypeId::TXID_SNAPSHOT: - return "txid_snapshot_recv"; - case PostgresTypeId::UNKNOWN: - return "unknownrecv"; - case PostgresTypeId::UUID: - return "uuid_recv"; - case PostgresTypeId::VARBIT: - return "varbit_recv"; - case PostgresTypeId::VARCHAR: - return "varcharrecv"; - case PostgresTypeId::VOID: - return "void_recv"; - case PostgresTypeId::XID8: - return "xid8recv"; - case PostgresTypeId::XID: - return "xidrecv"; - case PostgresTypeId::XML: - return "xml_recv"; - default: - return ""; - } -} - -static inline const char* PostgresTypname(PostgresTypeId type_id) { - switch (type_id) { - case PostgresTypeId::ACLITEM: - return "aclitem"; - case PostgresTypeId::ANYARRAY: - return "anyarray"; - case PostgresTypeId::ANYCOMPATIBLEARRAY: - return "anycompatiblearray"; - case PostgresTypeId::ARRAY: - return "array"; - case PostgresTypeId::BIT: - return "bit"; - case PostgresTypeId::BOOL: - return "bool"; - case PostgresTypeId::BOX: - return "box"; - case PostgresTypeId::BPCHAR: - return "bpchar"; - case PostgresTypeId::BRIN_BLOOM_SUMMARY: - return "brin_bloom_summary"; - case 
PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: - return "brin_minmax_multi_summary"; - case PostgresTypeId::BYTEA: - return "bytea"; - case PostgresTypeId::CASH: - return "cash"; - case PostgresTypeId::CHAR: - return "char"; - case PostgresTypeId::CIDR: - return "cidr"; - case PostgresTypeId::CID: - return "cid"; - case PostgresTypeId::CIRCLE: - return "circle"; - case PostgresTypeId::CSTRING: - return "cstring"; - case PostgresTypeId::DATE: - return "date"; - case PostgresTypeId::DOMAIN_: - return "domain"; - case PostgresTypeId::FLOAT4: - return "float4"; - case PostgresTypeId::FLOAT8: - return "float8"; - case PostgresTypeId::INET: - return "inet"; - case PostgresTypeId::INT2: - return "int2"; - case PostgresTypeId::INT2VECTOR: - return "int2vector"; - case PostgresTypeId::INT4: - return "int4"; - case PostgresTypeId::INT8: - return "int8"; - case PostgresTypeId::INTERVAL: - return "interval"; - case PostgresTypeId::JSON: - return "json"; - case PostgresTypeId::JSONB: - return "jsonb"; - case PostgresTypeId::JSONPATH: - return "jsonpath"; - case PostgresTypeId::LINE: - return "line"; - case PostgresTypeId::LSEG: - return "lseg"; - case PostgresTypeId::MACADDR: - return "macaddr"; - case PostgresTypeId::MACADDR8: - return "macaddr8"; - case PostgresTypeId::MULTIRANGE: - return "multirange"; - case PostgresTypeId::NAME: - return "name"; - case PostgresTypeId::NUMERIC: - return "numeric"; - case PostgresTypeId::OID: - return "oid"; - case PostgresTypeId::OIDVECTOR: - return "oidvector"; - case PostgresTypeId::PATH: - return "path"; - case PostgresTypeId::PG_NODE_TREE: - return "pg_node_tree"; - case PostgresTypeId::PG_NDISTINCT: - return "pg_ndistinct"; - case PostgresTypeId::PG_DEPENDENCIES: - return "pg_dependencies"; - case PostgresTypeId::PG_LSN: - return "pg_lsn"; - case PostgresTypeId::PG_MCV_LIST: - return "pg_mcv_list"; - case PostgresTypeId::PG_DDL_COMMAND: - return "pg_ddl_command"; - case PostgresTypeId::PG_SNAPSHOT: - return "pg_snapshot"; - case 
PostgresTypeId::POINT: - return "point"; - case PostgresTypeId::POLY: - return "poly"; - case PostgresTypeId::RANGE: - return "range"; - case PostgresTypeId::RECORD: - return "record"; - case PostgresTypeId::REGCLASS: - return "regclass"; - case PostgresTypeId::REGCOLLATION: - return "regcollation"; - case PostgresTypeId::REGCONFIG: - return "regconfig"; - case PostgresTypeId::REGDICTIONARY: - return "regdictionary"; - case PostgresTypeId::REGNAMESPACE: - return "regnamespace"; - case PostgresTypeId::REGOPERATOR: - return "regoperator"; - case PostgresTypeId::REGOPER: - return "regoper"; - case PostgresTypeId::REGPROCEDURE: - return "regprocedure"; - case PostgresTypeId::REGPROC: - return "regproc"; - case PostgresTypeId::REGROLE: - return "regrole"; - case PostgresTypeId::REGTYPE: - return "regtype"; - case PostgresTypeId::TEXT: - return "text"; - case PostgresTypeId::TID: - return "tid"; - case PostgresTypeId::TIME: - return "time"; - case PostgresTypeId::TIMESTAMP: - return "timestamp"; - case PostgresTypeId::TIMESTAMPTZ: - return "timestamptz"; - case PostgresTypeId::TIMETZ: - return "timetz"; - case PostgresTypeId::TSQUERY: - return "tsquery"; - case PostgresTypeId::TSVECTOR: - return "tsvector"; - case PostgresTypeId::TXID_SNAPSHOT: - return "txid_snapshot"; - case PostgresTypeId::UNKNOWN: - return "unknown"; - case PostgresTypeId::UUID: - return "uuid"; - case PostgresTypeId::VARBIT: - return "varbit"; - case PostgresTypeId::VARCHAR: - return "varchar"; - case PostgresTypeId::VOID: - return "void"; - case PostgresTypeId::XID8: - return "xid8"; - case PostgresTypeId::XID: - return "xid"; - case PostgresTypeId::XML: - return "xml"; - default: - return ""; - } -} - -static inline std::vector PostgresTypeIdAll(bool nested) { - std::vector base = {PostgresTypeId::ACLITEM, - PostgresTypeId::ANYARRAY, - PostgresTypeId::ANYCOMPATIBLEARRAY, - PostgresTypeId::BIT, - PostgresTypeId::BOOL, - PostgresTypeId::BOX, - PostgresTypeId::BPCHAR, - 
PostgresTypeId::BRIN_BLOOM_SUMMARY, - PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY, - PostgresTypeId::BYTEA, - PostgresTypeId::CASH, - PostgresTypeId::CHAR, - PostgresTypeId::CIDR, - PostgresTypeId::CID, - PostgresTypeId::CIRCLE, - PostgresTypeId::CSTRING, - PostgresTypeId::DATE, - PostgresTypeId::FLOAT4, - PostgresTypeId::FLOAT8, - PostgresTypeId::INET, - PostgresTypeId::INT2, - PostgresTypeId::INT2VECTOR, - PostgresTypeId::INT4, - PostgresTypeId::INT8, - PostgresTypeId::INTERVAL, - PostgresTypeId::JSON, - PostgresTypeId::JSONB, - PostgresTypeId::JSONPATH, - PostgresTypeId::LINE, - PostgresTypeId::LSEG, - PostgresTypeId::MACADDR, - PostgresTypeId::MACADDR8, - PostgresTypeId::MULTIRANGE, - PostgresTypeId::NAME, - PostgresTypeId::NUMERIC, - PostgresTypeId::OID, - PostgresTypeId::OIDVECTOR, - PostgresTypeId::PATH, - PostgresTypeId::PG_NODE_TREE, - PostgresTypeId::PG_NDISTINCT, - PostgresTypeId::PG_DEPENDENCIES, - PostgresTypeId::PG_LSN, - PostgresTypeId::PG_MCV_LIST, - PostgresTypeId::PG_DDL_COMMAND, - PostgresTypeId::PG_SNAPSHOT, - PostgresTypeId::POINT, - PostgresTypeId::POLY, - PostgresTypeId::REGCLASS, - PostgresTypeId::REGCOLLATION, - PostgresTypeId::REGCONFIG, - PostgresTypeId::REGDICTIONARY, - PostgresTypeId::REGNAMESPACE, - PostgresTypeId::REGOPERATOR, - PostgresTypeId::REGOPER, - PostgresTypeId::REGPROCEDURE, - PostgresTypeId::REGPROC, - PostgresTypeId::REGROLE, - PostgresTypeId::REGTYPE, - PostgresTypeId::TEXT, - PostgresTypeId::TID, - PostgresTypeId::TIME, - PostgresTypeId::TIMESTAMP, - PostgresTypeId::TIMESTAMPTZ, - PostgresTypeId::TIMETZ, - PostgresTypeId::TSQUERY, - PostgresTypeId::TSVECTOR, - PostgresTypeId::TXID_SNAPSHOT, - PostgresTypeId::UNKNOWN, - PostgresTypeId::UUID, - PostgresTypeId::VARBIT, - PostgresTypeId::VARCHAR, - PostgresTypeId::VOID, - PostgresTypeId::XID8, - PostgresTypeId::XID, - PostgresTypeId::XML}; - - if (nested) { - base.push_back(PostgresTypeId::ARRAY); - base.push_back(PostgresTypeId::RECORD); - 
base.push_back(PostgresTypeId::RANGE); - base.push_back(PostgresTypeId::DOMAIN_); - } - - return base; -} - } // namespace adbcpq diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index e4bc039f28..46bfd00daa 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -187,7 +187,8 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { EXPECT_STREQ(schema.format, "z"); ArrowStringView value = ArrowCharView(""); - ArrowMetadataGetValue(schema.metadata, ArrowCharView("ADBC:posgresql:typname"), &value); + ArrowMetadataGetValue(schema.metadata, ArrowCharView("ADBC:postgresql:typname"), + &value); EXPECT_EQ(std::string(value.data, value.size_bytes), "some_name"); schema.release(&schema); } @@ -199,73 +200,73 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(resolver.Init(), NANOARROW_OK); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BOOL), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT8), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT8), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT16), NANOARROW_OK); - 
EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT16), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT32), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT32), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT64), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::INT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_FLOAT), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::FLOAT4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_DOUBLE), NANOARROW_OK); - 
EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::FLOAT8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BINARY), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::BYTEA); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_STRING), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::TEXT); schema.release(&schema); @@ -273,7 +274,7 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ArrowSchemaInit(&schema); ASSERT_EQ(ArrowSchemaSetType(&schema, NANOARROW_TYPE_LIST), NANOARROW_OK); ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_BOOL), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, nullptr), + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); EXPECT_EQ(type.type_id(), PostgresTypeId::ARRAY); EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::BOOL); @@ -282,7 +283,7 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ArrowError error; ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO), NANOARROW_OK); - EXPECT_EQ(adbcpq::PostgresTypeFromSchema(resolver, &schema, &type, &error), ENOTSUP); + EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, &error), ENOTSUP); EXPECT_STREQ(error.message, "Can't map Arrow type 'interval_month_day_nano' to Postgres type"); schema.release(&schema); From ade7d5ad0fd6f502e7fb1e82ce6cf733246f6197 Mon Sep 17 
00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:40:22 -0300 Subject: [PATCH 79/90] newline at end of file --- c/driver/postgresql/postgres_type.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc index 1bb8fe5563..5c0c04f9ab 100644 --- a/c/driver/postgresql/postgres_type.cc +++ b/c/driver/postgresql/postgres_type.cc @@ -604,4 +604,4 @@ std::vector PostgresTypeIdAll(bool nested) { return base; } -} // namespace adbcpq \ No newline at end of file +} // namespace adbcpq From de8a81f9397d974420f6a119fb1133698225b54b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:42:15 -0300 Subject: [PATCH 80/90] better include strategy --- c/driver/postgresql/postgres_type.cc | 6 +++++- c/driver/postgresql/postgres_type.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc index 5c0c04f9ab..ca246d7fb2 100644 --- a/c/driver/postgresql/postgres_type.cc +++ b/c/driver/postgresql/postgres_type.cc @@ -15,7 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#include "nanoarrow/nanoarrow.hpp" +#include +#include +#include + +#include #include "postgres_type.h" diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 6b045510c5..4475a6fac9 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -24,7 +24,7 @@ #include #include -#include +#include namespace adbcpq { From e1c081bd6c56efc249ad5808a5399ca8b8eb604f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:52:32 -0300 Subject: [PATCH 81/90] delete vendored files --- r/adbcpostgresql/src/database.cc | 124 ------- r/adbcpostgresql/src/postgresql.cc | 499 ----------------------------- 2 files changed, 623 deletions(-) delete mode 100644 r/adbcpostgresql/src/database.cc delete mode 100644 r/adbcpostgresql/src/postgresql.cc diff --git a/r/adbcpostgresql/src/database.cc b/r/adbcpostgresql/src/database.cc deleted file mode 100644 index bc5e0ec2ff..0000000000 --- a/r/adbcpostgresql/src/database.cc +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "database.h" - -#include -#include - -#include -#include -#include - -#include "util.h" - -namespace adbcpq { - -PostgresDatabase::PostgresDatabase() : open_connections_(0) { - type_mapping_ = std::make_shared(); -} -PostgresDatabase::~PostgresDatabase() = default; - -AdbcStatusCode PostgresDatabase::Init(struct AdbcError* error) { - // Connect to validate the parameters. - PGconn* conn = nullptr; - AdbcStatusCode final_status = Connect(&conn, error); - if (final_status != ADBC_STATUS_OK) { - return final_status; - } - - // Build the type mapping table. - const std::string kTypeQuery = R"( -SELECT - oid, - typname, - typreceive -FROM - pg_catalog.pg_type -)"; - - pg_result* result = PQexec(conn, kTypeQuery.c_str()); - ExecStatusType pq_status = PQresultStatus(result); - if (pq_status == PGRES_TUPLES_OK) { - int num_rows = PQntuples(result); - for (int row = 0; row < num_rows; row++) { - const uint32_t oid = static_cast( - std::strtol(PQgetvalue(result, row, 0), /*str_end=*/nullptr, /*base=*/10)); - const char* typname = PQgetvalue(result, row, 1); - const char* typreceive = PQgetvalue(result, row, 2); - - type_mapping_->Insert(oid, typname, typreceive); - } - } else { - SetError(error, "Failed to build type mapping table: ", PQerrorMessage(conn)); - final_status = ADBC_STATUS_IO; - } - PQclear(result); - - // Disconnect since PostgreSQL connections can be heavy. 
- { - AdbcStatusCode status = Disconnect(&conn, error); - if (status != ADBC_STATUS_OK) final_status = status; - } - return final_status; -} - -AdbcStatusCode PostgresDatabase::Release(struct AdbcError* error) { - if (open_connections_ != 0) { - SetError(error, "Database released with ", open_connections_, " open connections"); - return ADBC_STATUS_INVALID_STATE; - } - return ADBC_STATUS_OK; -} - -AdbcStatusCode PostgresDatabase::SetOption(const char* key, const char* value, - struct AdbcError* error) { - if (strcmp(key, "uri") == 0) { - uri_ = value; - } else { - SetError(error, "Unknown database option ", key); - return ADBC_STATUS_NOT_IMPLEMENTED; - } - return ADBC_STATUS_OK; -} - -AdbcStatusCode PostgresDatabase::Connect(PGconn** conn, struct AdbcError* error) { - if (uri_.empty()) { - SetError(error, "Must set database option 'uri' before creating a connection"); - return ADBC_STATUS_INVALID_STATE; - } - *conn = PQconnectdb(uri_.c_str()); - if (PQstatus(*conn) != CONNECTION_OK) { - SetError(error, "Failed to connect: ", PQerrorMessage(*conn)); - PQfinish(*conn); - *conn = nullptr; - return ADBC_STATUS_IO; - } - open_connections_++; - return ADBC_STATUS_OK; -} - -AdbcStatusCode PostgresDatabase::Disconnect(PGconn** conn, struct AdbcError* error) { - PQfinish(*conn); - *conn = nullptr; - if (--open_connections_ < 0) { - SetError(error, "Open connection count underflowed"); - return ADBC_STATUS_INTERNAL; - } - return ADBC_STATUS_OK; -} -} // namespace adbcpq diff --git a/r/adbcpostgresql/src/postgresql.cc b/r/adbcpostgresql/src/postgresql.cc deleted file mode 100644 index d4be5ce82a..0000000000 --- a/r/adbcpostgresql/src/postgresql.cc +++ /dev/null @@ -1,499 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// A libpq-based PostgreSQL driver for ADBC. - -#include -#include - -#include - -#include "connection.h" -#include "database.h" -#include "statement.h" -#include "util.h" - -using adbcpq::PostgresConnection; -using adbcpq::PostgresDatabase; -using adbcpq::PostgresStatement; - -// --------------------------------------------------------------------- -// ADBC interface implementation - as private functions so that these -// don't get replaced by the dynamic linker. If we implemented these -// under the Adbc* names, then DriverInit, the linker may resolve -// functions to the address of the functions provided by the driver -// manager instead of our functions. -// -// We could also: -// - Play games with RTLD_DEEPBIND - but this doesn't work with ASan -// - Use __attribute__((visibility("protected"))) - but this is -// apparently poorly supported by some linkers -// - Play with -Bsymbolic(-functions) - but this has other -// consequences and complicates the build setup -// -// So in the end some manual effort here was chosen. 
- -// --------------------------------------------------------------------- -// AdbcDatabase - -namespace { -using adbcpq::SetError; -AdbcStatusCode PostgresDatabaseInit(struct AdbcDatabase* database, - struct AdbcError* error) { - if (!database || !database->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = reinterpret_cast*>(database->private_data); - return (*ptr)->Init(error); -} - -AdbcStatusCode PostgresDatabaseNew(struct AdbcDatabase* database, - struct AdbcError* error) { - if (!database) { - SetError(error, "database must not be null"); - return ADBC_STATUS_INVALID_STATE; - } - if (database->private_data) { - SetError(error, "database is already initialized"); - return ADBC_STATUS_INVALID_STATE; - } - auto impl = std::make_shared(); - database->private_data = new std::shared_ptr(impl); - return ADBC_STATUS_OK; -} - -AdbcStatusCode PostgresDatabaseRelease(struct AdbcDatabase* database, - struct AdbcError* error) { - if (!database->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = reinterpret_cast*>(database->private_data); - AdbcStatusCode status = (*ptr)->Release(error); - delete ptr; - database->private_data = nullptr; - return status; -} - -AdbcStatusCode PostgresDatabaseSetOption(struct AdbcDatabase* database, const char* key, - const char* value, struct AdbcError* error) { - if (!database || !database->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = reinterpret_cast*>(database->private_data); - return (*ptr)->SetOption(key, value, error); -} -} // namespace - -AdbcStatusCode AdbcDatabaseInit(struct AdbcDatabase* database, struct AdbcError* error) { - return PostgresDatabaseInit(database, error); -} - -AdbcStatusCode AdbcDatabaseNew(struct AdbcDatabase* database, struct AdbcError* error) { - return PostgresDatabaseNew(database, error); -} - -AdbcStatusCode AdbcDatabaseRelease(struct AdbcDatabase* database, - struct AdbcError* error) { - return PostgresDatabaseRelease(database, error); -} - -AdbcStatusCode 
AdbcDatabaseSetOption(struct AdbcDatabase* database, const char* key, - const char* value, struct AdbcError* error) { - return PostgresDatabaseSetOption(database, key, value, error); -} - -// --------------------------------------------------------------------- -// AdbcConnection - -namespace { -AdbcStatusCode PostgresConnectionCommit(struct AdbcConnection* connection, - struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = - reinterpret_cast*>(connection->private_data); - return (*ptr)->Commit(error); -} - -AdbcStatusCode PostgresConnectionGetInfo(struct AdbcConnection* connection, - uint32_t* info_codes, size_t info_codes_length, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresConnectionGetObjects( - struct AdbcConnection* connection, int depth, const char* catalog, - const char* db_schema, const char* table_name, const char** table_types, - const char* column_name, struct ArrowArrayStream* stream, struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresConnectionGetTableSchema( - struct AdbcConnection* connection, const char* catalog, const char* db_schema, - const char* table_name, struct ArrowSchema* schema, struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = - reinterpret_cast*>(connection->private_data); - return (*ptr)->GetTableSchema(catalog, db_schema, table_name, schema, error); -} - -AdbcStatusCode PostgresConnectionGetTableTypes(struct AdbcConnection* connection, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresConnectionInit(struct AdbcConnection* connection, - struct AdbcDatabase* database, - struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = - reinterpret_cast*>(connection->private_data); 
- return (*ptr)->Init(database, error); -} - -AdbcStatusCode PostgresConnectionNew(struct AdbcConnection* connection, - struct AdbcError* error) { - auto impl = std::make_shared(); - connection->private_data = new std::shared_ptr(impl); - return ADBC_STATUS_OK; -} - -AdbcStatusCode PostgresConnectionReadPartition(struct AdbcConnection* connection, - const uint8_t* serialized_partition, - size_t serialized_length, - struct ArrowArrayStream* out, - struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresConnectionRelease(struct AdbcConnection* connection, - struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = - reinterpret_cast*>(connection->private_data); - AdbcStatusCode status = (*ptr)->Release(error); - delete ptr; - connection->private_data = nullptr; - return status; -} - -AdbcStatusCode PostgresConnectionRollback(struct AdbcConnection* connection, - struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = - reinterpret_cast*>(connection->private_data); - return (*ptr)->Rollback(error); -} - -AdbcStatusCode PostgresConnectionSetOption(struct AdbcConnection* connection, - const char* key, const char* value, - struct AdbcError* error) { - if (!connection->private_data) return ADBC_STATUS_INVALID_STATE; - auto ptr = - reinterpret_cast*>(connection->private_data); - return (*ptr)->SetOption(key, value, error); -} - -} // namespace -AdbcStatusCode AdbcConnectionCommit(struct AdbcConnection* connection, - struct AdbcError* error) { - return PostgresConnectionCommit(connection, error); -} - -AdbcStatusCode AdbcConnectionGetInfo(struct AdbcConnection* connection, - uint32_t* info_codes, size_t info_codes_length, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - return PostgresConnectionGetInfo(connection, info_codes, info_codes_length, stream, - 
error); -} - -AdbcStatusCode AdbcConnectionGetObjects(struct AdbcConnection* connection, int depth, - const char* catalog, const char* db_schema, - const char* table_name, const char** table_types, - const char* column_name, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - return PostgresConnectionGetObjects(connection, depth, catalog, db_schema, table_name, - table_types, column_name, stream, error); -} - -AdbcStatusCode AdbcConnectionGetTableSchema(struct AdbcConnection* connection, - const char* catalog, const char* db_schema, - const char* table_name, - struct ArrowSchema* schema, - struct AdbcError* error) { - return PostgresConnectionGetTableSchema(connection, catalog, db_schema, table_name, - schema, error); -} - -AdbcStatusCode AdbcConnectionGetTableTypes(struct AdbcConnection* connection, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - return PostgresConnectionGetTableTypes(connection, stream, error); -} - -AdbcStatusCode AdbcConnectionInit(struct AdbcConnection* connection, - struct AdbcDatabase* database, - struct AdbcError* error) { - return PostgresConnectionInit(connection, database, error); -} - -AdbcStatusCode AdbcConnectionNew(struct AdbcConnection* connection, - struct AdbcError* error) { - return PostgresConnectionNew(connection, error); -} - -AdbcStatusCode AdbcConnectionReadPartition(struct AdbcConnection* connection, - const uint8_t* serialized_partition, - size_t serialized_length, - struct ArrowArrayStream* out, - struct AdbcError* error) { - return PostgresConnectionReadPartition(connection, serialized_partition, - serialized_length, out, error); -} - -AdbcStatusCode AdbcConnectionRelease(struct AdbcConnection* connection, - struct AdbcError* error) { - return PostgresConnectionRelease(connection, error); -} - -AdbcStatusCode AdbcConnectionRollback(struct AdbcConnection* connection, - struct AdbcError* error) { - return PostgresConnectionRollback(connection, error); -} - -AdbcStatusCode 
AdbcConnectionSetOption(struct AdbcConnection* connection, const char* key, - const char* value, struct AdbcError* error) { - return PostgresConnectionSetOption(connection, key, value, error); -} - -// --------------------------------------------------------------------- -// AdbcStatement - -namespace { -AdbcStatusCode PostgresStatementBind(struct AdbcStatement* statement, - struct ArrowArray* values, - struct ArrowSchema* schema, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->Bind(values, schema, error); -} - -AdbcStatusCode PostgresStatementBindStream(struct AdbcStatement* statement, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->Bind(stream, error); -} - -AdbcStatusCode PostgresStatementExecutePartitions(struct AdbcStatement* statement, - struct ArrowSchema* schema, - struct AdbcPartitions* partitions, - int64_t* rows_affected, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresStatementExecuteQuery(struct AdbcStatement* statement, - struct ArrowArrayStream* output, - int64_t* rows_affected, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->ExecuteQuery(output, rows_affected, error); -} - -AdbcStatusCode PostgresStatementGetPartitionDesc(struct AdbcStatement* statement, - uint8_t* partition_desc, - struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresStatementGetPartitionDescSize(struct AdbcStatement* statement, - size_t* length, - struct AdbcError* 
error) { - return ADBC_STATUS_NOT_IMPLEMENTED; -} - -AdbcStatusCode PostgresStatementGetParameterSchema(struct AdbcStatement* statement, - struct ArrowSchema* schema, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->GetParameterSchema(schema, error); -} - -AdbcStatusCode PostgresStatementNew(struct AdbcConnection* connection, - struct AdbcStatement* statement, - struct AdbcError* error) { - auto impl = std::make_shared(); - statement->private_data = new std::shared_ptr(impl); - return impl->New(connection, error); -} - -AdbcStatusCode PostgresStatementPrepare(struct AdbcStatement* statement, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->Prepare(error); -} - -AdbcStatusCode PostgresStatementRelease(struct AdbcStatement* statement, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - auto status = (*ptr)->Release(error); - delete ptr; - statement->private_data = nullptr; - return status; -} - -AdbcStatusCode PostgresStatementSetOption(struct AdbcStatement* statement, - const char* key, const char* value, - struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->SetOption(key, value, error); -} - -AdbcStatusCode PostgresStatementSetSqlQuery(struct AdbcStatement* statement, - const char* query, struct AdbcError* error) { - if (!statement->private_data) return ADBC_STATUS_INVALID_STATE; - auto* ptr = - reinterpret_cast*>(statement->private_data); - return (*ptr)->SetSqlQuery(query, error); -} -} // namespace - -AdbcStatusCode AdbcStatementBind(struct AdbcStatement* statement, - struct ArrowArray* values, 
struct ArrowSchema* schema, - struct AdbcError* error) { - return PostgresStatementBind(statement, values, schema, error); -} - -AdbcStatusCode AdbcStatementBindStream(struct AdbcStatement* statement, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - return PostgresStatementBindStream(statement, stream, error); -} - -AdbcStatusCode AdbcStatementExecutePartitions(struct AdbcStatement* statement, - ArrowSchema* schema, - struct AdbcPartitions* partitions, - int64_t* rows_affected, - struct AdbcError* error) { - return PostgresStatementExecutePartitions(statement, schema, partitions, rows_affected, - error); -} - -AdbcStatusCode AdbcStatementExecuteQuery(struct AdbcStatement* statement, - struct ArrowArrayStream* output, - int64_t* rows_affected, - struct AdbcError* error) { - return PostgresStatementExecuteQuery(statement, output, rows_affected, error); -} - -AdbcStatusCode AdbcStatementGetPartitionDesc(struct AdbcStatement* statement, - uint8_t* partition_desc, - struct AdbcError* error) { - return PostgresStatementGetPartitionDesc(statement, partition_desc, error); -} - -AdbcStatusCode AdbcStatementGetPartitionDescSize(struct AdbcStatement* statement, - size_t* length, - struct AdbcError* error) { - return PostgresStatementGetPartitionDescSize(statement, length, error); -} - -AdbcStatusCode AdbcStatementGetParameterSchema(struct AdbcStatement* statement, - struct ArrowSchema* schema, - struct AdbcError* error) { - return PostgresStatementGetParameterSchema(statement, schema, error); -} - -AdbcStatusCode AdbcStatementNew(struct AdbcConnection* connection, - struct AdbcStatement* statement, - struct AdbcError* error) { - return PostgresStatementNew(connection, statement, error); -} - -AdbcStatusCode AdbcStatementPrepare(struct AdbcStatement* statement, - struct AdbcError* error) { - return PostgresStatementPrepare(statement, error); -} - -AdbcStatusCode AdbcStatementRelease(struct AdbcStatement* statement, - struct AdbcError* error) { - return 
PostgresStatementRelease(statement, error); -} - -AdbcStatusCode AdbcStatementSetOption(struct AdbcStatement* statement, const char* key, - const char* value, struct AdbcError* error) { - return PostgresStatementSetOption(statement, key, value, error); -} - -AdbcStatusCode AdbcStatementSetSqlQuery(struct AdbcStatement* statement, - const char* query, struct AdbcError* error) { - return PostgresStatementSetSqlQuery(statement, query, error); -} - -extern "C" { -ADBC_EXPORT -AdbcStatusCode AdbcDriverInit(int version, void* raw_driver, struct AdbcError* error) { - if (version != ADBC_VERSION_1_0_0) return ADBC_STATUS_NOT_IMPLEMENTED; - - auto* driver = reinterpret_cast(raw_driver); - std::memset(driver, 0, sizeof(*driver)); - driver->DatabaseInit = PostgresDatabaseInit; - driver->DatabaseNew = PostgresDatabaseNew; - driver->DatabaseRelease = PostgresDatabaseRelease; - driver->DatabaseSetOption = PostgresDatabaseSetOption; - - driver->ConnectionCommit = PostgresConnectionCommit; - driver->ConnectionGetInfo = PostgresConnectionGetInfo; - driver->ConnectionGetObjects = PostgresConnectionGetObjects; - driver->ConnectionGetTableSchema = PostgresConnectionGetTableSchema; - driver->ConnectionGetTableTypes = PostgresConnectionGetTableTypes; - driver->ConnectionInit = PostgresConnectionInit; - driver->ConnectionNew = PostgresConnectionNew; - driver->ConnectionReadPartition = PostgresConnectionReadPartition; - driver->ConnectionRelease = PostgresConnectionRelease; - driver->ConnectionRollback = PostgresConnectionRollback; - driver->ConnectionSetOption = PostgresConnectionSetOption; - - driver->StatementBind = PostgresStatementBind; - driver->StatementBindStream = PostgresStatementBindStream; - driver->StatementExecutePartitions = PostgresStatementExecutePartitions; - driver->StatementExecuteQuery = PostgresStatementExecuteQuery; - driver->StatementGetParameterSchema = PostgresStatementGetParameterSchema; - driver->StatementNew = PostgresStatementNew; - driver->StatementPrepare = 
PostgresStatementPrepare; - driver->StatementRelease = PostgresStatementRelease; - driver->StatementSetOption = PostgresStatementSetOption; - driver->StatementSetSqlQuery = PostgresStatementSetSqlQuery; - return ADBC_STATUS_OK; -} -} From 6b08ab0fc634d74338a3e8af31ed8b68b4f6e078 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 16:53:53 -0300 Subject: [PATCH 82/90] fix R packaging --- r/adbcpostgresql/bootstrap.R | 1 + r/adbcpostgresql/src/.gitignore | 2 ++ r/adbcpostgresql/src/Makevars.in | 1 + r/adbcpostgresql/src/Makevars.ucrt | 1 + 4 files changed, 5 insertions(+) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index b7c84fc5ae..6ffc043798 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -21,6 +21,7 @@ files_to_vendor <- c( "../../adbc.h", "../../c/driver/postgresql/util.h", "../../c/driver/postgresql/postgres_type.h", + "../../c/driver/postgresql/postgres_type.cc", "../../c/driver/postgresql/statement.h", "../../c/driver/postgresql/statement.cc", "../../c/driver/postgresql/connection.h", diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore index 565c6c8ad6..9c8a8fb23f 100644 --- a/r/adbcpostgresql/src/.gitignore +++ b/r/adbcpostgresql/src/.gitignore @@ -22,9 +22,11 @@ adbc.h connection.cc connection.h database.h +database.cc postgresql.cc statement.h statement.cc postgres_type.h +postgres_type.cc util.h Makevars diff --git a/r/adbcpostgresql/src/Makevars.in b/r/adbcpostgresql/src/Makevars.in index c062d1176e..7d772b0ded 100644 --- a/r/adbcpostgresql/src/Makevars.in +++ b/r/adbcpostgresql/src/Makevars.in @@ -23,4 +23,5 @@ OBJECTS = init.o \ database.o \ statement.o \ postgresql.o \ + postgres_type.o \ nanoarrow/nanoarrow.o diff --git a/r/adbcpostgresql/src/Makevars.ucrt b/r/adbcpostgresql/src/Makevars.ucrt index 0fc2d0c0af..cec428ee34 100644 --- a/r/adbcpostgresql/src/Makevars.ucrt +++ b/r/adbcpostgresql/src/Makevars.ucrt @@ -23,4 +23,5 @@ OBJECTS = init.o \ 
database.o \ statement.o \ postgresql.o \ + postgres_type.o \ nanoarrow/nanoarrow.o From bb56c3a3f6452b0bd16031c5bbc88590eed20857 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 23:00:32 -0300 Subject: [PATCH 83/90] attempt exporting things for tests on windows --- c/driver/postgresql/CMakeLists.txt | 1 + c/driver/postgresql/postgres_type.h | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 5681039596..67aad1776d 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -85,6 +85,7 @@ if(ADBC_BUILD_TESTS) nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-postgresql-test PRIVATE cxx_std_17) + target_compile_definitions(adbc-driver-postgresql-test PRIVATE -DADBC_BUILDING_TESTS) adbc_configure_target(adbc-driver-postgresql-test) endif() diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 4475a6fac9..863845c847 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -26,6 +26,14 @@ #include +#include + +#if defined(ADBC_BUILDING_TESTS) +#define ADBC_EXPORT_TEST ADBC_EXPORT +#else +#define ADBC_EXPORT_TEST +#endif + namespace adbcpq { // An enum of the types available in most Postgres pg_type tables @@ -190,11 +198,11 @@ class PostgresType { // do not have a corresponding Arrow type are returned as Binary with field // metadata ADBC:posgresql:typname. These types can be represented as their // binary COPY representation in the output. 
- ArrowErrorCode SetSchema(ArrowSchema* schema) const; + ADBC_EXPORT_TEST ArrowErrorCode SetSchema(ArrowSchema* schema) const; - static ArrowErrorCode FromSchema(const PostgresTypeResolver& resolver, - ArrowSchema* schema, PostgresType* out, - ArrowError* error); + ADBC_EXPORT_TEST static ArrowErrorCode FromSchema(const PostgresTypeResolver& resolver, + ArrowSchema* schema, + PostgresType* out, ArrowError* error); private: uint32_t oid_; @@ -267,7 +275,7 @@ class PostgresTypeResolver { // of Inserts matters: Non-array types must be inserted before the corresponding // array types and class definitions must be inserted before the corresponding // class type using InsertClass(). - ArrowErrorCode Insert(const Item& item, ArrowError* error); + ADBC_EXPORT_TEST ArrowErrorCode Insert(const Item& item, ArrowError* error); // Insert a class definition. For the purposes of resolving a PostgresType // instance, this is simply a vector of field_name: oid tuples. The specified From c90a619ecebc15c57d1b884ccd752bf500c126bb Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Apr 2023 23:07:45 -0300 Subject: [PATCH 84/90] maybe more export symbols --- c/driver/postgresql/postgres_type.cc | 17 +++++++++-------- c/driver/postgresql/postgres_type.h | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc index ca246d7fb2..dd16aa4af8 100644 --- a/c/driver/postgresql/postgres_type.cc +++ b/c/driver/postgresql/postgres_type.cc @@ -25,7 +25,7 @@ namespace adbcpq { -ArrowErrorCode PostgresType::SetSchema(ArrowSchema* schema) const { +ADBC_EXPORT_TEST ArrowErrorCode PostgresType::SetSchema(ArrowSchema* schema) const { switch (type_id_) { case PostgresTypeId::BOOL: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); @@ -85,9 +85,9 @@ ArrowErrorCode PostgresType::SetSchema(ArrowSchema* schema) const { return NANOARROW_OK; } -ArrowErrorCode 
PostgresType::FromSchema(const PostgresTypeResolver& resolver, - ArrowSchema* schema, PostgresType* out, - ArrowError* error) { +ADBC_EXPORT_TEST ArrowErrorCode +PostgresType::FromSchema(const PostgresTypeResolver& resolver, ArrowSchema* schema, + PostgresType* out, ArrowError* error) { ArrowSchemaView schema_view; NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); @@ -129,7 +129,8 @@ ArrowErrorCode PostgresType::FromSchema(const PostgresTypeResolver& resolver, } } -ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, ArrowError* error) { +ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, + ArrowError* error) { auto result = base_.find(item.typreceive); if (result == base_.end()) { ArrowErrorSet(error, "Base type not found for type '%s' with receive function '%s'", @@ -191,7 +192,7 @@ ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, ArrowError* error) return NANOARROW_OK; } -const char* PostgresTyprecv(PostgresTypeId type_id) { +ADBC_EXPORT_TEST const char* PostgresTyprecv(PostgresTypeId type_id) { switch (type_id) { case PostgresTypeId::ACLITEM: return "aclitem_recv"; @@ -356,7 +357,7 @@ const char* PostgresTyprecv(PostgresTypeId type_id) { } } -const char* PostgresTypname(PostgresTypeId type_id) { +ADBC_EXPORT_TEST const char* PostgresTypname(PostgresTypeId type_id) { switch (type_id) { case PostgresTypeId::ACLITEM: return "aclitem"; @@ -521,7 +522,7 @@ const char* PostgresTypname(PostgresTypeId type_id) { } } -std::vector PostgresTypeIdAll(bool nested) { +ADBC_EXPORT_TEST std::vector PostgresTypeIdAll(bool nested) { std::vector base = {PostgresTypeId::ACLITEM, PostgresTypeId::ANYARRAY, PostgresTypeId::ANYCOMPATIBLEARRAY, diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 863845c847..0b455b9ee0 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -123,16 +123,16 @@ enum class PostgresTypeId { // Returns the 
receive function name as defined in the typrecieve column // of the pg_type table. This name is the one that gets used to look up // the PostgresTypeId. -const char* PostgresTyprecv(PostgresTypeId type_id); +ADBC_EXPORT_TEST const char* PostgresTyprecv(PostgresTypeId type_id); // Returns a likely typname value for a given PostgresTypeId. This is useful // for testing and error messages but may not be the actual value present // in the pg_type typname column. -const char* PostgresTypname(PostgresTypeId type_id); +ADBC_EXPORT_TEST const char* PostgresTypname(PostgresTypeId type_id); // A vector of all type IDs, optionally including the nested types PostgresTypeId::ARRAY, // PostgresTypeId::DOMAIN_, PostgresTypeId::RECORD, and PostgresTypeId::RANGE. -std::vector PostgresTypeIdAll(bool nested = true); +ADBC_EXPORT_TEST std::vector PostgresTypeIdAll(bool nested = true); // Forward-declare the type resolver for use in PostgresType::FromSchema class PostgresTypeResolver; From 3ab7a8482b6b67ee49d26cbf872274c0d9ab1a51 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Apr 2023 09:39:30 -0300 Subject: [PATCH 85/90] rename some enums --- c/driver/postgresql/postgres_type.cc | 52 ++++----- c/driver/postgresql/postgres_type.h | 166 +++++++++++++-------------- 2 files changed, 109 insertions(+), 109 deletions(-) diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc index dd16aa4af8..bc494a69f0 100644 --- a/c/driver/postgresql/postgres_type.cc +++ b/c/driver/postgresql/postgres_type.cc @@ -27,42 +27,42 @@ namespace adbcpq { ADBC_EXPORT_TEST ArrowErrorCode PostgresType::SetSchema(ArrowSchema* schema) const { switch (type_id_) { - case PostgresTypeId::BOOL: + case PostgresTypeId::kBool: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; - case PostgresTypeId::INT2: + case PostgresTypeId::kInt2: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); break; - case PostgresTypeId::INT4: + 
case PostgresTypeId::kInt4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); break; - case PostgresTypeId::INT8: + case PostgresTypeId::kInt8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); break; - case PostgresTypeId::FLOAT4: + case PostgresTypeId::kFloat4: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); break; - case PostgresTypeId::FLOAT8: + case PostgresTypeId::kFloat8: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; - case PostgresTypeId::CHAR: - case PostgresTypeId::BPCHAR: - case PostgresTypeId::VARCHAR: - case PostgresTypeId::TEXT: + case PostgresTypeId::kChar: + case PostgresTypeId::kBpchar: + case PostgresTypeId::kVarchar: + case PostgresTypeId::kText: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; - case PostgresTypeId::BYTEA: + case PostgresTypeId::kBytea: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; - case PostgresTypeId::RECORD: + case PostgresTypeId::kRecord: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); for (int64_t i = 0; i < n_children(); i++) { NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); } break; - case PostgresTypeId::ARRAY: + case PostgresTypeId::kArray: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); break; @@ -93,26 +93,26 @@ PostgresType::FromSchema(const PostgresTypeResolver& resolver, ArrowSchema* sche switch (schema_view.type) { case NANOARROW_TYPE_BOOL: - return resolver.Find(resolver.GetOID(PostgresTypeId::BOOL), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kBool), out, error); case NANOARROW_TYPE_INT8: case NANOARROW_TYPE_UINT8: case NANOARROW_TYPE_INT16: - return resolver.Find(resolver.GetOID(PostgresTypeId::INT2), out, error); + return 
resolver.Find(resolver.GetOID(PostgresTypeId::kInt2), out, error); case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_INT32: - return resolver.Find(resolver.GetOID(PostgresTypeId::INT4), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kInt4), out, error); case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT64: - return resolver.Find(resolver.GetOID(PostgresTypeId::INT8), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kInt8), out, error); case NANOARROW_TYPE_FLOAT: - return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT4), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat4), out, error); case NANOARROW_TYPE_DOUBLE: - return resolver.Find(resolver.GetOID(PostgresTypeId::FLOAT8), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat8), out, error); case NANOARROW_TYPE_STRING: - return resolver.Find(resolver.GetOID(PostgresTypeId::TEXT), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kText), out, error); case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return resolver.Find(resolver.GetOID(PostgresTypeId::BYTEA), out, error); + return resolver.Find(resolver.GetOID(PostgresTypeId::kBytea), out, error); case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: { @@ -142,7 +142,7 @@ ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); switch (base.type_id()) { - case PostgresTypeId::ARRAY: { + case PostgresTypeId::kArray: { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); @@ -151,11 +151,11 @@ ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, break; } - case PostgresTypeId::RECORD: { + case PostgresTypeId::kRecord: { std::vector> child_desc; NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, 
&child_desc, error)); - PostgresType out(PostgresTypeId::RECORD); + PostgresType out(PostgresTypeId::kRecord); for (const auto& child_item : child_desc) { PostgresType child; NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); @@ -167,7 +167,7 @@ ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, break; } - case PostgresTypeId::DOMAIN_: { + case PostgresTypeId::kDomain: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); @@ -175,7 +175,7 @@ ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, break; } - case PostgresTypeId::RANGE: { + case PostgresTypeId::kRange: { PostgresType base_type; NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index 0b455b9ee0..ceb32afec0 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -38,86 +38,86 @@ namespace adbcpq { // An enum of the types available in most Postgres pg_type tables enum class PostgresTypeId { - UNINITIALIZED, - ACLITEM, - ANYARRAY, - ANYCOMPATIBLEARRAY, - ARRAY, - BIT, - BOOL, - BOX, - BPCHAR, - BRIN_BLOOM_SUMMARY, - BRIN_MINMAX_MULTI_SUMMARY, - BYTEA, - CASH, - CHAR, - CIDR, - CID, - CIRCLE, - CSTRING, - DATE, - DOMAIN_, - FLOAT4, - FLOAT8, - INET, - INT2, - INT2VECTOR, - INT4, - INT8, - INTERVAL, - JSON, - JSONB, - JSONPATH, - LINE, - LSEG, - MACADDR, - MACADDR8, - MULTIRANGE, - NAME, - NUMERIC, - OID, - OIDVECTOR, - PATH, - PG_DDL_COMMAND, - PG_DEPENDENCIES, - PG_LSN, - PG_MCV_LIST, - PG_NDISTINCT, - PG_NODE_TREE, - PG_SNAPSHOT, - POINT, - POLY, - RANGE, - RECORD, - REGCLASS, - REGCOLLATION, - REGCONFIG, - REGDICTIONARY, - REGNAMESPACE, - REGOPERATOR, - REGOPER, - REGPROCEDURE, - REGPROC, - REGROLE, - REGTYPE, - TEXT, - TID, - 
TIME, - TIMESTAMP, - TIMESTAMPTZ, - TIMETZ, - TSQUERY, - TSVECTOR, - TXID_SNAPSHOT, - UNKNOWN, - UUID, - VARBIT, - VARCHAR, - VOID, - XID8, - XID, - XML + kUninitialized, + kAclitem, + kAnyarray, + kAnycompatiblearray, + kArray, + kBit, + kBool, + kBox, + kBpchar, + kBrinBloomSummary, + kBrinMinmaxMultiSummary, + kBytea, + kCash, + kChar, + kCidr, + kCid, + kCircle, + kCstring, + kDate, + kDomain, + kFloat4, + kFloat8, + kInet, + kInt2, + kInt2vector, + kInt4, + kInt8, + kInterval, + kJson, + kJsonb, + kJsonpath, + kLine, + kLseg, + kMacaddr, + kMacaddr8, + kMultirange, + kName, + kNumeric, + kOid, + kOidvector, + kPath, + kPgDdlCommand, + kPgDependencies, + kPgLsn, + kPgMcvList, + kPgNdistinct, + kPgNodeTree, + kPgSnapshot, + kPoint, + kPoly, + kRange, + kRecord, + kRegclass, + kRegcollation, + kRegconfig, + kRegdictionary, + kRegnamespace, + kRegoperator, + kRegoper, + kRegprocedure, + kRegproc, + kRegrole, + kRegtype, + kText, + kTid, + kTime, + kTimestamp, + kTimestamptz, + kTimetz, + kTsquery, + kTsvector, + kTxidSnapshot, + kUnknown, + kUuid, + kVarbit, + kVarchar, + kVoid, + kXid8, + kXid, + kXml, }; // Returns the receive function name as defined in the typrecieve column @@ -144,7 +144,7 @@ class PostgresType { public: explicit PostgresType(PostgresTypeId type_id) : oid_(0), type_id_(type_id) {} - PostgresType() : PostgresType(PostgresTypeId::UNINITIALIZED) {} + PostgresType() : PostgresType(PostgresTypeId::kUninitialized) {} void AppendChild(const std::string& field_name, const PostgresType& type) { PostgresType child(type); @@ -165,7 +165,7 @@ class PostgresType { } PostgresType Array(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType out(PostgresTypeId::ARRAY); + PostgresType out(PostgresTypeId::kArray); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; @@ -177,7 +177,7 @@ class PostgresType { } PostgresType Range(uint32_t oid = 0, const std::string& typname = "") const { - PostgresType 
out(PostgresTypeId::RANGE); + PostgresType out(PostgresTypeId::kRange); out.AppendChild("item", *this); out.oid_ = oid; out.typname_ = typname; From ab6316386f8fec2dbd94d659e224b91f4a16638f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Apr 2023 09:48:36 -0300 Subject: [PATCH 86/90] fix all enum values --- c/driver/postgresql/postgres_type.cc | 474 +++++++++++----------- c/driver/postgresql/postgres_type_test.cc | 90 ++-- c/driver/postgresql/statement.cc | 10 +- 3 files changed, 287 insertions(+), 287 deletions(-) diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc index bc494a69f0..c0d73279c8 100644 --- a/c/driver/postgresql/postgres_type.cc +++ b/c/driver/postgresql/postgres_type.cc @@ -194,163 +194,163 @@ ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, ADBC_EXPORT_TEST const char* PostgresTyprecv(PostgresTypeId type_id) { switch (type_id) { - case PostgresTypeId::ACLITEM: + case PostgresTypeId::kAclitem: return "aclitem_recv"; - case PostgresTypeId::ANYARRAY: + case PostgresTypeId::kAnyarray: return "anyarray_recv"; - case PostgresTypeId::ANYCOMPATIBLEARRAY: + case PostgresTypeId::kAnycompatiblearray: return "anycompatiblearray_recv"; - case PostgresTypeId::ARRAY: + case PostgresTypeId::kArray: return "array_recv"; - case PostgresTypeId::BIT: + case PostgresTypeId::kBit: return "bit_recv"; - case PostgresTypeId::BOOL: + case PostgresTypeId::kBool: return "boolrecv"; - case PostgresTypeId::BOX: + case PostgresTypeId::kBox: return "box_recv"; - case PostgresTypeId::BPCHAR: + case PostgresTypeId::kBpchar: return "bpcharrecv"; - case PostgresTypeId::BRIN_BLOOM_SUMMARY: + case PostgresTypeId::kBrinBloomSummary: return "brin_bloom_summary_recv"; - case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: + case PostgresTypeId::kBrinMinmaxMultiSummary: return "brin_minmax_multi_summary_recv"; - case PostgresTypeId::BYTEA: + case PostgresTypeId::kBytea: return "bytearecv"; - case 
PostgresTypeId::CASH: + case PostgresTypeId::kCash: return "cash_recv"; - case PostgresTypeId::CHAR: + case PostgresTypeId::kChar: return "charrecv"; - case PostgresTypeId::CIDR: + case PostgresTypeId::kCidr: return "cidr_recv"; - case PostgresTypeId::CID: + case PostgresTypeId::kCid: return "cidrecv"; - case PostgresTypeId::CIRCLE: + case PostgresTypeId::kCircle: return "circle_recv"; - case PostgresTypeId::CSTRING: + case PostgresTypeId::kCstring: return "cstring_recv"; - case PostgresTypeId::DATE: + case PostgresTypeId::kDate: return "date_recv"; - case PostgresTypeId::DOMAIN_: + case PostgresTypeId::kDomain: return "domain_recv"; - case PostgresTypeId::FLOAT4: + case PostgresTypeId::kFloat4: return "float4recv"; - case PostgresTypeId::FLOAT8: + case PostgresTypeId::kFloat8: return "float8recv"; - case PostgresTypeId::INET: + case PostgresTypeId::kInet: return "inet_recv"; - case PostgresTypeId::INT2: + case PostgresTypeId::kInt2: return "int2recv"; - case PostgresTypeId::INT2VECTOR: + case PostgresTypeId::kInt2vector: return "int2vectorrecv"; - case PostgresTypeId::INT4: + case PostgresTypeId::kInt4: return "int4recv"; - case PostgresTypeId::INT8: + case PostgresTypeId::kInt8: return "int8recv"; - case PostgresTypeId::INTERVAL: + case PostgresTypeId::kInterval: return "interval_recv"; - case PostgresTypeId::JSON: + case PostgresTypeId::kJson: return "json_recv"; - case PostgresTypeId::JSONB: + case PostgresTypeId::kJsonb: return "jsonb_recv"; - case PostgresTypeId::JSONPATH: + case PostgresTypeId::kJsonpath: return "jsonpath_recv"; - case PostgresTypeId::LINE: + case PostgresTypeId::kLine: return "line_recv"; - case PostgresTypeId::LSEG: + case PostgresTypeId::kLseg: return "lseg_recv"; - case PostgresTypeId::MACADDR: + case PostgresTypeId::kMacaddr: return "macaddr_recv"; - case PostgresTypeId::MACADDR8: + case PostgresTypeId::kMacaddr8: return "macaddr8_recv"; - case PostgresTypeId::MULTIRANGE: + case PostgresTypeId::kMultirange: return "multirange_recv"; - 
case PostgresTypeId::NAME: + case PostgresTypeId::kName: return "namerecv"; - case PostgresTypeId::NUMERIC: + case PostgresTypeId::kNumeric: return "numeric_recv"; - case PostgresTypeId::OID: + case PostgresTypeId::kOid: return "oidrecv"; - case PostgresTypeId::OIDVECTOR: + case PostgresTypeId::kOidvector: return "oidvectorrecv"; - case PostgresTypeId::PATH: + case PostgresTypeId::kPath: return "path_recv"; - case PostgresTypeId::PG_NODE_TREE: + case PostgresTypeId::kPgNodeTree: return "pg_node_tree_recv"; - case PostgresTypeId::PG_NDISTINCT: + case PostgresTypeId::kPgNdistinct: return "pg_ndistinct_recv"; - case PostgresTypeId::PG_DEPENDENCIES: + case PostgresTypeId::kPgDependencies: return "pg_dependencies_recv"; - case PostgresTypeId::PG_LSN: + case PostgresTypeId::kPgLsn: return "pg_lsn_recv"; - case PostgresTypeId::PG_MCV_LIST: + case PostgresTypeId::kPgMcvList: return "pg_mcv_list_recv"; - case PostgresTypeId::PG_DDL_COMMAND: + case PostgresTypeId::kPgDdlCommand: return "pg_ddl_command_recv"; - case PostgresTypeId::PG_SNAPSHOT: + case PostgresTypeId::kPgSnapshot: return "pg_snapshot_recv"; - case PostgresTypeId::POINT: + case PostgresTypeId::kPoint: return "point_recv"; - case PostgresTypeId::POLY: + case PostgresTypeId::kPoly: return "poly_recv"; - case PostgresTypeId::RANGE: + case PostgresTypeId::kRange: return "range_recv"; - case PostgresTypeId::RECORD: + case PostgresTypeId::kRecord: return "record_recv"; - case PostgresTypeId::REGCLASS: + case PostgresTypeId::kRegclass: return "regclassrecv"; - case PostgresTypeId::REGCOLLATION: + case PostgresTypeId::kRegcollation: return "regcollationrecv"; - case PostgresTypeId::REGCONFIG: + case PostgresTypeId::kRegconfig: return "regconfigrecv"; - case PostgresTypeId::REGDICTIONARY: + case PostgresTypeId::kRegdictionary: return "regdictionaryrecv"; - case PostgresTypeId::REGNAMESPACE: + case PostgresTypeId::kRegnamespace: return "regnamespacerecv"; - case PostgresTypeId::REGOPERATOR: + case 
PostgresTypeId::kRegoperator: return "regoperatorrecv"; - case PostgresTypeId::REGOPER: + case PostgresTypeId::kRegoper: return "regoperrecv"; - case PostgresTypeId::REGPROCEDURE: + case PostgresTypeId::kRegprocedure: return "regprocedurerecv"; - case PostgresTypeId::REGPROC: + case PostgresTypeId::kRegproc: return "regprocrecv"; - case PostgresTypeId::REGROLE: + case PostgresTypeId::kRegrole: return "regrolerecv"; - case PostgresTypeId::REGTYPE: + case PostgresTypeId::kRegtype: return "regtyperecv"; - case PostgresTypeId::TEXT: + case PostgresTypeId::kText: return "textrecv"; - case PostgresTypeId::TID: + case PostgresTypeId::kTid: return "tidrecv"; - case PostgresTypeId::TIME: + case PostgresTypeId::kTime: return "time_recv"; - case PostgresTypeId::TIMESTAMP: + case PostgresTypeId::kTimestamp: return "timestamp_recv"; - case PostgresTypeId::TIMESTAMPTZ: + case PostgresTypeId::kTimestamptz: return "timestamptz_recv"; - case PostgresTypeId::TIMETZ: + case PostgresTypeId::kTimetz: return "timetz_recv"; - case PostgresTypeId::TSQUERY: + case PostgresTypeId::kTsquery: return "tsqueryrecv"; - case PostgresTypeId::TSVECTOR: + case PostgresTypeId::kTsvector: return "tsvectorrecv"; - case PostgresTypeId::TXID_SNAPSHOT: + case PostgresTypeId::kTxidSnapshot: return "txid_snapshot_recv"; - case PostgresTypeId::UNKNOWN: + case PostgresTypeId::kUnknown: return "unknownrecv"; - case PostgresTypeId::UUID: + case PostgresTypeId::kUuid: return "uuid_recv"; - case PostgresTypeId::VARBIT: + case PostgresTypeId::kVarbit: return "varbit_recv"; - case PostgresTypeId::VARCHAR: + case PostgresTypeId::kVarchar: return "varcharrecv"; - case PostgresTypeId::VOID: + case PostgresTypeId::kVoid: return "void_recv"; - case PostgresTypeId::XID8: + case PostgresTypeId::kXid8: return "xid8recv"; - case PostgresTypeId::XID: + case PostgresTypeId::kXid: return "xidrecv"; - case PostgresTypeId::XML: + case PostgresTypeId::kXml: return "xml_recv"; default: return ""; @@ -359,163 +359,163 @@ 
ADBC_EXPORT_TEST const char* PostgresTyprecv(PostgresTypeId type_id) { ADBC_EXPORT_TEST const char* PostgresTypname(PostgresTypeId type_id) { switch (type_id) { - case PostgresTypeId::ACLITEM: + case PostgresTypeId::kAclitem: return "aclitem"; - case PostgresTypeId::ANYARRAY: + case PostgresTypeId::kAnyarray: return "anyarray"; - case PostgresTypeId::ANYCOMPATIBLEARRAY: + case PostgresTypeId::kAnycompatiblearray: return "anycompatiblearray"; - case PostgresTypeId::ARRAY: + case PostgresTypeId::kArray: return "array"; - case PostgresTypeId::BIT: + case PostgresTypeId::kBit: return "bit"; - case PostgresTypeId::BOOL: + case PostgresTypeId::kBool: return "bool"; - case PostgresTypeId::BOX: + case PostgresTypeId::kBox: return "box"; - case PostgresTypeId::BPCHAR: + case PostgresTypeId::kBpchar: return "bpchar"; - case PostgresTypeId::BRIN_BLOOM_SUMMARY: + case PostgresTypeId::kBrinBloomSummary: return "brin_bloom_summary"; - case PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY: + case PostgresTypeId::kBrinMinmaxMultiSummary: return "brin_minmax_multi_summary"; - case PostgresTypeId::BYTEA: + case PostgresTypeId::kBytea: return "bytea"; - case PostgresTypeId::CASH: + case PostgresTypeId::kCash: return "cash"; - case PostgresTypeId::CHAR: + case PostgresTypeId::kChar: return "char"; - case PostgresTypeId::CIDR: + case PostgresTypeId::kCidr: return "cidr"; - case PostgresTypeId::CID: + case PostgresTypeId::kCid: return "cid"; - case PostgresTypeId::CIRCLE: + case PostgresTypeId::kCircle: return "circle"; - case PostgresTypeId::CSTRING: + case PostgresTypeId::kCstring: return "cstring"; - case PostgresTypeId::DATE: + case PostgresTypeId::kDate: return "date"; - case PostgresTypeId::DOMAIN_: + case PostgresTypeId::kDomain: return "domain"; - case PostgresTypeId::FLOAT4: + case PostgresTypeId::kFloat4: return "float4"; - case PostgresTypeId::FLOAT8: + case PostgresTypeId::kFloat8: return "float8"; - case PostgresTypeId::INET: + case PostgresTypeId::kInet: return "inet"; - case 
PostgresTypeId::INT2: + case PostgresTypeId::kInt2: return "int2"; - case PostgresTypeId::INT2VECTOR: + case PostgresTypeId::kInt2vector: return "int2vector"; - case PostgresTypeId::INT4: + case PostgresTypeId::kInt4: return "int4"; - case PostgresTypeId::INT8: + case PostgresTypeId::kInt8: return "int8"; - case PostgresTypeId::INTERVAL: + case PostgresTypeId::kInterval: return "interval"; - case PostgresTypeId::JSON: + case PostgresTypeId::kJson: return "json"; - case PostgresTypeId::JSONB: + case PostgresTypeId::kJsonb: return "jsonb"; - case PostgresTypeId::JSONPATH: + case PostgresTypeId::kJsonpath: return "jsonpath"; - case PostgresTypeId::LINE: + case PostgresTypeId::kLine: return "line"; - case PostgresTypeId::LSEG: + case PostgresTypeId::kLseg: return "lseg"; - case PostgresTypeId::MACADDR: + case PostgresTypeId::kMacaddr: return "macaddr"; - case PostgresTypeId::MACADDR8: + case PostgresTypeId::kMacaddr8: return "macaddr8"; - case PostgresTypeId::MULTIRANGE: + case PostgresTypeId::kMultirange: return "multirange"; - case PostgresTypeId::NAME: + case PostgresTypeId::kName: return "name"; - case PostgresTypeId::NUMERIC: + case PostgresTypeId::kNumeric: return "numeric"; - case PostgresTypeId::OID: + case PostgresTypeId::kOid: return "oid"; - case PostgresTypeId::OIDVECTOR: + case PostgresTypeId::kOidvector: return "oidvector"; - case PostgresTypeId::PATH: + case PostgresTypeId::kPath: return "path"; - case PostgresTypeId::PG_NODE_TREE: + case PostgresTypeId::kPgNodeTree: return "pg_node_tree"; - case PostgresTypeId::PG_NDISTINCT: + case PostgresTypeId::kPgNdistinct: return "pg_ndistinct"; - case PostgresTypeId::PG_DEPENDENCIES: + case PostgresTypeId::kPgDependencies: return "pg_dependencies"; - case PostgresTypeId::PG_LSN: + case PostgresTypeId::kPgLsn: return "pg_lsn"; - case PostgresTypeId::PG_MCV_LIST: + case PostgresTypeId::kPgMcvList: return "pg_mcv_list"; - case PostgresTypeId::PG_DDL_COMMAND: + case PostgresTypeId::kPgDdlCommand: return 
"pg_ddl_command"; - case PostgresTypeId::PG_SNAPSHOT: + case PostgresTypeId::kPgSnapshot: return "pg_snapshot"; - case PostgresTypeId::POINT: + case PostgresTypeId::kPoint: return "point"; - case PostgresTypeId::POLY: + case PostgresTypeId::kPoly: return "poly"; - case PostgresTypeId::RANGE: + case PostgresTypeId::kRange: return "range"; - case PostgresTypeId::RECORD: + case PostgresTypeId::kRecord: return "record"; - case PostgresTypeId::REGCLASS: + case PostgresTypeId::kRegclass: return "regclass"; - case PostgresTypeId::REGCOLLATION: + case PostgresTypeId::kRegcollation: return "regcollation"; - case PostgresTypeId::REGCONFIG: + case PostgresTypeId::kRegconfig: return "regconfig"; - case PostgresTypeId::REGDICTIONARY: + case PostgresTypeId::kRegdictionary: return "regdictionary"; - case PostgresTypeId::REGNAMESPACE: + case PostgresTypeId::kRegnamespace: return "regnamespace"; - case PostgresTypeId::REGOPERATOR: + case PostgresTypeId::kRegoperator: return "regoperator"; - case PostgresTypeId::REGOPER: + case PostgresTypeId::kRegoper: return "regoper"; - case PostgresTypeId::REGPROCEDURE: + case PostgresTypeId::kRegprocedure: return "regprocedure"; - case PostgresTypeId::REGPROC: + case PostgresTypeId::kRegproc: return "regproc"; - case PostgresTypeId::REGROLE: + case PostgresTypeId::kRegrole: return "regrole"; - case PostgresTypeId::REGTYPE: + case PostgresTypeId::kRegtype: return "regtype"; - case PostgresTypeId::TEXT: + case PostgresTypeId::kText: return "text"; - case PostgresTypeId::TID: + case PostgresTypeId::kTid: return "tid"; - case PostgresTypeId::TIME: + case PostgresTypeId::kTime: return "time"; - case PostgresTypeId::TIMESTAMP: + case PostgresTypeId::kTimestamp: return "timestamp"; - case PostgresTypeId::TIMESTAMPTZ: + case PostgresTypeId::kTimestamptz: return "timestamptz"; - case PostgresTypeId::TIMETZ: + case PostgresTypeId::kTimetz: return "timetz"; - case PostgresTypeId::TSQUERY: + case PostgresTypeId::kTsquery: return "tsquery"; - case 
PostgresTypeId::TSVECTOR: + case PostgresTypeId::kTsvector: return "tsvector"; - case PostgresTypeId::TXID_SNAPSHOT: + case PostgresTypeId::kTxidSnapshot: return "txid_snapshot"; - case PostgresTypeId::UNKNOWN: + case PostgresTypeId::kUnknown: return "unknown"; - case PostgresTypeId::UUID: + case PostgresTypeId::kUuid: return "uuid"; - case PostgresTypeId::VARBIT: + case PostgresTypeId::kVarbit: return "varbit"; - case PostgresTypeId::VARCHAR: + case PostgresTypeId::kVarchar: return "varchar"; - case PostgresTypeId::VOID: + case PostgresTypeId::kVoid: return "void"; - case PostgresTypeId::XID8: + case PostgresTypeId::kXid8: return "xid8"; - case PostgresTypeId::XID: + case PostgresTypeId::kXid: return "xid"; - case PostgresTypeId::XML: + case PostgresTypeId::kXml: return "xml"; default: return ""; @@ -523,87 +523,87 @@ ADBC_EXPORT_TEST const char* PostgresTypname(PostgresTypeId type_id) { } ADBC_EXPORT_TEST std::vector PostgresTypeIdAll(bool nested) { - std::vector base = {PostgresTypeId::ACLITEM, - PostgresTypeId::ANYARRAY, - PostgresTypeId::ANYCOMPATIBLEARRAY, - PostgresTypeId::BIT, - PostgresTypeId::BOOL, - PostgresTypeId::BOX, - PostgresTypeId::BPCHAR, - PostgresTypeId::BRIN_BLOOM_SUMMARY, - PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY, - PostgresTypeId::BYTEA, - PostgresTypeId::CASH, - PostgresTypeId::CHAR, - PostgresTypeId::CIDR, - PostgresTypeId::CID, - PostgresTypeId::CIRCLE, - PostgresTypeId::CSTRING, - PostgresTypeId::DATE, - PostgresTypeId::FLOAT4, - PostgresTypeId::FLOAT8, - PostgresTypeId::INET, - PostgresTypeId::INT2, - PostgresTypeId::INT2VECTOR, - PostgresTypeId::INT4, - PostgresTypeId::INT8, - PostgresTypeId::INTERVAL, - PostgresTypeId::JSON, - PostgresTypeId::JSONB, - PostgresTypeId::JSONPATH, - PostgresTypeId::LINE, - PostgresTypeId::LSEG, - PostgresTypeId::MACADDR, - PostgresTypeId::MACADDR8, - PostgresTypeId::MULTIRANGE, - PostgresTypeId::NAME, - PostgresTypeId::NUMERIC, - PostgresTypeId::OID, - PostgresTypeId::OIDVECTOR, - PostgresTypeId::PATH, - 
PostgresTypeId::PG_NODE_TREE, - PostgresTypeId::PG_NDISTINCT, - PostgresTypeId::PG_DEPENDENCIES, - PostgresTypeId::PG_LSN, - PostgresTypeId::PG_MCV_LIST, - PostgresTypeId::PG_DDL_COMMAND, - PostgresTypeId::PG_SNAPSHOT, - PostgresTypeId::POINT, - PostgresTypeId::POLY, - PostgresTypeId::REGCLASS, - PostgresTypeId::REGCOLLATION, - PostgresTypeId::REGCONFIG, - PostgresTypeId::REGDICTIONARY, - PostgresTypeId::REGNAMESPACE, - PostgresTypeId::REGOPERATOR, - PostgresTypeId::REGOPER, - PostgresTypeId::REGPROCEDURE, - PostgresTypeId::REGPROC, - PostgresTypeId::REGROLE, - PostgresTypeId::REGTYPE, - PostgresTypeId::TEXT, - PostgresTypeId::TID, - PostgresTypeId::TIME, - PostgresTypeId::TIMESTAMP, - PostgresTypeId::TIMESTAMPTZ, - PostgresTypeId::TIMETZ, - PostgresTypeId::TSQUERY, - PostgresTypeId::TSVECTOR, - PostgresTypeId::TXID_SNAPSHOT, - PostgresTypeId::UNKNOWN, - PostgresTypeId::UUID, - PostgresTypeId::VARBIT, - PostgresTypeId::VARCHAR, - PostgresTypeId::VOID, - PostgresTypeId::XID8, - PostgresTypeId::XID, - PostgresTypeId::XML}; + std::vector base = {PostgresTypeId::kAclitem, + PostgresTypeId::kAnyarray, + PostgresTypeId::kAnycompatiblearray, + PostgresTypeId::kBit, + PostgresTypeId::kBool, + PostgresTypeId::kBox, + PostgresTypeId::kBpchar, + PostgresTypeId::kBrinBloomSummary, + PostgresTypeId::kBrinMinmaxMultiSummary, + PostgresTypeId::kBytea, + PostgresTypeId::kCash, + PostgresTypeId::kChar, + PostgresTypeId::kCidr, + PostgresTypeId::kCid, + PostgresTypeId::kCircle, + PostgresTypeId::kCstring, + PostgresTypeId::kDate, + PostgresTypeId::kFloat4, + PostgresTypeId::kFloat8, + PostgresTypeId::kInet, + PostgresTypeId::kInt2, + PostgresTypeId::kInt2vector, + PostgresTypeId::kInt4, + PostgresTypeId::kInt8, + PostgresTypeId::kInterval, + PostgresTypeId::kJson, + PostgresTypeId::kJsonb, + PostgresTypeId::kJsonpath, + PostgresTypeId::kLine, + PostgresTypeId::kLseg, + PostgresTypeId::kMacaddr, + PostgresTypeId::kMacaddr8, + PostgresTypeId::kMultirange, + PostgresTypeId::kName, + 
PostgresTypeId::kNumeric, + PostgresTypeId::kOid, + PostgresTypeId::kOidvector, + PostgresTypeId::kPath, + PostgresTypeId::kPgNodeTree, + PostgresTypeId::kPgNdistinct, + PostgresTypeId::kPgDependencies, + PostgresTypeId::kPgLsn, + PostgresTypeId::kPgMcvList, + PostgresTypeId::kPgDdlCommand, + PostgresTypeId::kPgSnapshot, + PostgresTypeId::kPoint, + PostgresTypeId::kPoly, + PostgresTypeId::kRegclass, + PostgresTypeId::kRegcollation, + PostgresTypeId::kRegconfig, + PostgresTypeId::kRegdictionary, + PostgresTypeId::kRegnamespace, + PostgresTypeId::kRegoperator, + PostgresTypeId::kRegoper, + PostgresTypeId::kRegprocedure, + PostgresTypeId::kRegproc, + PostgresTypeId::kRegrole, + PostgresTypeId::kRegtype, + PostgresTypeId::kText, + PostgresTypeId::kTid, + PostgresTypeId::kTime, + PostgresTypeId::kTimestamp, + PostgresTypeId::kTimestamptz, + PostgresTypeId::kTimetz, + PostgresTypeId::kTsquery, + PostgresTypeId::kTsvector, + PostgresTypeId::kTxidSnapshot, + PostgresTypeId::kUnknown, + PostgresTypeId::kUuid, + PostgresTypeId::kVarbit, + PostgresTypeId::kVarchar, + PostgresTypeId::kVoid, + PostgresTypeId::kXid8, + PostgresTypeId::kXid, + PostgresTypeId::kXml}; if (nested) { - base.push_back(PostgresTypeId::ARRAY); - base.push_back(PostgresTypeId::RECORD); - base.push_back(PostgresTypeId::RANGE); - base.push_back(PostgresTypeId::DOMAIN_); + base.push_back(PostgresTypeId::kArray); + base.push_back(PostgresTypeId::kRecord); + base.push_back(PostgresTypeId::kRange); + base.push_back(PostgresTypeId::kDomain); } return base; diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 46bfd00daa..74fe53cd43 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -47,26 +47,26 @@ class MockTypeResolver : public PostgresTypeResolver { item.oid++; item.typname = "_bool"; item.typreceive = "array_recv"; - item.child_oid = GetOID(PostgresTypeId::BOOL); + item.child_oid = 
GetOID(PostgresTypeId::kBool); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "boolrange"; item.typreceive = "range_recv"; - item.base_oid = GetOID(PostgresTypeId::BOOL); + item.base_oid = GetOID(PostgresTypeId::kBool); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; item.typname = "custombool"; item.typreceive = "domain_recv"; - item.base_oid = GetOID(PostgresTypeId::BOOL); + item.base_oid = GetOID(PostgresTypeId::kBool); NANOARROW_RETURN_NOT_OK(Insert(item, nullptr)); item.oid++; uint32_t class_oid = item.oid; std::vector> record_fields = { - {"int4_col", GetOID(PostgresTypeId::INT4)}, - {"text_col", GetOID(PostgresTypeId::TEXT)}}; + {"int4_col", GetOID(PostgresTypeId::kInt4)}, + {"text_col", GetOID(PostgresTypeId::kText)}}; InsertClass(class_oid, std::move(record_fields)); item.oid++; @@ -80,10 +80,10 @@ class MockTypeResolver : public PostgresTypeResolver { }; TEST(PostgresTypeTest, PostgresTypeBasic) { - PostgresType type(PostgresTypeId::BOOL); + PostgresType type(PostgresTypeId::kBool); EXPECT_EQ(type.field_name(), ""); EXPECT_EQ(type.typname(), ""); - EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::kBool); EXPECT_EQ(type.oid(), 0); EXPECT_EQ(type.n_children(), 0); @@ -116,9 +116,9 @@ TEST(PostgresTypeTest, PostgresTypeBasic) { EXPECT_EQ(domain.typname(), "domain type name"); EXPECT_EQ(domain.type_id(), type.type_id()); - PostgresType record(PostgresTypeId::RECORD); + PostgresType record(PostgresTypeId::kRecord); record.AppendChild("col1", type); - EXPECT_EQ(record.type_id(), PostgresTypeId::RECORD); + EXPECT_EQ(record.type_id(), PostgresTypeId::kRecord); EXPECT_EQ(record.n_children(), 1); EXPECT_EQ(record.child(0).type_id(), type.type_id()); EXPECT_EQ(record.child(0).field_name(), "col1"); @@ -128,61 +128,61 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) { ArrowSchema schema; ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::BOOL).SetSchema(&schema), 
NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kBool).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::INT2).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kInt2).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "s"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::INT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kInt4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "i"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::INT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kInt8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "l"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::FLOAT4).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kFloat4).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "f"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::FLOAT8).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kFloat8).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "g"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::TEXT).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kText).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "u"); schema.release(&schema); ArrowSchemaInit(&schema); - EXPECT_EQ(PostgresType(PostgresTypeId::BYTEA).SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kBytea).SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); schema.release(&schema); ArrowSchemaInit(&schema); - 
EXPECT_EQ(PostgresType(PostgresTypeId::BOOL).Array().SetSchema(&schema), NANOARROW_OK); + EXPECT_EQ(PostgresType(PostgresTypeId::kBool).Array().SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+l"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType record(PostgresTypeId::RECORD); - record.AppendChild("col1", PostgresType(PostgresTypeId::BOOL)); + PostgresType record(PostgresTypeId::kRecord); + record.AppendChild("col1", PostgresType(PostgresTypeId::kBool)); EXPECT_EQ(record.SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "+s"); EXPECT_STREQ(schema.children[0]->format, "b"); schema.release(&schema); ArrowSchemaInit(&schema); - PostgresType unknown(PostgresTypeId::BRIN_MINMAX_MULTI_SUMMARY); + PostgresType unknown(PostgresTypeId::kBrinMinmaxMultiSummary); EXPECT_EQ(unknown.WithPgTypeInfo(0, "some_name").SetSchema(&schema), NANOARROW_OK); EXPECT_STREQ(schema.format, "z"); @@ -202,73 +202,73 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BOOL), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::kBool); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT8), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT8), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, 
NANOARROW_TYPE_INT16), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT2); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt2); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT16), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT32), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_UINT32), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT64), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::INT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_FLOAT), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::FLOAT4); + EXPECT_EQ(type.type_id(), PostgresTypeId::kFloat4); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_DOUBLE), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), 
PostgresTypeId::FLOAT8); + EXPECT_EQ(type.type_id(), PostgresTypeId::kFloat8); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_BINARY), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::BYTEA); + EXPECT_EQ(type.type_id(), PostgresTypeId::kBytea); schema.release(&schema); ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_STRING), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::TEXT); + EXPECT_EQ(type.type_id(), PostgresTypeId::kText); schema.release(&schema); ArrowSchemaInit(&schema); @@ -276,8 +276,8 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_BOOL), NANOARROW_OK); EXPECT_EQ(adbcpq::PostgresType::FromSchema(resolver, &schema, &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.type_id(), PostgresTypeId::ARRAY); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::kArray); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::kBool); schema.release(&schema); ArrowError error; @@ -344,7 +344,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(10, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 10); EXPECT_EQ(type.typname(), "some_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::kBool); // Check insert/resolve of array type item.oid = 11; @@ -355,9 +355,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(11, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 11); EXPECT_EQ(type.typname(), "some_array_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::ARRAY); + EXPECT_EQ(type.type_id(), PostgresTypeId::kArray); EXPECT_EQ(type.child(0).oid(), 10); - EXPECT_EQ(type.child(0).type_id(), 
PostgresTypeId::BOOL); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::kBool); // Check reverse lookup of array type from item type EXPECT_EQ(resolver.FindArray(10, &type, &error), NANOARROW_OK); @@ -372,9 +372,9 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(12, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 12); EXPECT_EQ(type.typname(), "some_range_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::RANGE); + EXPECT_EQ(type.type_id(), PostgresTypeId::kRange); EXPECT_EQ(type.child(0).oid(), 10); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::BOOL); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::kBool); // Check insert/resolve of domain type item.oid = 13; @@ -385,7 +385,7 @@ TEST(PostgresTypeTest, PostgresTypeResolver) { EXPECT_EQ(resolver.Find(13, &type, &error), NANOARROW_OK); EXPECT_EQ(type.oid(), 13); EXPECT_EQ(type.typname(), "some_domain_type_name"); - EXPECT_EQ(type.type_id(), PostgresTypeId::BOOL); + EXPECT_EQ(type.type_id(), PostgresTypeId::kBool); } TEST(PostgresTypeTest, PostgresTypeResolveRecord) { @@ -394,12 +394,12 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { ASSERT_EQ(resolver.Init(), NANOARROW_OK); PostgresType type; - EXPECT_EQ(resolver.Find(resolver.GetOID(PostgresTypeId::RECORD), &type, nullptr), + EXPECT_EQ(resolver.Find(resolver.GetOID(PostgresTypeId::kRecord), &type, nullptr), NANOARROW_OK); - EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::RECORD)); + EXPECT_EQ(type.oid(), resolver.GetOID(PostgresTypeId::kRecord)); EXPECT_EQ(type.n_children(), 2); EXPECT_EQ(type.child(0).field_name(), "int4_col"); - EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::INT4); + EXPECT_EQ(type.child(0).type_id(), PostgresTypeId::kInt4); EXPECT_EQ(type.child(1).field_name(), "text_col"); - EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::TEXT); + EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::kText); } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc 
index f86a7a5616..c88be6829e 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -184,23 +184,23 @@ struct BindStream { PostgresTypeId type_id; switch (bind_schema_fields[i].type) { case ArrowType::NANOARROW_TYPE_INT16: - type_id = PostgresTypeId::INT2; + type_id = PostgresTypeId::kInt2; param_lengths[i] = 2; break; case ArrowType::NANOARROW_TYPE_INT32: - type_id = PostgresTypeId::INT4; + type_id = PostgresTypeId::kInt4; param_lengths[i] = 4; break; case ArrowType::NANOARROW_TYPE_INT64: - type_id = PostgresTypeId::INT8; + type_id = PostgresTypeId::kInt8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_DOUBLE: - type_id = PostgresTypeId::FLOAT8; + type_id = PostgresTypeId::kFloat8; param_lengths[i] = 8; break; case ArrowType::NANOARROW_TYPE_STRING: - type_id = PostgresTypeId::TEXT; + type_id = PostgresTypeId::kText; param_lengths[i] = 0; break; default: From 8b54f3ae25146adfe1302b8d34bef9cf140e2d7f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Apr 2023 09:58:01 -0300 Subject: [PATCH 87/90] back to header-only for testing on Windows --- c/driver/postgresql/CMakeLists.txt | 1 - c/driver/postgresql/postgres_type.cc | 612 --------------------------- c/driver/postgresql/postgres_type.h | 606 +++++++++++++++++++++++++- 3 files changed, 588 insertions(+), 631 deletions(-) delete mode 100644 c/driver/postgresql/postgres_type.cc diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 67aad1776d..3a05da85a3 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -39,7 +39,6 @@ endif() add_arrow_lib(adbc_driver_postgresql SOURCES - postgres_type.cc connection.cc database.cc postgresql.cc diff --git a/c/driver/postgresql/postgres_type.cc b/c/driver/postgresql/postgres_type.cc deleted file mode 100644 index c0d73279c8..0000000000 --- a/c/driver/postgresql/postgres_type.cc +++ /dev/null @@ -1,612 +0,0 @@ -// Licensed to the Apache Software 
Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include - -#include "postgres_type.h" - -namespace adbcpq { - -ADBC_EXPORT_TEST ArrowErrorCode PostgresType::SetSchema(ArrowSchema* schema) const { - switch (type_id_) { - case PostgresTypeId::kBool: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); - break; - case PostgresTypeId::kInt2: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); - break; - case PostgresTypeId::kInt4: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); - break; - case PostgresTypeId::kInt8: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); - break; - case PostgresTypeId::kFloat4: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); - break; - case PostgresTypeId::kFloat8: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); - break; - case PostgresTypeId::kChar: - case PostgresTypeId::kBpchar: - case PostgresTypeId::kVarchar: - case PostgresTypeId::kText: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); - break; - case PostgresTypeId::kBytea: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, 
NANOARROW_TYPE_BINARY)); - break; - - case PostgresTypeId::kRecord: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); - for (int64_t i = 0; i < n_children(); i++) { - NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); - } - break; - - case PostgresTypeId::kArray: - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); - NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); - break; - default: { - // For any types we don't explicitly know how to deal with, we can still - // return the bytes postgres gives us and attach the type name as metadata - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); - nanoarrow::UniqueBuffer buffer; - ArrowMetadataBuilderInit(buffer.get(), nullptr); - NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( - buffer.get(), ArrowCharView("ADBC:postgresql:typname"), - ArrowCharView(typname_.c_str()))); - NANOARROW_RETURN_NOT_OK( - ArrowSchemaSetMetadata(schema, reinterpret_cast(buffer->data))); - break; - } - } - - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, field_name_.c_str())); - return NANOARROW_OK; -} - -ADBC_EXPORT_TEST ArrowErrorCode -PostgresType::FromSchema(const PostgresTypeResolver& resolver, ArrowSchema* schema, - PostgresType* out, ArrowError* error) { - ArrowSchemaView schema_view; - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); - - switch (schema_view.type) { - case NANOARROW_TYPE_BOOL: - return resolver.Find(resolver.GetOID(PostgresTypeId::kBool), out, error); - case NANOARROW_TYPE_INT8: - case NANOARROW_TYPE_UINT8: - case NANOARROW_TYPE_INT16: - return resolver.Find(resolver.GetOID(PostgresTypeId::kInt2), out, error); - case NANOARROW_TYPE_UINT16: - case NANOARROW_TYPE_INT32: - return resolver.Find(resolver.GetOID(PostgresTypeId::kInt4), out, error); - case NANOARROW_TYPE_UINT32: - case NANOARROW_TYPE_INT64: - return resolver.Find(resolver.GetOID(PostgresTypeId::kInt8), out, error); - 
case NANOARROW_TYPE_FLOAT: - return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat4), out, error); - case NANOARROW_TYPE_DOUBLE: - return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat8), out, error); - case NANOARROW_TYPE_STRING: - return resolver.Find(resolver.GetOID(PostgresTypeId::kText), out, error); - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return resolver.Find(resolver.GetOID(PostgresTypeId::kBytea), out, error); - case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_LARGE_LIST: - case NANOARROW_TYPE_FIXED_SIZE_LIST: { - PostgresType child; - NANOARROW_RETURN_NOT_OK( - PostgresType::FromSchema(resolver, schema->children[0], &child, error)); - return resolver.FindArray(child.oid(), out, error); - } - - default: - ArrowErrorSet(error, "Can't map Arrow type '%s' to Postgres type", - ArrowTypeString(schema_view.type)); - return ENOTSUP; - } -} - -ADBC_EXPORT_TEST ArrowErrorCode PostgresTypeResolver::Insert(const Item& item, - ArrowError* error) { - auto result = base_.find(item.typreceive); - if (result == base_.end()) { - ArrowErrorSet(error, "Base type not found for type '%s' with receive function '%s'", - item.typname, item.typreceive); - return ENOTSUP; - } - - const PostgresType& base = result->second; - PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); - - switch (base.type_id()) { - case PostgresTypeId::kArray: { - PostgresType child; - NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); - mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - array_mapping_.insert({child.oid(), item.oid}); - break; - } - - case PostgresTypeId::kRecord: { - std::vector> child_desc; - NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); - - PostgresType out(PostgresTypeId::kRecord); - for (const auto& child_item : child_desc) { - PostgresType child; - NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, 
error)); - out.AppendChild(child_item.first, child); - } - - mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - case PostgresTypeId::kDomain: { - PostgresType base_type; - NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); - mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - case PostgresTypeId::kRange: { - PostgresType base_type; - NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); - mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - default: - mapping_.insert({item.oid, type}); - reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); - break; - } - - return NANOARROW_OK; -} - -ADBC_EXPORT_TEST const char* PostgresTyprecv(PostgresTypeId type_id) { - switch (type_id) { - case PostgresTypeId::kAclitem: - return "aclitem_recv"; - case PostgresTypeId::kAnyarray: - return "anyarray_recv"; - case PostgresTypeId::kAnycompatiblearray: - return "anycompatiblearray_recv"; - case PostgresTypeId::kArray: - return "array_recv"; - case PostgresTypeId::kBit: - return "bit_recv"; - case PostgresTypeId::kBool: - return "boolrecv"; - case PostgresTypeId::kBox: - return "box_recv"; - case PostgresTypeId::kBpchar: - return "bpcharrecv"; - case PostgresTypeId::kBrinBloomSummary: - return "brin_bloom_summary_recv"; - case PostgresTypeId::kBrinMinmaxMultiSummary: - return "brin_minmax_multi_summary_recv"; - case PostgresTypeId::kBytea: - return "bytearecv"; - case PostgresTypeId::kCash: - return "cash_recv"; - case PostgresTypeId::kChar: - return "charrecv"; - case PostgresTypeId::kCidr: - return "cidr_recv"; - case PostgresTypeId::kCid: - return "cidrecv"; - case PostgresTypeId::kCircle: - return "circle_recv"; - case 
PostgresTypeId::kCstring: - return "cstring_recv"; - case PostgresTypeId::kDate: - return "date_recv"; - case PostgresTypeId::kDomain: - return "domain_recv"; - case PostgresTypeId::kFloat4: - return "float4recv"; - case PostgresTypeId::kFloat8: - return "float8recv"; - case PostgresTypeId::kInet: - return "inet_recv"; - case PostgresTypeId::kInt2: - return "int2recv"; - case PostgresTypeId::kInt2vector: - return "int2vectorrecv"; - case PostgresTypeId::kInt4: - return "int4recv"; - case PostgresTypeId::kInt8: - return "int8recv"; - case PostgresTypeId::kInterval: - return "interval_recv"; - case PostgresTypeId::kJson: - return "json_recv"; - case PostgresTypeId::kJsonb: - return "jsonb_recv"; - case PostgresTypeId::kJsonpath: - return "jsonpath_recv"; - case PostgresTypeId::kLine: - return "line_recv"; - case PostgresTypeId::kLseg: - return "lseg_recv"; - case PostgresTypeId::kMacaddr: - return "macaddr_recv"; - case PostgresTypeId::kMacaddr8: - return "macaddr8_recv"; - case PostgresTypeId::kMultirange: - return "multirange_recv"; - case PostgresTypeId::kName: - return "namerecv"; - case PostgresTypeId::kNumeric: - return "numeric_recv"; - case PostgresTypeId::kOid: - return "oidrecv"; - case PostgresTypeId::kOidvector: - return "oidvectorrecv"; - case PostgresTypeId::kPath: - return "path_recv"; - case PostgresTypeId::kPgNodeTree: - return "pg_node_tree_recv"; - case PostgresTypeId::kPgNdistinct: - return "pg_ndistinct_recv"; - case PostgresTypeId::kPgDependencies: - return "pg_dependencies_recv"; - case PostgresTypeId::kPgLsn: - return "pg_lsn_recv"; - case PostgresTypeId::kPgMcvList: - return "pg_mcv_list_recv"; - case PostgresTypeId::kPgDdlCommand: - return "pg_ddl_command_recv"; - case PostgresTypeId::kPgSnapshot: - return "pg_snapshot_recv"; - case PostgresTypeId::kPoint: - return "point_recv"; - case PostgresTypeId::kPoly: - return "poly_recv"; - case PostgresTypeId::kRange: - return "range_recv"; - case PostgresTypeId::kRecord: - return "record_recv"; - 
case PostgresTypeId::kRegclass: - return "regclassrecv"; - case PostgresTypeId::kRegcollation: - return "regcollationrecv"; - case PostgresTypeId::kRegconfig: - return "regconfigrecv"; - case PostgresTypeId::kRegdictionary: - return "regdictionaryrecv"; - case PostgresTypeId::kRegnamespace: - return "regnamespacerecv"; - case PostgresTypeId::kRegoperator: - return "regoperatorrecv"; - case PostgresTypeId::kRegoper: - return "regoperrecv"; - case PostgresTypeId::kRegprocedure: - return "regprocedurerecv"; - case PostgresTypeId::kRegproc: - return "regprocrecv"; - case PostgresTypeId::kRegrole: - return "regrolerecv"; - case PostgresTypeId::kRegtype: - return "regtyperecv"; - case PostgresTypeId::kText: - return "textrecv"; - case PostgresTypeId::kTid: - return "tidrecv"; - case PostgresTypeId::kTime: - return "time_recv"; - case PostgresTypeId::kTimestamp: - return "timestamp_recv"; - case PostgresTypeId::kTimestamptz: - return "timestamptz_recv"; - case PostgresTypeId::kTimetz: - return "timetz_recv"; - case PostgresTypeId::kTsquery: - return "tsqueryrecv"; - case PostgresTypeId::kTsvector: - return "tsvectorrecv"; - case PostgresTypeId::kTxidSnapshot: - return "txid_snapshot_recv"; - case PostgresTypeId::kUnknown: - return "unknownrecv"; - case PostgresTypeId::kUuid: - return "uuid_recv"; - case PostgresTypeId::kVarbit: - return "varbit_recv"; - case PostgresTypeId::kVarchar: - return "varcharrecv"; - case PostgresTypeId::kVoid: - return "void_recv"; - case PostgresTypeId::kXid8: - return "xid8recv"; - case PostgresTypeId::kXid: - return "xidrecv"; - case PostgresTypeId::kXml: - return "xml_recv"; - default: - return ""; - } -} - -ADBC_EXPORT_TEST const char* PostgresTypname(PostgresTypeId type_id) { - switch (type_id) { - case PostgresTypeId::kAclitem: - return "aclitem"; - case PostgresTypeId::kAnyarray: - return "anyarray"; - case PostgresTypeId::kAnycompatiblearray: - return "anycompatiblearray"; - case PostgresTypeId::kArray: - return "array"; - case 
PostgresTypeId::kBit: - return "bit"; - case PostgresTypeId::kBool: - return "bool"; - case PostgresTypeId::kBox: - return "box"; - case PostgresTypeId::kBpchar: - return "bpchar"; - case PostgresTypeId::kBrinBloomSummary: - return "brin_bloom_summary"; - case PostgresTypeId::kBrinMinmaxMultiSummary: - return "brin_minmax_multi_summary"; - case PostgresTypeId::kBytea: - return "bytea"; - case PostgresTypeId::kCash: - return "cash"; - case PostgresTypeId::kChar: - return "char"; - case PostgresTypeId::kCidr: - return "cidr"; - case PostgresTypeId::kCid: - return "cid"; - case PostgresTypeId::kCircle: - return "circle"; - case PostgresTypeId::kCstring: - return "cstring"; - case PostgresTypeId::kDate: - return "date"; - case PostgresTypeId::kDomain: - return "domain"; - case PostgresTypeId::kFloat4: - return "float4"; - case PostgresTypeId::kFloat8: - return "float8"; - case PostgresTypeId::kInet: - return "inet"; - case PostgresTypeId::kInt2: - return "int2"; - case PostgresTypeId::kInt2vector: - return "int2vector"; - case PostgresTypeId::kInt4: - return "int4"; - case PostgresTypeId::kInt8: - return "int8"; - case PostgresTypeId::kInterval: - return "interval"; - case PostgresTypeId::kJson: - return "json"; - case PostgresTypeId::kJsonb: - return "jsonb"; - case PostgresTypeId::kJsonpath: - return "jsonpath"; - case PostgresTypeId::kLine: - return "line"; - case PostgresTypeId::kLseg: - return "lseg"; - case PostgresTypeId::kMacaddr: - return "macaddr"; - case PostgresTypeId::kMacaddr8: - return "macaddr8"; - case PostgresTypeId::kMultirange: - return "multirange"; - case PostgresTypeId::kName: - return "name"; - case PostgresTypeId::kNumeric: - return "numeric"; - case PostgresTypeId::kOid: - return "oid"; - case PostgresTypeId::kOidvector: - return "oidvector"; - case PostgresTypeId::kPath: - return "path"; - case PostgresTypeId::kPgNodeTree: - return "pg_node_tree"; - case PostgresTypeId::kPgNdistinct: - return "pg_ndistinct"; - case 
PostgresTypeId::kPgDependencies: - return "pg_dependencies"; - case PostgresTypeId::kPgLsn: - return "pg_lsn"; - case PostgresTypeId::kPgMcvList: - return "pg_mcv_list"; - case PostgresTypeId::kPgDdlCommand: - return "pg_ddl_command"; - case PostgresTypeId::kPgSnapshot: - return "pg_snapshot"; - case PostgresTypeId::kPoint: - return "point"; - case PostgresTypeId::kPoly: - return "poly"; - case PostgresTypeId::kRange: - return "range"; - case PostgresTypeId::kRecord: - return "record"; - case PostgresTypeId::kRegclass: - return "regclass"; - case PostgresTypeId::kRegcollation: - return "regcollation"; - case PostgresTypeId::kRegconfig: - return "regconfig"; - case PostgresTypeId::kRegdictionary: - return "regdictionary"; - case PostgresTypeId::kRegnamespace: - return "regnamespace"; - case PostgresTypeId::kRegoperator: - return "regoperator"; - case PostgresTypeId::kRegoper: - return "regoper"; - case PostgresTypeId::kRegprocedure: - return "regprocedure"; - case PostgresTypeId::kRegproc: - return "regproc"; - case PostgresTypeId::kRegrole: - return "regrole"; - case PostgresTypeId::kRegtype: - return "regtype"; - case PostgresTypeId::kText: - return "text"; - case PostgresTypeId::kTid: - return "tid"; - case PostgresTypeId::kTime: - return "time"; - case PostgresTypeId::kTimestamp: - return "timestamp"; - case PostgresTypeId::kTimestamptz: - return "timestamptz"; - case PostgresTypeId::kTimetz: - return "timetz"; - case PostgresTypeId::kTsquery: - return "tsquery"; - case PostgresTypeId::kTsvector: - return "tsvector"; - case PostgresTypeId::kTxidSnapshot: - return "txid_snapshot"; - case PostgresTypeId::kUnknown: - return "unknown"; - case PostgresTypeId::kUuid: - return "uuid"; - case PostgresTypeId::kVarbit: - return "varbit"; - case PostgresTypeId::kVarchar: - return "varchar"; - case PostgresTypeId::kVoid: - return "void"; - case PostgresTypeId::kXid8: - return "xid8"; - case PostgresTypeId::kXid: - return "xid"; - case PostgresTypeId::kXml: - return "xml"; - 
default: - return ""; - } -} - -ADBC_EXPORT_TEST std::vector PostgresTypeIdAll(bool nested) { - std::vector base = {PostgresTypeId::kAclitem, - PostgresTypeId::kAnyarray, - PostgresTypeId::kAnycompatiblearray, - PostgresTypeId::kBit, - PostgresTypeId::kBool, - PostgresTypeId::kBox, - PostgresTypeId::kBpchar, - PostgresTypeId::kBrinBloomSummary, - PostgresTypeId::kBrinMinmaxMultiSummary, - PostgresTypeId::kBytea, - PostgresTypeId::kCash, - PostgresTypeId::kChar, - PostgresTypeId::kCidr, - PostgresTypeId::kCid, - PostgresTypeId::kCircle, - PostgresTypeId::kCstring, - PostgresTypeId::kDate, - PostgresTypeId::kFloat4, - PostgresTypeId::kFloat8, - PostgresTypeId::kInet, - PostgresTypeId::kInt2, - PostgresTypeId::kInt2vector, - PostgresTypeId::kInt4, - PostgresTypeId::kInt8, - PostgresTypeId::kInterval, - PostgresTypeId::kJson, - PostgresTypeId::kJsonb, - PostgresTypeId::kJsonpath, - PostgresTypeId::kLine, - PostgresTypeId::kLseg, - PostgresTypeId::kMacaddr, - PostgresTypeId::kMacaddr8, - PostgresTypeId::kMultirange, - PostgresTypeId::kName, - PostgresTypeId::kNumeric, - PostgresTypeId::kOid, - PostgresTypeId::kOidvector, - PostgresTypeId::kPath, - PostgresTypeId::kPgNodeTree, - PostgresTypeId::kPgNdistinct, - PostgresTypeId::kPgDependencies, - PostgresTypeId::kPgLsn, - PostgresTypeId::kPgMcvList, - PostgresTypeId::kPgDdlCommand, - PostgresTypeId::kPgSnapshot, - PostgresTypeId::kPoint, - PostgresTypeId::kPoly, - PostgresTypeId::kRegclass, - PostgresTypeId::kRegcollation, - PostgresTypeId::kRegconfig, - PostgresTypeId::kRegdictionary, - PostgresTypeId::kRegnamespace, - PostgresTypeId::kRegoperator, - PostgresTypeId::kRegoper, - PostgresTypeId::kRegprocedure, - PostgresTypeId::kRegproc, - PostgresTypeId::kRegrole, - PostgresTypeId::kRegtype, - PostgresTypeId::kText, - PostgresTypeId::kTid, - PostgresTypeId::kTime, - PostgresTypeId::kTimestamp, - PostgresTypeId::kTimestamptz, - PostgresTypeId::kTimetz, - PostgresTypeId::kTsquery, - PostgresTypeId::kTsvector, - 
PostgresTypeId::kTxidSnapshot, - PostgresTypeId::kUnknown, - PostgresTypeId::kUuid, - PostgresTypeId::kVarbit, - PostgresTypeId::kVarchar, - PostgresTypeId::kVoid, - PostgresTypeId::kXid8, - PostgresTypeId::kXid, - PostgresTypeId::kXml}; - - if (nested) { - base.push_back(PostgresTypeId::kArray); - base.push_back(PostgresTypeId::kRecord); - base.push_back(PostgresTypeId::kRange); - base.push_back(PostgresTypeId::kDomain); - } - - return base; -} - -} // namespace adbcpq diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index ceb32afec0..b327eb518f 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -24,15 +24,7 @@ #include #include -#include - -#include - -#if defined(ADBC_BUILDING_TESTS) -#define ADBC_EXPORT_TEST ADBC_EXPORT -#else -#define ADBC_EXPORT_TEST -#endif +#include namespace adbcpq { @@ -123,18 +115,17 @@ enum class PostgresTypeId { // Returns the receive function name as defined in the typrecieve column // of the pg_type table. This name is the one that gets used to look up // the PostgresTypeId. -ADBC_EXPORT_TEST const char* PostgresTyprecv(PostgresTypeId type_id); +static inline const char* PostgresTyprecv(PostgresTypeId type_id); // Returns a likely typname value for a given PostgresTypeId. This is useful // for testing and error messages but may not be the actual value present // in the pg_type typname column. -ADBC_EXPORT_TEST const char* PostgresTypname(PostgresTypeId type_id); +static inline const char* PostgresTypname(PostgresTypeId type_id); // A vector of all type IDs, optionally including the nested types PostgresTypeId::ARRAY, // PostgresTypeId::DOMAIN_, PostgresTypeId::RECORD, and PostgresTypeId::RANGE. 
-ADBC_EXPORT_TEST std::vector PostgresTypeIdAll(bool nested = true); +static inline std::vector PostgresTypeIdAll(bool nested = true); -// Forward-declare the type resolver for use in PostgresType::FromSchema class PostgresTypeResolver; // An abstraction of a (potentially nested and/or parameterized) Postgres @@ -198,11 +189,69 @@ class PostgresType { // do not have a corresponding Arrow type are returned as Binary with field // metadata ADBC:posgresql:typname. These types can be represented as their // binary COPY representation in the output. - ADBC_EXPORT_TEST ArrowErrorCode SetSchema(ArrowSchema* schema) const; + ArrowErrorCode SetSchema(ArrowSchema* schema) const { + switch (type_id_) { + case PostgresTypeId::kBool: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); + break; + case PostgresTypeId::kInt2: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT16)); + break; + case PostgresTypeId::kInt4: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); + break; + case PostgresTypeId::kInt8: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); + break; + case PostgresTypeId::kFloat4: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); + break; + case PostgresTypeId::kFloat8: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); + break; + case PostgresTypeId::kChar: + case PostgresTypeId::kBpchar: + case PostgresTypeId::kVarchar: + case PostgresTypeId::kText: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); + break; + case PostgresTypeId::kBytea: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + break; + + case PostgresTypeId::kRecord: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children())); + for (int64_t i = 0; i < n_children(); i++) { + NANOARROW_RETURN_NOT_OK(children_[i].SetSchema(schema->children[i])); + } + break; + + case 
PostgresTypeId::kArray: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); + NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0])); + break; + default: { + // For any types we don't explicitly know how to deal with, we can still + // return the bytes postgres gives us and attach the type name as metadata + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); + nanoarrow::UniqueBuffer buffer; + ArrowMetadataBuilderInit(buffer.get(), nullptr); + NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( + buffer.get(), ArrowCharView("ADBC:postgresql:typname"), + ArrowCharView(typname_.c_str()))); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetMetadata(schema, reinterpret_cast(buffer->data))); + break; + } + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, field_name_.c_str())); + return NANOARROW_OK; + } - ADBC_EXPORT_TEST static ArrowErrorCode FromSchema(const PostgresTypeResolver& resolver, - ArrowSchema* schema, - PostgresType* out, ArrowError* error); + static ArrowErrorCode FromSchema(const PostgresTypeResolver& resolver, + ArrowSchema* schema, PostgresType* out, + ArrowError* error); private: uint32_t oid_; @@ -275,7 +324,67 @@ class PostgresTypeResolver { // of Inserts matters: Non-array types must be inserted before the corresponding // array types and class definitions must be inserted before the corresponding // class type using InsertClass(). 
- ADBC_EXPORT_TEST ArrowErrorCode Insert(const Item& item, ArrowError* error); + ArrowErrorCode Insert(const Item& item, ArrowError* error) { + auto result = base_.find(item.typreceive); + if (result == base_.end()) { + ArrowErrorSet(error, "Base type not found for type '%s' with receive function '%s'", + item.typname, item.typreceive); + return ENOTSUP; + } + + const PostgresType& base = result->second; + PostgresType type = base.WithPgTypeInfo(item.oid, item.typname); + + switch (base.type_id()) { + case PostgresTypeId::kArray: { + PostgresType child; + NANOARROW_RETURN_NOT_OK(Find(item.child_oid, &child, error)); + mapping_.insert({item.oid, child.Array(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + array_mapping_.insert({child.oid(), item.oid}); + break; + } + + case PostgresTypeId::kRecord: { + std::vector> child_desc; + NANOARROW_RETURN_NOT_OK(ResolveClass(item.class_oid, &child_desc, error)); + + PostgresType out(PostgresTypeId::kRecord); + for (const auto& child_item : child_desc) { + PostgresType child; + NANOARROW_RETURN_NOT_OK(Find(child_item.second, &child, error)); + out.AppendChild(child_item.first, child); + } + + mapping_.insert({item.oid, out.WithPgTypeInfo(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + case PostgresTypeId::kDomain: { + PostgresType base_type; + NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); + mapping_.insert({item.oid, base_type.Domain(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + case PostgresTypeId::kRange: { + PostgresType base_type; + NANOARROW_RETURN_NOT_OK(Find(item.base_oid, &base_type, error)); + mapping_.insert({item.oid, base_type.Range(item.oid, item.typname)}); + reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + default: + mapping_.insert({item.oid, type}); + 
reverse_mapping_.insert({static_cast(base.type_id()), item.oid}); + break; + } + + return NANOARROW_OK; + } // Insert a class definition. For the purposes of resolving a PostgresType // instance, this is simply a vector of field_name: oid tuples. The specified @@ -324,4 +433,465 @@ class PostgresTypeResolver { } }; +inline ArrowErrorCode PostgresType::FromSchema(const PostgresTypeResolver& resolver, + ArrowSchema* schema, PostgresType* out, + ArrowError* error) { + ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + return resolver.Find(resolver.GetOID(PostgresTypeId::kBool), out, error); + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT16: + return resolver.Find(resolver.GetOID(PostgresTypeId::kInt2), out, error); + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT32: + return resolver.Find(resolver.GetOID(PostgresTypeId::kInt4), out, error); + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT64: + return resolver.Find(resolver.GetOID(PostgresTypeId::kInt8), out, error); + case NANOARROW_TYPE_FLOAT: + return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat4), out, error); + case NANOARROW_TYPE_DOUBLE: + return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat8), out, error); + case NANOARROW_TYPE_STRING: + return resolver.Find(resolver.GetOID(PostgresTypeId::kText), out, error); + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + return resolver.Find(resolver.GetOID(PostgresTypeId::kBytea), out, error); + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: { + PostgresType child; + NANOARROW_RETURN_NOT_OK( + PostgresType::FromSchema(resolver, schema->children[0], &child, error)); + return resolver.FindArray(child.oid(), out, error); + } + + default: + ArrowErrorSet(error, "Can't map Arrow type '%s' to Postgres type", + 
ArrowTypeString(schema_view.type)); + return ENOTSUP; + } +} + +static inline const char* PostgresTyprecv(PostgresTypeId type_id) { + switch (type_id) { + case PostgresTypeId::kAclitem: + return "aclitem_recv"; + case PostgresTypeId::kAnyarray: + return "anyarray_recv"; + case PostgresTypeId::kAnycompatiblearray: + return "anycompatiblearray_recv"; + case PostgresTypeId::kArray: + return "array_recv"; + case PostgresTypeId::kBit: + return "bit_recv"; + case PostgresTypeId::kBool: + return "boolrecv"; + case PostgresTypeId::kBox: + return "box_recv"; + case PostgresTypeId::kBpchar: + return "bpcharrecv"; + case PostgresTypeId::kBrinBloomSummary: + return "brin_bloom_summary_recv"; + case PostgresTypeId::kBrinMinmaxMultiSummary: + return "brin_minmax_multi_summary_recv"; + case PostgresTypeId::kBytea: + return "bytearecv"; + case PostgresTypeId::kCash: + return "cash_recv"; + case PostgresTypeId::kChar: + return "charrecv"; + case PostgresTypeId::kCidr: + return "cidr_recv"; + case PostgresTypeId::kCid: + return "cidrecv"; + case PostgresTypeId::kCircle: + return "circle_recv"; + case PostgresTypeId::kCstring: + return "cstring_recv"; + case PostgresTypeId::kDate: + return "date_recv"; + case PostgresTypeId::kDomain: + return "domain_recv"; + case PostgresTypeId::kFloat4: + return "float4recv"; + case PostgresTypeId::kFloat8: + return "float8recv"; + case PostgresTypeId::kInet: + return "inet_recv"; + case PostgresTypeId::kInt2: + return "int2recv"; + case PostgresTypeId::kInt2vector: + return "int2vectorrecv"; + case PostgresTypeId::kInt4: + return "int4recv"; + case PostgresTypeId::kInt8: + return "int8recv"; + case PostgresTypeId::kInterval: + return "interval_recv"; + case PostgresTypeId::kJson: + return "json_recv"; + case PostgresTypeId::kJsonb: + return "jsonb_recv"; + case PostgresTypeId::kJsonpath: + return "jsonpath_recv"; + case PostgresTypeId::kLine: + return "line_recv"; + case PostgresTypeId::kLseg: + return "lseg_recv"; + case PostgresTypeId::kMacaddr: 
+ return "macaddr_recv"; + case PostgresTypeId::kMacaddr8: + return "macaddr8_recv"; + case PostgresTypeId::kMultirange: + return "multirange_recv"; + case PostgresTypeId::kName: + return "namerecv"; + case PostgresTypeId::kNumeric: + return "numeric_recv"; + case PostgresTypeId::kOid: + return "oidrecv"; + case PostgresTypeId::kOidvector: + return "oidvectorrecv"; + case PostgresTypeId::kPath: + return "path_recv"; + case PostgresTypeId::kPgNodeTree: + return "pg_node_tree_recv"; + case PostgresTypeId::kPgNdistinct: + return "pg_ndistinct_recv"; + case PostgresTypeId::kPgDependencies: + return "pg_dependencies_recv"; + case PostgresTypeId::kPgLsn: + return "pg_lsn_recv"; + case PostgresTypeId::kPgMcvList: + return "pg_mcv_list_recv"; + case PostgresTypeId::kPgDdlCommand: + return "pg_ddl_command_recv"; + case PostgresTypeId::kPgSnapshot: + return "pg_snapshot_recv"; + case PostgresTypeId::kPoint: + return "point_recv"; + case PostgresTypeId::kPoly: + return "poly_recv"; + case PostgresTypeId::kRange: + return "range_recv"; + case PostgresTypeId::kRecord: + return "record_recv"; + case PostgresTypeId::kRegclass: + return "regclassrecv"; + case PostgresTypeId::kRegcollation: + return "regcollationrecv"; + case PostgresTypeId::kRegconfig: + return "regconfigrecv"; + case PostgresTypeId::kRegdictionary: + return "regdictionaryrecv"; + case PostgresTypeId::kRegnamespace: + return "regnamespacerecv"; + case PostgresTypeId::kRegoperator: + return "regoperatorrecv"; + case PostgresTypeId::kRegoper: + return "regoperrecv"; + case PostgresTypeId::kRegprocedure: + return "regprocedurerecv"; + case PostgresTypeId::kRegproc: + return "regprocrecv"; + case PostgresTypeId::kRegrole: + return "regrolerecv"; + case PostgresTypeId::kRegtype: + return "regtyperecv"; + case PostgresTypeId::kText: + return "textrecv"; + case PostgresTypeId::kTid: + return "tidrecv"; + case PostgresTypeId::kTime: + return "time_recv"; + case PostgresTypeId::kTimestamp: + return "timestamp_recv"; + case 
PostgresTypeId::kTimestamptz: + return "timestamptz_recv"; + case PostgresTypeId::kTimetz: + return "timetz_recv"; + case PostgresTypeId::kTsquery: + return "tsqueryrecv"; + case PostgresTypeId::kTsvector: + return "tsvectorrecv"; + case PostgresTypeId::kTxidSnapshot: + return "txid_snapshot_recv"; + case PostgresTypeId::kUnknown: + return "unknownrecv"; + case PostgresTypeId::kUuid: + return "uuid_recv"; + case PostgresTypeId::kVarbit: + return "varbit_recv"; + case PostgresTypeId::kVarchar: + return "varcharrecv"; + case PostgresTypeId::kVoid: + return "void_recv"; + case PostgresTypeId::kXid8: + return "xid8recv"; + case PostgresTypeId::kXid: + return "xidrecv"; + case PostgresTypeId::kXml: + return "xml_recv"; + default: + return ""; + } +} + +static inline const char* PostgresTypname(PostgresTypeId type_id) { + switch (type_id) { + case PostgresTypeId::kAclitem: + return "aclitem"; + case PostgresTypeId::kAnyarray: + return "anyarray"; + case PostgresTypeId::kAnycompatiblearray: + return "anycompatiblearray"; + case PostgresTypeId::kArray: + return "array"; + case PostgresTypeId::kBit: + return "bit"; + case PostgresTypeId::kBool: + return "bool"; + case PostgresTypeId::kBox: + return "box"; + case PostgresTypeId::kBpchar: + return "bpchar"; + case PostgresTypeId::kBrinBloomSummary: + return "brin_bloom_summary"; + case PostgresTypeId::kBrinMinmaxMultiSummary: + return "brin_minmax_multi_summary"; + case PostgresTypeId::kBytea: + return "bytea"; + case PostgresTypeId::kCash: + return "cash"; + case PostgresTypeId::kChar: + return "char"; + case PostgresTypeId::kCidr: + return "cidr"; + case PostgresTypeId::kCid: + return "cid"; + case PostgresTypeId::kCircle: + return "circle"; + case PostgresTypeId::kCstring: + return "cstring"; + case PostgresTypeId::kDate: + return "date"; + case PostgresTypeId::kDomain: + return "domain"; + case PostgresTypeId::kFloat4: + return "float4"; + case PostgresTypeId::kFloat8: + return "float8"; + case PostgresTypeId::kInet: + 
return "inet"; + case PostgresTypeId::kInt2: + return "int2"; + case PostgresTypeId::kInt2vector: + return "int2vector"; + case PostgresTypeId::kInt4: + return "int4"; + case PostgresTypeId::kInt8: + return "int8"; + case PostgresTypeId::kInterval: + return "interval"; + case PostgresTypeId::kJson: + return "json"; + case PostgresTypeId::kJsonb: + return "jsonb"; + case PostgresTypeId::kJsonpath: + return "jsonpath"; + case PostgresTypeId::kLine: + return "line"; + case PostgresTypeId::kLseg: + return "lseg"; + case PostgresTypeId::kMacaddr: + return "macaddr"; + case PostgresTypeId::kMacaddr8: + return "macaddr8"; + case PostgresTypeId::kMultirange: + return "multirange"; + case PostgresTypeId::kName: + return "name"; + case PostgresTypeId::kNumeric: + return "numeric"; + case PostgresTypeId::kOid: + return "oid"; + case PostgresTypeId::kOidvector: + return "oidvector"; + case PostgresTypeId::kPath: + return "path"; + case PostgresTypeId::kPgNodeTree: + return "pg_node_tree"; + case PostgresTypeId::kPgNdistinct: + return "pg_ndistinct"; + case PostgresTypeId::kPgDependencies: + return "pg_dependencies"; + case PostgresTypeId::kPgLsn: + return "pg_lsn"; + case PostgresTypeId::kPgMcvList: + return "pg_mcv_list"; + case PostgresTypeId::kPgDdlCommand: + return "pg_ddl_command"; + case PostgresTypeId::kPgSnapshot: + return "pg_snapshot"; + case PostgresTypeId::kPoint: + return "point"; + case PostgresTypeId::kPoly: + return "poly"; + case PostgresTypeId::kRange: + return "range"; + case PostgresTypeId::kRecord: + return "record"; + case PostgresTypeId::kRegclass: + return "regclass"; + case PostgresTypeId::kRegcollation: + return "regcollation"; + case PostgresTypeId::kRegconfig: + return "regconfig"; + case PostgresTypeId::kRegdictionary: + return "regdictionary"; + case PostgresTypeId::kRegnamespace: + return "regnamespace"; + case PostgresTypeId::kRegoperator: + return "regoperator"; + case PostgresTypeId::kRegoper: + return "regoper"; + case 
PostgresTypeId::kRegprocedure: + return "regprocedure"; + case PostgresTypeId::kRegproc: + return "regproc"; + case PostgresTypeId::kRegrole: + return "regrole"; + case PostgresTypeId::kRegtype: + return "regtype"; + case PostgresTypeId::kText: + return "text"; + case PostgresTypeId::kTid: + return "tid"; + case PostgresTypeId::kTime: + return "time"; + case PostgresTypeId::kTimestamp: + return "timestamp"; + case PostgresTypeId::kTimestamptz: + return "timestamptz"; + case PostgresTypeId::kTimetz: + return "timetz"; + case PostgresTypeId::kTsquery: + return "tsquery"; + case PostgresTypeId::kTsvector: + return "tsvector"; + case PostgresTypeId::kTxidSnapshot: + return "txid_snapshot"; + case PostgresTypeId::kUnknown: + return "unknown"; + case PostgresTypeId::kUuid: + return "uuid"; + case PostgresTypeId::kVarbit: + return "varbit"; + case PostgresTypeId::kVarchar: + return "varchar"; + case PostgresTypeId::kVoid: + return "void"; + case PostgresTypeId::kXid8: + return "xid8"; + case PostgresTypeId::kXid: + return "xid"; + case PostgresTypeId::kXml: + return "xml"; + default: + return ""; + } +} + +static inline std::vector PostgresTypeIdAll(bool nested) { + std::vector base = {PostgresTypeId::kAclitem, + PostgresTypeId::kAnyarray, + PostgresTypeId::kAnycompatiblearray, + PostgresTypeId::kBit, + PostgresTypeId::kBool, + PostgresTypeId::kBox, + PostgresTypeId::kBpchar, + PostgresTypeId::kBrinBloomSummary, + PostgresTypeId::kBrinMinmaxMultiSummary, + PostgresTypeId::kBytea, + PostgresTypeId::kCash, + PostgresTypeId::kChar, + PostgresTypeId::kCidr, + PostgresTypeId::kCid, + PostgresTypeId::kCircle, + PostgresTypeId::kCstring, + PostgresTypeId::kDate, + PostgresTypeId::kFloat4, + PostgresTypeId::kFloat8, + PostgresTypeId::kInet, + PostgresTypeId::kInt2, + PostgresTypeId::kInt2vector, + PostgresTypeId::kInt4, + PostgresTypeId::kInt8, + PostgresTypeId::kInterval, + PostgresTypeId::kJson, + PostgresTypeId::kJsonb, + PostgresTypeId::kJsonpath, + PostgresTypeId::kLine, + 
PostgresTypeId::kLseg, + PostgresTypeId::kMacaddr, + PostgresTypeId::kMacaddr8, + PostgresTypeId::kMultirange, + PostgresTypeId::kName, + PostgresTypeId::kNumeric, + PostgresTypeId::kOid, + PostgresTypeId::kOidvector, + PostgresTypeId::kPath, + PostgresTypeId::kPgNodeTree, + PostgresTypeId::kPgNdistinct, + PostgresTypeId::kPgDependencies, + PostgresTypeId::kPgLsn, + PostgresTypeId::kPgMcvList, + PostgresTypeId::kPgDdlCommand, + PostgresTypeId::kPgSnapshot, + PostgresTypeId::kPoint, + PostgresTypeId::kPoly, + PostgresTypeId::kRegclass, + PostgresTypeId::kRegcollation, + PostgresTypeId::kRegconfig, + PostgresTypeId::kRegdictionary, + PostgresTypeId::kRegnamespace, + PostgresTypeId::kRegoperator, + PostgresTypeId::kRegoper, + PostgresTypeId::kRegprocedure, + PostgresTypeId::kRegproc, + PostgresTypeId::kRegrole, + PostgresTypeId::kRegtype, + PostgresTypeId::kText, + PostgresTypeId::kTid, + PostgresTypeId::kTime, + PostgresTypeId::kTimestamp, + PostgresTypeId::kTimestamptz, + PostgresTypeId::kTimetz, + PostgresTypeId::kTsquery, + PostgresTypeId::kTsvector, + PostgresTypeId::kTxidSnapshot, + PostgresTypeId::kUnknown, + PostgresTypeId::kUuid, + PostgresTypeId::kVarbit, + PostgresTypeId::kVarchar, + PostgresTypeId::kVoid, + PostgresTypeId::kXid8, + PostgresTypeId::kXid, + PostgresTypeId::kXml}; + + if (nested) { + base.push_back(PostgresTypeId::kArray); + base.push_back(PostgresTypeId::kRecord); + base.push_back(PostgresTypeId::kRange); + base.push_back(PostgresTypeId::kDomain); + } + + return base; +} + } // namespace adbcpq From 5388371d36f021c352b7476b02eec43a9b969115 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Apr 2023 10:00:38 -0300 Subject: [PATCH 88/90] more undoing of implementation file --- r/adbcpostgresql/bootstrap.R | 1 - r/adbcpostgresql/src/.gitignore | 1 - r/adbcpostgresql/src/Makevars.in | 1 - r/adbcpostgresql/src/Makevars.ucrt | 1 - 4 files changed, 4 deletions(-) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R 
index 6ffc043798..b7c84fc5ae 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -21,7 +21,6 @@ files_to_vendor <- c( "../../adbc.h", "../../c/driver/postgresql/util.h", "../../c/driver/postgresql/postgres_type.h", - "../../c/driver/postgresql/postgres_type.cc", "../../c/driver/postgresql/statement.h", "../../c/driver/postgresql/statement.cc", "../../c/driver/postgresql/connection.h", diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore index 9c8a8fb23f..1e652d761b 100644 --- a/r/adbcpostgresql/src/.gitignore +++ b/r/adbcpostgresql/src/.gitignore @@ -27,6 +27,5 @@ postgresql.cc statement.h statement.cc postgres_type.h -postgres_type.cc util.h Makevars diff --git a/r/adbcpostgresql/src/Makevars.in b/r/adbcpostgresql/src/Makevars.in index 7d772b0ded..c062d1176e 100644 --- a/r/adbcpostgresql/src/Makevars.in +++ b/r/adbcpostgresql/src/Makevars.in @@ -23,5 +23,4 @@ OBJECTS = init.o \ database.o \ statement.o \ postgresql.o \ - postgres_type.o \ nanoarrow/nanoarrow.o diff --git a/r/adbcpostgresql/src/Makevars.ucrt b/r/adbcpostgresql/src/Makevars.ucrt index cec428ee34..0fc2d0c0af 100644 --- a/r/adbcpostgresql/src/Makevars.ucrt +++ b/r/adbcpostgresql/src/Makevars.ucrt @@ -23,5 +23,4 @@ OBJECTS = init.o \ database.o \ statement.o \ postgresql.o \ - postgres_type.o \ nanoarrow/nanoarrow.o From 978b5c3a3988967fe56d1fe91210fb40fe2186fa Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Apr 2023 10:03:29 -0300 Subject: [PATCH 89/90] one more definition to remove --- c/driver/postgresql/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 3a05da85a3..e14ea20dcc 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -84,7 +84,6 @@ if(ADBC_BUILD_TESTS) nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-postgresql-test PRIVATE cxx_std_17) - 
target_compile_definitions(adbc-driver-postgresql-test PRIVATE -DADBC_BUILDING_TESTS) adbc_configure_target(adbc-driver-postgresql-test) endif() From 68ff9d8560bee23bc74f0b8dc5f8360a7880de03 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Apr 2023 10:30:11 -0300 Subject: [PATCH 90/90] don't use devel pak --- .github/workflows/native-unix.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/native-unix.yml b/.github/workflows/native-unix.yml index ee3c5e970e..82c99bdc1d 100644 --- a/.github/workflows/native-unix.yml +++ b/.github/workflows/native-unix.yml @@ -501,7 +501,6 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - pak-version: devel extra-packages: any::rcmdcheck, local::../adbcdrivermanager needs: check working-directory: r/${{ matrix.config.pkg }}