From b64fd5e1b1a025fc2f2e7c6b934c71a5c7c3fe60 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 22 Apr 2024 17:45:21 +0900 Subject: [PATCH 1/2] feat(glib): Add gadbc_connection_get_statistics() Fixes #1743. We use PostgreSQL instead of SQLite for testing because the SQLite driver doesn't support GetStatistics yet. --- glib/adbc-glib/connection.c | 116 ++++++++++ glib/adbc-glib/connection.h | 73 ++++++ glib/adbc-glib/meson.build | 20 +- glib/adbc-glib/version.h.in | 23 ++ glib/test/helper.rb | 24 +- glib/test/helper/sandbox.rb | 369 ++++++++++++++++++++++++++++++ glib/test/run.rb | 3 + glib/test/test-connection.rb | 385 ++++++++++++++++++++++++-------- glib/test/test-statistic-key.rb | 25 +++ 9 files changed, 929 insertions(+), 109 deletions(-) create mode 100644 glib/test/helper/sandbox.rb create mode 100644 glib/test/test-statistic-key.rb diff --git a/glib/adbc-glib/connection.c b/glib/adbc-glib/connection.c index bcb541c5d5..14ec48f067 100644 --- a/glib/adbc-glib/connection.c +++ b/glib/adbc-glib/connection.c @@ -35,6 +35,35 @@ #define BOOLEAN_TO_OPTION_VALUE(boolean) \ ((boolean) ? ADBC_OPTION_VALUE_ENABLED : ADBC_OPTION_VALUE_DISABLED) +/** + * gadbc_statistic_key_to_string: + * @key: A #GADBCStatisticKey. + * + * Returns: The name of @key. 
+ * + * Since: 1.0.0 + */ +const gchar* gadbc_statistic_key_to_string(GADBCStatisticKey key) { + switch (key) { + case GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH: + return ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_NAME; + case GADBC_STATISTIC_KEY_DISTINCT_COUNT: + return ADBC_STATISTIC_DISTINCT_COUNT_NAME; + case GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH: + return ADBC_STATISTIC_MAX_BYTE_WIDTH_NAME; + case GADBC_STATISTIC_KEY_MAX_VALUE: + return ADBC_STATISTIC_MAX_VALUE_NAME; + case GADBC_STATISTIC_KEY_MIN_VALUE: + return ADBC_STATISTIC_MIN_VALUE_NAME; + case GADBC_STATISTIC_KEY_NULL_COUNT: + return ADBC_STATISTIC_NULL_COUNT_NAME; + case GADBC_STATISTIC_KEY_ROW_COUNT: + return ADBC_STATISTIC_ROW_COUNT_NAME; + default: + return "adbc.statistic.invalid"; + } +} + /** * gadbc_isolation_level_to_string: * @level: A #GADBCIsolationLevel. @@ -538,6 +567,93 @@ gpointer gadbc_connection_get_table_types(GADBCConnection* connection, GError** } } +/** + * gadbc_connection_get_statistics: + * @connection: A #GADBCConnection. + * @catalog: (nullable): A catalog or %NULL if not applicable. + * @db_schema: (nullable): A database schema or %NULL if not applicable. + * @table_name: (nullable): A table name. + * @approximate: Whether approximate values are allowed or not. If + * this is %TRUE, best-effort, approximate or cached values may be + * returned. Otherwise, exact values are requested. Note that the + * database may return approximate values regardless as indicated + * in the result. Requesting exact values may be expensive or + * unsupported. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * The result is an Arrow dataset with the following schema: + * + * | Field Name | Field Type | + * |--------------------------|----------------------------------| + * | catalog_name | utf8 | + * | catalog_db_schemas | list<DB_SCHEMA_SCHEMA> not null | + * + * DB_SCHEMA_SCHEMA is a Struct with fields: + * + * | Field Name | Field Type | + * |--------------------------|----------------------------------| + * | db_schema_name | utf8 | + * | db_schema_statistics | list<STATISTICS_SCHEMA> not null | + * + * STATISTICS_SCHEMA is a Struct with fields: + * + * | Field Name | Field Type | Comments | + * |--------------------------|----------------------------------| -------- | + * | table_name | utf8 not null | | + * | column_name | utf8 | (1) | + * | statistic_key | int16 not null | (2) | + * | statistic_value | VALUE_SCHEMA not null | | + * | statistic_is_approximate | bool not null | (3) | + * + * 1. If null, then the statistic applies to the entire table. + * 2. A dictionary-encoded statistic name (although we do not use the Arrow + * dictionary type). Values in [0, 1024) are reserved for ADBC. Other + * values are for implementation-specific statistics. For the definitions + * of predefined statistic types, see #GADBCStatisticKey. To get + * driver-specific statistic names, use + * gadbc_connection_get_statistic_names(). + * 3. If true, then the value is approximate or best-effort. + * + * VALUE_SCHEMA is a dense union with members: + * + * | Field Name | Field Type | + * |--------------------------|----------------------------------| + * | int64 | int64 | + * | uint64 | uint64 | + * | float64 | float64 | + * | binary | binary | + * + * Returns: The result set as `struct ArrowArrayStream *`. It should + * be freed with the `ArrowArrayStream:release` callback then + * g_free() when no longer needed. + * + * This GADBCConnection must outlive the returned stream. 
+ * + * Since: 1.0.0 + */ +gpointer gadbc_connection_get_statistics(GADBCConnection* connection, + const gchar* catalog, const gchar* db_schema, + const gchar* table_name, gboolean approximate, + GError** error) { + const gchar* context = "[adbc][connection][get-statistics]"; + struct AdbcConnection* adbc_connection = + gadbc_connection_get_raw(connection, context, error); + if (!adbc_connection) { + return NULL; + } + struct ArrowArrayStream* array_stream = g_new0(struct ArrowArrayStream, 1); + struct AdbcError adbc_error = {}; + AdbcStatusCode status_code = + AdbcConnectionGetStatistics(adbc_connection, catalog, db_schema, table_name, + approximate, array_stream, &adbc_error); + if (gadbc_error_check(error, status_code, &adbc_error, context)) { + return array_stream; + } else { + g_free(array_stream); + return NULL; + } +} + /** * gadbc_connection_commit: * @connection: A #GADBCConnection. diff --git a/glib/adbc-glib/connection.h b/glib/adbc-glib/connection.h index 30df5710df..210af053f8 100644 --- a/glib/adbc-glib/connection.h +++ b/glib/adbc-glib/connection.h @@ -23,6 +23,24 @@ G_BEGIN_DECLS +/** + * GADBC_VERSION_1_0_0: + * + * ADBC revision 1.0.0. + * + * Since: 1.0.0 + */ +#define GADBC_VERSION_1_0_0 1000000 + +/** + * GADBC_VERSION_1_1_0: + * + * ADBC revision 1.1.0. + * + * Since: 1.0.0 + */ +#define GADBC_VERSION_1_1_0 1001000 + /** * GADBCInfo: * @GADBC_INFO_VENDOR_NAME: The database vendor/product name (e.g. the @@ -35,6 +53,8 @@ G_BEGIN_DECLS * @GADBC_INFO_DRIVER_VERSION: The driver version (type: utf8). * @GADBC_INFO_DRIVER_ARROW_VERSION: The driver Arrow library version * (type: utf8). + * @GADBC_INFO_DRIVER_ADBC_VERSION: The driver ADBC version + * (type: int64). * * The information code that is used by gadbc_connection_get_info(). 
* @@ -49,6 +69,7 @@ typedef enum { GADBC_INFO_DRIVER_NAME = 100, GADBC_INFO_DRIVER_VERSION = 101, GADBC_INFO_DRIVER_ARROW_VERSION = 102, + GADBC_INFO_DRIVER_ADBC_VERSION = 103, } GADBCInfo; /** @@ -76,6 +97,53 @@ typedef enum { GADBC_OBJECT_DEPTH_TABLES = 3, } GADBCObjectDepth; +/** + * GADBCStatisticKey: + * @GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH: The average byte + * width statistic. The average size in bytes of a row in the + * column. + * Value type is float64. + * For example, this is roughly the average length of a string + * for a string column. + * @GADBC_STATISTIC_KEY_DISTINCT_COUNT: The distinct value count + * (NDV) statistic. The number of distinct values in the column. + * Value type is int64 (when not approximate) or float64 (when + * approximate). + * @GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH: The max byte width statistic. + * The maximum size in bytes of a row in the column. Value type is + * int64 (when not approximate) or float64 (when approximate). + * For example, this is the maximum length of a string for a string + * column. + * @GADBC_STATISTIC_KEY_MAX_VALUE: The max value statistic. Value + * type is column-dependent. + * @GADBC_STATISTIC_KEY_MIN_VALUE: The min value statistic. Value + * type is column-dependent. + * @GADBC_STATISTIC_KEY_NULL_COUNT: The null count statistic. The + * number of values that are null in the column. Value type is + * int64 (when not approximate) or float64 (when approximate). + * @GADBC_STATISTIC_KEY_ROW_COUNT: The row count statistic. The + * number of rows in the column or table. Value type is int64 (when + * not approximate) or float64 (when approximate). + * + * Standard statistic names for gadbc_connection_get_statistics(). + * + * They correspond to `ADBC_STATISTIC_*_KEY` values in `adbc.h`. 
+ * + * Since: 1.0.0 + */ +typedef enum { + GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH = 0, + GADBC_STATISTIC_KEY_DISTINCT_COUNT = 1, + GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH = 2, + GADBC_STATISTIC_KEY_MAX_VALUE = 3, + GADBC_STATISTIC_KEY_MIN_VALUE = 4, + GADBC_STATISTIC_KEY_NULL_COUNT = 5, + GADBC_STATISTIC_KEY_ROW_COUNT = 6, +} GADBCStatisticKey; + +GADBC_AVAILABLE_IN_1_0 +const gchar* gadbc_statistic_key_to_string(GADBCStatisticKey key); + /** * GADBCIsolationLevel: * @GADBC_ISOLATION_LEVEL_DEFAULT: Use database or driver default @@ -179,6 +247,11 @@ gpointer gadbc_connection_get_table_schema(GADBCConnection* connection, const gchar* table_name, GError** error); GADBC_AVAILABLE_IN_0_4 gpointer gadbc_connection_get_table_types(GADBCConnection* connection, GError** error); +GADBC_AVAILABLE_IN_1_0 +gpointer gadbc_connection_get_statistics(GADBCConnection* connection, + const gchar* catalog, const gchar* db_schema, + const gchar* table_name, gboolean approximate, + GError** error); GADBC_AVAILABLE_IN_0_4 gboolean gadbc_connection_commit(GADBCConnection* connection, GError** error); GADBC_AVAILABLE_IN_0_4 diff --git a/glib/adbc-glib/meson.build b/glib/adbc-glib/meson.build index 4496463c83..60d892ff3d 100644 --- a/glib/adbc-glib/meson.build +++ b/glib/adbc-glib/meson.build @@ -31,6 +31,16 @@ definition_headers = files( 'statement.h', ) +version_h_conf = configuration_data() +version_h_conf.set('GADBC_VERSION', meson.project_version()) +version_h_conf.set('GADBC_VERSION_MAJOR', version_major) +version_h_conf.set('GADBC_VERSION_MINOR', version_minor) +version_h_conf.set('GADBC_VERSION_MICRO', version_micro) +version_h = configure_file(input: 'version.h.in', + output: 'version.h', + configuration: version_h_conf) +definition_headers += version_h + headers = definition_headers headers += files( 'adbc-glib-raw.h', @@ -41,16 +51,6 @@ headers += files( 'statement-raw.h', ) -version_h_conf = configuration_data() -version_h_conf.set('GADBC_VERSION', meson.project_version()) 
-version_h_conf.set('GADBC_VERSION_MAJOR', version_major) -version_h_conf.set('GADBC_VERSION_MINOR', version_minor) -version_h_conf.set('GADBC_VERSION_MICRO', version_micro) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) -headers += version_h - enums = gnome.mkenums_simple('enum-types', identifier_prefix: 'GADBC', sources: definition_headers, diff --git a/glib/adbc-glib/version.h.in b/glib/adbc-glib/version.h.in index a7a8837970..72b7aa80e2 100644 --- a/glib/adbc-glib/version.h.in +++ b/glib/adbc-glib/version.h.in @@ -100,6 +100,15 @@ # define GADBC_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif +/** + * GADBC_VERSION_1_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 1.0.0 + */ +#define GADBC_VERSION_1_0 G_ENCODE_VERSION(1, 0) + /** * GADBC_VERSION_0_10: * @@ -174,6 +183,20 @@ #define GADBC_AVAILABLE_IN_ALL +#if GADBC_VERSION_MIN_REQUIRED >= GADBC_VERSION_1_0 +# define GADBC_DEPRECATED_IN_1_0 GADBC_DEPRECATED +# define GADBC_DEPRECATED_IN_1_0_FOR(function) GADBC_DEPRECATED_FOR(function) +#else +# define GADBC_DEPRECATED_IN_1_0 +# define GADBC_DEPRECATED_IN_1_0_FOR(function) +#endif + +#if GADBC_VERSION_MAX_ALLOWED < GADBC_VERSION_1_0 +# define GADBC_AVAILABLE_IN_1_0 GADBC_UNAVAILABLE(1, 0) +#else +# define GADBC_AVAILABLE_IN_1_0 +#endif + #if GADBC_VERSION_MIN_REQUIRED >= GADBC_VERSION_0_10 # define GADBC_DEPRECATED_IN_0_10 GADBC_DEPRECATED # define GADBC_DEPRECATED_IN_0_10_FOR(function) GADBC_DEPRECATED_FOR(function) diff --git a/glib/test/helper.rb b/glib/test/helper.rb index 7054f9f31f..a3a19e6c5e 100644 --- a/glib/test/helper.rb +++ b/glib/test/helper.rb @@ -28,18 +28,28 @@ def require_gi_bindings(major, minor, micro) omit(message) end + def import_array_stream(c_abi_array_stream) + begin + reader = Arrow::RecordBatchReader.import(c_abi_array_stream) + begin + yield(reader) + ensure + reader.unref + end + ensure + GLib.free(c_abi_array_stream) + 
end + end + def execute_statement(statement, need_result: true) _, c_abi_array_stream, n_rows_affected = statement.execute(need_result) - begin - if need_result - reader = Arrow::RecordBatchReader.import(c_abi_array_stream) + if need_result + import_array_stream(c_abi_array_stream) do |reader| table = reader.read_all yield(table, n_rows_affected) if block_given? - else - yield(n_rows_affected) if block_given? end - ensure - GLib.free(c_abi_array_stream) if need_result + else + yield(n_rows_affected) if block_given? end end diff --git a/glib/test/helper/sandbox.rb b/glib/test/helper/sandbox.rb new file mode 100644 index 0000000000..f3ce2ea384 --- /dev/null +++ b/glib/test/helper/sandbox.rb @@ -0,0 +1,369 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require "fileutils" +require "socket" + +module Helper + class CommandRunError < StandardError + attr_reader :command_line + attr_reader :output + attr_reader :error + def initialize(command_line, output, error) + @command_line = command_line + @output = output + @error = error + message = +"failed to run: " + message << command_line.join(" ") + message << "\n" + message << "output:\n" + message << output + message << "error:\n" + message << error + super(message) + end + end + + module CommandRunnable + def spawn_process(*command_line) + env = { + "LC_ALL" => "C", + "PGCLIENTENCODING" => "UTF-8", + } + IO.pipe do |input_read, input_write| + input_write.sync = true + IO.pipe do |output_read, output_write| + IO.pipe do |error_read, error_write| + options = { + in: input_read, + out: output_write, + err: error_write, + } + pid = spawn(env, *command_line, options) + begin + input_read.close + output_write.close + error_write.close + yield(pid, input_write, output_read, error_read) + ensure + finished = false + begin + finished = !Process.waitpid(pid, Process::WNOHANG).nil? + rescue SystemCallError + # Finished + else + unless finished + Process.kill(:KILL, pid) + Process.waitpid(pid) + end + end + end + end + end + end + end + + def read_command_output_all(input, initial_timeout: 1) + all_output = +"" + timeout = initial_timeout + loop do + break unless IO.select([input], nil, nil, timeout) + all_output << read_command_output(input) + timeout = 0 + end + all_output + end + + def read_command_output(input) + return "" unless IO.select([input], nil, nil, 0) + begin + data = input.readpartial(4096).gsub(/\r\n/, "\n") + data.force_encoding("UTF-8") + data + rescue EOFError + "" + end + end + + def run_command(*command_line) + spawn_process(*command_line) do |pid, input_write, output_read, error_read| + output = +"" + error = +"" + status = nil + timeout = 1 + if block_given? 
+ begin + yield(input_write, output_read, error_read) + ensure + input_write.close unless input_write.closed? + end + end + loop do + readables, = IO.select([output_read, error_read], nil, nil, timeout) + if readables + timeout = 0 + readables.each do |readable| + if readable == output_read + output << read_command_output(output_read) + else + error << read_command_output(error_read) + end + end + else + timeout = 1 + end + _, status = Process.waitpid2(pid, Process::WNOHANG) + break if status + end + output << read_command_output(output_read) + error << read_command_output(error_read) + unless status.success? + raise CommandRunError.new(command_line, output, error) + end + [output, error] + end + end + end + + class PostgreSQL + include CommandRunnable + + attr_reader :dir + attr_reader :host + attr_reader :port + attr_reader :user + attr_reader :version + def initialize(base_dir) + @base_dir = base_dir + @dir = nil + @log_base_name = "postgresql.log" + @log_path = nil + @host = "127.0.0.1" + @port = nil + @user = "adbc" + @version = nil + @pid = nil + @running = false + end + + def running? + @running + end + + def initdb(db_path: "db", + port: 15432) + @dir = File.join(@base_dir, db_path) + @log_path = File.join(@dir, "log", @log_base_name) + @port = port + run_command("initdb", + "--locale", "C", + "--encoding", "UTF-8", + "--username", @user, + "-D", @dir) + postgresql_conf = File.join(@dir, "postgresql.conf") + File.open(postgresql_conf, "a") do |conf| + conf.puts("listen_addresses = '#{@host}'") + conf.puts("port = #{@port}") + conf.puts("logging_collector = on") + conf.puts("log_filename = '#{@log_base_name}'") + yield(conf) if block_given? 
+ end + @version = Integer(File.read(File.join(@dir, "PG_VERSION")).chomp, 10) + end + + def start + begin + run_command("pg_ctl", "start", + "-w", + "-D", @dir) + rescue => error + error.message << "\nPostgreSQL log:\n#{read_log}" + raise + end + loop do + begin + TCPSocket.open(@host, @port) do + end + rescue SystemCallError + sleep(0.1) + else + break + end + end + @running = true + pid_path = File.join(@dir, "postmaster.pid") + if File.exist?(pid_path) + first_line = File.readlines(pid_path, chomp: true)[0] + begin + @pid = Integer(first_line, 10) + rescue ArgumentError + end + end + end + + def stop + return unless running? + begin + run_command("pg_ctl", "stop", + "-D", @dir) + rescue CommandRunError => error + if @pid + Process.kill(:KILL, @pid) + @pid = nil + @running = false + end + error.message << "\nPostgreSQL log:\n#{read_log}" + raise + else + @pid = nil + @running = false + end + end + + def psql(db, *sqls, &block) + command_line = [ + "psql", + "--host", @host, + "--port", @port.to_s, + "--username", @user, + "--dbname", db, + "--echo-all", + "--no-psqlrc", + ] + sqls.each do |sql| + command_line << "--command" << sql + end + output, error = run_command(*command_line, &block) + output = normalize_output(output) + [output, error] + end + + def read_log + return "" unless File.exist?(@log_path) + File.read(@log_path) + end + + private + def normalize_output(output) + normalized_output = +"" + output.each_line do |line| + case line.chomp + when "SET", "CREATE EXTENSION" + next + end + normalized_output << line + end + normalized_output + end + end + + module Sandbox + include CommandRunnable + + class << self + def included(base) + base.module_eval do + setup :setup_tmp_dir + teardown :teardown_tmp_dir + + setup :setup_db + teardown :teardown_db + + setup :setup_postgres + teardown :teardown_postgres + + setup :setup_test_db + teardown :teardown_test_db + end + end + end + + def adbc_uri + host = @postgresql.host + port = @postgresql.port + user = 
@postgresql.user + "postgresql://#{host}:#{port}/#{@test_db_name}?user=#{user}" + end + + def psql(db, *sqls, **options, &block) + @postgresql.psql(db, *sqls, **options, &block) + end + + def run_sql(*sqls, **options, &block) + psql(@test_db_name, *sqls, **options, &block) + end + + def setup_tmp_dir + memory_fs = "/dev/shm" + if File.exist?(memory_fs) + @tmp_dir = File.join(memory_fs, "adbc") + else + @tmp_dir = File.join(__dir__, "tmp") + end + FileUtils.rm_rf(@tmp_dir) + FileUtils.mkdir_p(@tmp_dir) + end + + def teardown_tmp_dir + FileUtils.rm_rf(@tmp_dir) + end + + def setup_db + @postgresql = PostgreSQL.new(@tmp_dir) + begin + @postgresql.initdb + rescue SystemCallError => error + omit("PostgreSQL isn't available: #{error}") + end + end + + def teardown_db + end + + def start_postgres + @postgresql.start + end + + def stop_postgres + @postgresql.stop + end + + def setup_postgres + start_postgres + end + + def teardown_postgres + stop_postgres if @postgresql + end + + def create_db(postgresql, db_name) + postgresql.psql("postgres", "CREATE DATABASE #{db_name}") + end + + def setup_test_db + @test_db_name = "test" + create_db(@postgresql, @test_db_name) + result, = run_sql("SELECT oid FROM pg_catalog.pg_database " + + "WHERE datname = current_database()") + oid = result.lines[3].strip + @test_db_dir = File.join(@postgresql.dir, "base", oid) + end + + def teardown_test_db + end + end +end diff --git a/glib/test/run.rb b/glib/test/run.rb index 2685a1ad60..a05c4e9174 100755 --- a/glib/test/run.rb +++ b/glib/test/run.rb @@ -17,6 +17,8 @@ # specific language governing permissions and limitations # under the License. 
+ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "10000" + require "pathname" require "test-unit" @@ -37,5 +39,6 @@ end require_relative "helper" +require_relative "helper/sandbox" exit(Test::Unit::AutoRunner.run(true, test_dir.to_s)) diff --git a/glib/test/test-connection.rb b/glib/test/test-connection.rb index c939a0d474..12e906301b 100644 --- a/glib/test/test-connection.rb +++ b/glib/test/test-connection.rb @@ -17,18 +17,16 @@ class ConnectionTest < Test::Unit::TestCase include Helper + include Helper::Sandbox - def setup + setup def setup_database @database = ADBC::Database.new - @database.set_option("driver", "adbc_driver_sqlite") - Dir.mktmpdir do |tmp_dir| - database = File.join(tmp_dir, "test.sqlite3") - @database.set_option("uri", database) - @database.init - open_connection do |connection| - @connection = connection - yield - end + @database.set_option("driver", "adbc_driver_postgresql") + @database.set_option("uri", adbc_uri) + @database.init + open_connection do |connection| + @connection = connection + yield end end @@ -42,7 +40,12 @@ def open_connection end end - def normalize_version(version) + def normalize_vendor_version(version) + return nil if version.nil? + version.gsub(/\A\d+(?:\.\d+)*\z/, "X.Y.Z") + end + + def normalize_arrow_version(version) return nil if version.nil? 
version.gsub(/\A\d+\.\d+\.\d+(?:-SNAPSHOT)?\z/, "X.Y.Z") end @@ -51,9 +54,10 @@ def normalize_info(info) info.collect do |code, value| value = value.values[0] if value.is_a?(Hash) case code - when ADBC::Info::VENDOR_VERSION, - ADBC::Info::DRIVER_ARROW_VERSION - value = normalize_version(value) + when ADBC::Info::VENDOR_VERSION + value = normalize_vendor_version(value) + when ADBC::Info::DRIVER_ARROW_VERSION + value = normalize_arrow_version(value) end [code, value] end @@ -62,19 +66,17 @@ def normalize_info(info) sub_test_case("#get_info") do def test_all c_abi_array_stream = @connection.get_info - begin - reader = Arrow::RecordBatchReader.import(c_abi_array_stream) + import_array_stream(c_abi_array_stream) do |reader| table = reader.read_all assert_equal([ - [ADBC::Info::VENDOR_NAME, "SQLite"], + [ADBC::Info::VENDOR_NAME, "PostgreSQL"], [ADBC::Info::VENDOR_VERSION, "X.Y.Z"], - [ADBC::Info::DRIVER_NAME, "ADBC SQLite Driver"], + [ADBC::Info::DRIVER_NAME, "ADBC PostgreSQL Driver"], [ADBC::Info::DRIVER_VERSION, "(unknown)"], [ADBC::Info::DRIVER_ARROW_VERSION, "X.Y.Z"], + [ADBC::Info::DRIVER_ADBC_VERSION, ADBC::VERSION_1_1_0], ], normalize_info(table.raw_records)) - ensure - GLib.free(c_abi_array_stream) end end @@ -84,53 +86,50 @@ def test_multiple ADBC::Info::DRIVER_NAME, ] c_abi_array_stream = @connection.get_info(codes) - begin - reader = Arrow::RecordBatchReader.import(c_abi_array_stream) + import_array_stream(c_abi_array_stream) do |reader| table = reader.read_all assert_equal([ - [ADBC::Info::VENDOR_NAME, "SQLite"], - [ADBC::Info::DRIVER_NAME, "ADBC SQLite Driver"], + [ADBC::Info::VENDOR_NAME, "PostgreSQL"], + [ADBC::Info::DRIVER_NAME, "ADBC PostgreSQL Driver"], ], normalize_info(table.raw_records)) - ensure - GLib.free(c_abi_array_stream) end end end sub_test_case("#objects") do - def setup - super do - execute_sql(@connection, - "CREATE TABLE data (number int, string text)", - need_result: false) - execute_sql(@connection, - "INSERT INTO data VALUES (1, 
'hello')", - need_result: false) - yield - end + setup def setup_schema + execute_sql(@connection, + "CREATE TABLE data (number int, string text)", + need_result: false) + execute_sql(@connection, + "INSERT INTO data VALUES (1, 'hello')", + need_result: false) + yield end def get_objects(*args) c_abi_array_stream = @connection.get_objects(*args) - begin - reader = Arrow::RecordBatchReader.import(c_abi_array_stream) + import_array_stream(c_abi_array_stream) do |reader| yield(reader.read_all) - ensure - GLib.free(c_abi_array_stream) end end def test_catalogs_all get_objects(:catalogs) do |table| - assert_equal([["main", nil]], + assert_equal([ + ["postgres", nil], + [@test_db_name, nil], + ["template1", nil], + ["template0", nil], + ], table.raw_records) end end def test_catalogs_match - get_objects(:catalogs, "main") do |table| - assert_equal([["main", nil]], + get_objects(:catalogs, @test_db_name) do |table| + assert_equal([[@test_db_name, nil]], table.raw_records) end end @@ -145,15 +144,18 @@ def test_catalogs_not_match def test_db_schemas_all get_objects(:db_schemas) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => nil, }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -162,15 +164,18 @@ def test_db_schemas_all def test_db_schemas_match get_objects(:db_schemas, nil, nil) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => nil, }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -178,7 +183,12 @@ def test_db_schemas_match def test_db_schemas_not_match get_objects(:db_schemas, nil, "nonexistent") do |table| - assert_equal([["main", []]], + assert_equal([ + ["postgres", []], + [@test_db_name, []], + ["template1", []], + ["template0", []], + ], table.raw_records) end end @@ -186,11 +196,12 @@ def 
test_db_schemas_not_match def test_tables_all get_objects(:tables) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ { "table_columns" => nil, @@ -202,6 +213,8 @@ def test_tables_all }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -210,11 +223,12 @@ def test_tables_all def test_tables_match get_objects(:tables, nil, nil, "data") do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ { "table_columns" => nil, @@ -226,6 +240,8 @@ def test_tables_match }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -234,15 +250,18 @@ def test_tables_match def test_tables_not_match get_objects(:tables, nil, nil, "nonexistent") do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [], }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -251,11 +270,12 @@ def test_tables_not_match def test_table_types_all get_objects(:tables) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ { "table_columns" => nil, @@ -267,6 +287,8 @@ def test_table_types_all }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -275,11 +297,12 @@ def test_table_types_all def test_table_types_match get_objects(:tables, nil, nil, nil, ["table", "view"]) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ { "table_columns" => nil, @@ -291,6 +314,8 @@ def test_table_types_match }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -299,15 +324,18 
@@ def test_table_types_match def test_table_types_not_match get_objects(:tables, nil, nil, nil, ["nonexistent"]) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [], }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -316,11 +344,12 @@ def test_table_types_not_match def test_column_all get_objects(:all) do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ { "table_columns" => [ @@ -336,14 +365,14 @@ def test_column_all "xdbc_decimal_digits" => nil, "xdbc_is_autoincrement" => nil, "xdbc_is_generatedcolumn" => nil, - "xdbc_is_nullable" => "YES", - "xdbc_nullable" => 1, + "xdbc_is_nullable" => nil, + "xdbc_nullable" => nil, "xdbc_num_prec_radix" => nil, "xdbc_scope_catalog" => nil, "xdbc_scope_schema" => nil, "xdbc_scope_table" => nil, "xdbc_sql_data_type" => nil, - "xdbc_type_name" => "INT", + "xdbc_type_name" => nil, }, { "column_name" => "string", @@ -357,24 +386,26 @@ def test_column_all "xdbc_decimal_digits" => nil, "xdbc_is_autoincrement" => nil, "xdbc_is_generatedcolumn" => nil, - "xdbc_is_nullable" => "YES", - "xdbc_nullable" => 1, + "xdbc_is_nullable" => nil, + "xdbc_nullable" => nil, "xdbc_num_prec_radix" => nil, "xdbc_scope_catalog" => nil, "xdbc_scope_schema" => nil, "xdbc_scope_table" => nil, "xdbc_sql_data_type" => nil, - "xdbc_type_name" => "TEXT", + "xdbc_type_name" => nil, }, ], "table_constraints" => [], "table_name" => "data", "table_type" => "table", - }, + } ], }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -383,11 +414,12 @@ def test_column_all def test_column_match get_objects(:all, nil, nil, nil, nil, "number") do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ 
{ "table_columns" => [ @@ -403,24 +435,26 @@ def test_column_match "xdbc_decimal_digits" => nil, "xdbc_is_autoincrement" => nil, "xdbc_is_generatedcolumn" => nil, - "xdbc_is_nullable" => "YES", - "xdbc_nullable" => 1, + "xdbc_is_nullable" => nil, + "xdbc_nullable" => nil, "xdbc_num_prec_radix" => nil, "xdbc_scope_catalog" => nil, "xdbc_scope_schema" => nil, "xdbc_scope_table" => nil, "xdbc_sql_data_type" => nil, - "xdbc_type_name" => "INT", + "xdbc_type_name" => nil, }, ], "table_constraints" => [], "table_name" => "data", "table_type" => "table", - }, + } ], }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -429,22 +463,25 @@ def test_column_match def test_column_not_match get_objects(:all, nil, nil, nil, nil, "nonexistent") do |table| assert_equal([ + ["postgres", []], [ - "main", + @test_db_name, [ { - "db_schema_name" => "", + "db_schema_name" => "public", "db_schema_tables" => [ { "table_columns" => [], "table_constraints" => [], "table_name" => "data", "table_type" => "table", - }, + } ], }, ], ], + ["template1", []], + ["template0", []], ], table.raw_records) end @@ -462,7 +499,7 @@ def test_table_schema c_abi_schema = @connection.get_table_schema(nil, nil, "data") begin schema = Arrow::Schema.import(c_abi_schema) - assert_equal(Arrow::Schema.new(number: :int64, + assert_equal(Arrow::Schema.new(number: :int32, string: :string), schema) ensure @@ -472,18 +509,22 @@ def test_table_schema def test_table_types c_abi_array_stream = @connection.table_types - begin - reader = Arrow::RecordBatchReader.import(c_abi_array_stream) + import_array_stream(c_abi_array_stream) do |reader| table = reader.read_all fields = [ Arrow::Field.new("table_type", :string, false), ] schema = Arrow::Schema.new(fields) - table_types = Arrow::StringArray.new(["table", "view"]) + table_types = Arrow::StringArray.new([ + "partitioned_table", + "foreign_table", + "toast_table", + "materialized_view", + "view", + "table", + ]) 
assert_equal(Arrow::Table.new(schema, [table_types]), table) - ensure - GLib.free(c_abi_array_stream) end end @@ -492,7 +533,7 @@ def test_read_only message = "[adbc][connection][set-option]" + "[NOT_IMPLEMENTED][0] " + - "[SQLite] Unknown connection option adbc.connection.readonly='false'" + "[libpq] Unknown option adbc.connection.readonly" assert_raise(ADBC::Error::NotImplemented.new(message)) do connection.read_only = false end @@ -504,15 +545,175 @@ def test_isolation_level message = "[adbc][connection][set-option]" + "[NOT_IMPLEMENTED][0] " + - "[SQLite] Unknown connection option " + - "adbc.connection.transaction.isolation_level=" + - "'adbc.connection.transaction.isolation.linearizable'" + "[libpq] Unknown option " + + "adbc.connection.transaction.isolation_level" assert_raise(ADBC::Error::NotImplemented.new(message)) do connection.isolation_level = :linearizable end end end + sub_test_case("#get_statistics") do + def normalize_statistics(statistics) + statistics.each do |name, db_schemas| + db_schemas.each do |db_schema| + db_schema["db_schema_statistics"].each do |stat| + key = stat["statistic_key"] + stat["statistic_key"] = ADBC::StatisticKey.new(key) + value = stat["statistic_value"] + stat["statistic_value"] = value.round(1) if value.is_a?(Float) + end + db_schema["db_schema_statistics"].sort_by! 
do |stat| + [ + stat["table_name"], + stat["column_name"] || "", + stat["statistic_key"].to_i, + ] + end + end + end + statistics + end + + def test_schema + run_sql("CREATE TABLE public.data1 (number int)") + run_sql("INSERT INTO public.data1 VALUES (1), (NULL), (2)") + run_sql("CREATE TABLE public.data2 (name text)") + run_sql("INSERT INTO public.data2 VALUES ('hello'), (NULL)") + run_sql("ANALYZE") + c_abi_array_stream = @connection.get_statistics(nil, "public", nil, true) + import_array_stream(c_abi_array_stream) do |reader| + table = reader.read_all + assert_equal( + [ + [ + @test_db_name, + [ + { + "db_schema_name" => "public", + "db_schema_statistics" => [ + { + "table_name" => "data1", + "column_name" => nil, + "statistic_key" => ADBC::StatisticKey::ROW_COUNT, + "statistic_value" => 3.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data1", + "column_name" => "number", + "statistic_key" => ADBC::StatisticKey::AVERAGE_BYTE_WIDTH, + "statistic_value" => 4.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data1", + "column_name" => "number", + "statistic_key" => ADBC::StatisticKey::DISTINCT_COUNT, + "statistic_value" => 2.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data1", + "column_name" => "number", + "statistic_key" => ADBC::StatisticKey::NULL_COUNT, + "statistic_value" => 1.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data2", + "column_name" => nil, + "statistic_key" => ADBC::StatisticKey::ROW_COUNT, + "statistic_value" => 2.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data2", + "column_name" => "name", + "statistic_key" => ADBC::StatisticKey::AVERAGE_BYTE_WIDTH, + "statistic_value" => 6.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data2", + "column_name" => "name", + "statistic_key" => ADBC::StatisticKey::DISTINCT_COUNT, + "statistic_value" => 1.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data2", 
+ "column_name" => "name", + "statistic_key" => ADBC::StatisticKey::NULL_COUNT, + "statistic_value" => 1.0, + "statistic_is_approximate" => true, + }, + ], + }, + ], + ], + ], + normalize_statistics(table.raw_records) + ) + end + end + + def test_schema_table + run_sql("CREATE TABLE public.data1 (number int)") + run_sql("INSERT INTO public.data1 VALUES (1), (NULL), (2)") + run_sql("CREATE TABLE public.data2 (name text)") + run_sql("ANALYZE") + c_abi_array_stream = + @connection.get_statistics(nil, "public", "data1", true) + import_array_stream(c_abi_array_stream) do |reader| + table = reader.read_all + assert_equal( + [ + [ + @test_db_name, + [ + { + "db_schema_name" => "public", + "db_schema_statistics" => [ + { + "table_name" => "data1", + "column_name" => nil, + "statistic_key" => ADBC::StatisticKey::ROW_COUNT, + "statistic_value" => 3.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data1", + "column_name" => "number", + "statistic_key" => ADBC::StatisticKey::AVERAGE_BYTE_WIDTH, + "statistic_value" => 4.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data1", + "column_name" => "number", + "statistic_key" => ADBC::StatisticKey::DISTINCT_COUNT, + "statistic_value" => 2.0, + "statistic_is_approximate" => true, + }, + { + "table_name" => "data1", + "column_name" => "number", + "statistic_key" => ADBC::StatisticKey::NULL_COUNT, + "statistic_value" => 1.0, + "statistic_is_approximate" => true, + }, + ], + }, + ], + ], + ], + normalize_statistics(table.raw_records) + ) + end + end + end + def test_commit open_connection do |connection| execute_sql(connection, @@ -531,7 +732,7 @@ def test_commit open_connection do |other_connection| execute_sql(other_connection, "SELECT * FROM data") do |table,| expected = { - number: Arrow::Int64Array.new([1]), + number: Arrow::Int32Array.new([1]), string: Arrow::StringArray.new(["hello"]), } assert_equal(Arrow::Table.new(expected), @@ -542,7 +743,7 @@ def test_commit open_connection do 
|other_connection| execute_sql(other_connection, "SELECT * FROM data") do |table,| expected = { - number: Arrow::Int64Array.new([1, 2]), + number: Arrow::Int32Array.new([1, 2]), string: Arrow::StringArray.new(["hello", "world"]), } assert_equal(Arrow::Table.new(expected), @@ -570,7 +771,7 @@ def test_rollback open_connection do |other_connection| execute_sql(other_connection, "SELECT * FROM data") do |table,| expected = { - number: Arrow::Int64Array.new([1]), + number: Arrow::Int32Array.new([1]), string: Arrow::StringArray.new(["hello"]), } assert_equal(Arrow::Table.new(expected), @@ -581,7 +782,7 @@ def test_rollback open_connection do |other_connection| execute_sql(other_connection, "SELECT * FROM data") do |table,| expected = { - number: Arrow::Int64Array.new([1]), + number: Arrow::Int32Array.new([1]), string: Arrow::StringArray.new(["hello"]), } assert_equal(Arrow::Table.new(expected), diff --git a/glib/test/test-statistic-key.rb b/glib/test/test-statistic-key.rb new file mode 100644 index 0000000000..d3d0db9b3a --- /dev/null +++ b/glib/test/test-statistic-key.rb @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+class StatisticKeyTest < Test::Unit::TestCase # Unit tests for ADBC::StatisticKey.
+  include Helper
+
+  def test_to_string # :null_count must map to its "adbc.statistic.*" name.
+    assert_equal("adbc.statistic.null_count",
+                 ADBC::StatisticKey.to_string(:null_count))
+  end # NOTE(review): only :null_count is exercised; other keys are not checked here.
+end
From 398a88fc2632698d86f1ab938736ec929a448c5b Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 23 Apr 2024 06:31:01 +0900
Subject: [PATCH 2/2] Install PostgreSQL for testing

---
 ci/conda_env_glib.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/conda_env_glib.txt b/ci/conda_env_glib.txt
index cd937ee22f..883ff7f929 100644
--- a/ci/conda_env_glib.txt
+++ b/ci/conda_env_glib.txt
@@ -19,4 +19,5 @@ arrow-c-glib
 glib
 gobject-introspection
 meson
+postgresql
 ruby