Skip to content

Commit

Permalink
feat(glib): Add gadbc_connection_get_statistics() (apache#1744)
Browse files Browse the repository at this point in the history
Fixes apache#1743.

We use PostgreSQL instead of SQLite for testing because the SQLite
driver doesn't support GetStatistics yet.
  • Loading branch information
kou authored and David Coe committed Apr 25, 2024
1 parent 59e92b1 commit 0a8ede6
Show file tree
Hide file tree
Showing 10 changed files with 930 additions and 109 deletions.
1 change: 1 addition & 0 deletions ci/conda_env_glib.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ arrow-c-glib
glib
gobject-introspection
meson
postgresql
ruby
116 changes: 116 additions & 0 deletions glib/adbc-glib/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,35 @@
#define BOOLEAN_TO_OPTION_VALUE(boolean) \
((boolean) ? ADBC_OPTION_VALUE_ENABLED : ADBC_OPTION_VALUE_DISABLED)

/**
 * gadbc_statistic_key_to_string:
 * @key: A #GADBCStatisticKey.
 *
 * Maps a statistic key to the statistic name given by the matching
 * `ADBC_STATISTIC_*_NAME` macro.
 *
 * Returns: The name of @key, or `"adbc.statistic.invalid"` when @key
 *   is not one of the known #GADBCStatisticKey values.
 *
 * Since: 1.0.0
 */
const gchar* gadbc_statistic_key_to_string(GADBCStatisticKey key) {
  /* Start from the fallback name so that unknown keys need no extra work. */
  const gchar* name = "adbc.statistic.invalid";

  switch (key) {
    case GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH:
      name = ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_NAME;
      break;
    case GADBC_STATISTIC_KEY_DISTINCT_COUNT:
      name = ADBC_STATISTIC_DISTINCT_COUNT_NAME;
      break;
    case GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH:
      name = ADBC_STATISTIC_MAX_BYTE_WIDTH_NAME;
      break;
    case GADBC_STATISTIC_KEY_MAX_VALUE:
      name = ADBC_STATISTIC_MAX_VALUE_NAME;
      break;
    case GADBC_STATISTIC_KEY_MIN_VALUE:
      name = ADBC_STATISTIC_MIN_VALUE_NAME;
      break;
    case GADBC_STATISTIC_KEY_NULL_COUNT:
      name = ADBC_STATISTIC_NULL_COUNT_NAME;
      break;
    case GADBC_STATISTIC_KEY_ROW_COUNT:
      name = ADBC_STATISTIC_ROW_COUNT_NAME;
      break;
    default:
      /* Keep the fallback name. */
      break;
  }

  return name;
}

/**
* gadbc_isolation_level_to_string:
* @level: A #GADBCIsolationLevel.
Expand Down Expand Up @@ -538,6 +567,93 @@ gpointer gadbc_connection_get_table_types(GADBCConnection* connection, GError**
}
}

/**
 * gadbc_connection_get_statistics:
 * @connection: A #GADBCConnection.
 * @catalog: (nullable): A catalog or %NULL if not applicable.
 * @db_schema: (nullable): A database schema or %NULL if not applicable.
 * @table_name: (nullable): A table name.
 * @approximate: Whether approximate values are allowed or not. If
 *   this is %TRUE, best-effort, approximate or cached values may be
 *   returned. Otherwise, exact values are requested. Note that the
 *   database may return approximate values regardless as indicated
 *   in the result. Requesting exact values may be expensive or
 *   unsupported.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * The result is an Arrow dataset with the following schema:
 *
 * | Field Name               | Field Type                       |
 * |--------------------------|----------------------------------|
 * | catalog_name             | utf8                             |
 * | catalog_db_schemas       | list<DB_SCHEMA_SCHEMA> not null  |
 *
 * DB_SCHEMA_SCHEMA is a Struct with fields:
 *
 * | Field Name               | Field Type                       |
 * |--------------------------|----------------------------------|
 * | db_schema_name           | utf8                             |
 * | db_schema_statistics     | list<STATISTICS_SCHEMA> not null |
 *
 * STATISTICS_SCHEMA is a Struct with fields:
 *
 * | Field Name               | Field Type                       | Comments |
 * |--------------------------|----------------------------------| -------- |
 * | table_name               | utf8 not null                    |          |
 * | column_name              | utf8                             | (1)      |
 * | statistic_key            | int16 not null                   | (2)      |
 * | statistic_value          | VALUE_SCHEMA not null            |          |
 * | statistic_is_approximate | bool not null                    | (3)      |
 *
 * 1. If null, then the statistic applies to the entire table.
 * 2. A dictionary-encoded statistic name (although we do not use the Arrow
 *    dictionary type). Values in [0, 1024) are reserved for ADBC.  Other
 *    values are for implementation-specific statistics.  For the definitions
 *    of predefined statistic types, see #GADBCStatisticKey.  To get
 *    driver-specific statistic names, use
 *    gadbc_connection_get_statistic_names().
 * 3. If true, then the value is approximate or best-effort.
 *
 * VALUE_SCHEMA is a dense union with members:
 *
 * | Field Name               | Field Type                       |
 * |--------------------------|----------------------------------|
 * | int64                    | int64                            |
 * | uint64                   | uint64                           |
 * | float64                  | float64                          |
 * | binary                   | binary                           |
 *
 * Returns: The result set as `struct ArrowArrayStream *` on success,
 *   %NULL on error. It should be freed with the
 *   `ArrowArrayStream:release` callback then g_free() when no longer
 *   needed.
 *
 *   This #GADBCConnection must outlive the returned stream.
 *
 * Since: 1.0.0
 */
gpointer gadbc_connection_get_statistics(GADBCConnection* connection,
                                         const gchar* catalog, const gchar* db_schema,
                                         const gchar* table_name, gboolean approximate,
                                         GError** error) {
  const gchar* context = "[adbc][connection][get-statistics]";
  struct AdbcConnection* adbc_connection =
      gadbc_connection_get_raw(connection, context, error);
  if (!adbc_connection) {
    return NULL;
  }
  /* The stream container is heap-allocated here; on success its ownership
   * moves to the caller, on failure it is released below. */
  struct ArrowArrayStream* array_stream = g_new0(struct ArrowArrayStream, 1);
  struct AdbcError adbc_error = {};
  /* gboolean is an int while the ADBC C API declares this flag as a char.
   * Normalize to 0/1 so that a truthy value whose low byte is zero
   * (e.g. 256) is not silently truncated to "false". */
  AdbcStatusCode status_code = AdbcConnectionGetStatistics(
      adbc_connection, catalog, db_schema, table_name, approximate ? 1 : 0,
      array_stream, &adbc_error);
  if (!gadbc_error_check(error, status_code, &adbc_error, context)) {
    g_free(array_stream);
    return NULL;
  }
  return array_stream;
}

/**
* gadbc_connection_commit:
* @connection: A #GADBCConnection.
Expand Down
73 changes: 73 additions & 0 deletions glib/adbc-glib/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@

G_BEGIN_DECLS

/**
* GADBC_VERSION_1_0_0:
*
* ADBC revision 1.0.0.
*
* Since: 1.0.0
*/
#define GADBC_VERSION_1_0_0 1000000

/**
* GADBC_VERSION_1_1_0:
*
* ADBC revision 1.1.0.
*
* Since: 1.0.0
*/
#define GADBC_VERSION_1_1_0 1001000

/**
* GADBCInfo:
* @GADBC_INFO_VENDOR_NAME: The database vendor/product name (e.g. the
Expand All @@ -35,6 +53,8 @@ G_BEGIN_DECLS
* @GADBC_INFO_DRIVER_VERSION: The driver version (type: utf8).
* @GADBC_INFO_DRIVER_ARROW_VERSION: The driver Arrow library version
* (type: utf8).
* @GADBC_INFO_DRIVER_ADBC_VERSION: The driver ADBC version
* (type: int64).
*
* The information code that is used by gadbc_connection_get_info().
*
Expand All @@ -49,6 +69,7 @@ typedef enum {
GADBC_INFO_DRIVER_NAME = 100,
GADBC_INFO_DRIVER_VERSION = 101,
GADBC_INFO_DRIVER_ARROW_VERSION = 102,
GADBC_INFO_DRIVER_ADBC_VERSION = 103,
} GADBCInfo;

/**
Expand Down Expand Up @@ -76,6 +97,53 @@ typedef enum {
GADBC_OBJECT_DEPTH_TABLES = 3,
} GADBCObjectDepth;

/**
 * GADBCStatisticKey:
 * @GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH: The average byte
 *   width statistic. The average size in bytes of a row in the
 *   column. Value type is float64.
 *   For example, this is roughly the average length of a string for a
 *   string column.
 * @GADBC_STATISTIC_KEY_DISTINCT_COUNT: The distinct value count
 *   (NDV) statistic. The number of distinct values in the column.
 *   Value type is int64 (when not approximate) or float64 (when
 *   approximate).
 * @GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH: The max byte width statistic.
 *   The maximum size in bytes of a row in the column. Value type is
 *   int64 (when not approximate) or float64 (when approximate).
 *   For example, this is the maximum length of a string for a string
 *   column.
 * @GADBC_STATISTIC_KEY_MAX_VALUE: The max value statistic. Value
 *   type is column-dependent.
 * @GADBC_STATISTIC_KEY_MIN_VALUE: The min value statistic. Value
 *   type is column-dependent.
 * @GADBC_STATISTIC_KEY_NULL_COUNT: The null count statistic. The
 *   number of values that are null in the column. Value type is
 *   int64 (when not approximate) or float64 (when approximate).
 * @GADBC_STATISTIC_KEY_ROW_COUNT: The row count statistic. The
 *   number of rows in the column or table. Value type is int64 (when
 *   not approximate) or float64 (when approximate).
 *
 * Standard statistic names for gadbc_connection_get_statistics().
 *
 * They correspond to the `ADBC_STATISTIC_*_KEY` values in `adbc.h`.
 *
 * Since: 1.0.0
 */
typedef enum {
GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH = 0,
GADBC_STATISTIC_KEY_DISTINCT_COUNT = 1,
GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH = 2,
GADBC_STATISTIC_KEY_MAX_VALUE = 3,
GADBC_STATISTIC_KEY_MIN_VALUE = 4,
GADBC_STATISTIC_KEY_NULL_COUNT = 5,
GADBC_STATISTIC_KEY_ROW_COUNT = 6,
} GADBCStatisticKey;

GADBC_AVAILABLE_IN_1_0
const gchar* gadbc_statistic_key_to_string(GADBCStatisticKey key);

/**
* GADBCIsolationLevel:
* @GADBC_ISOLATION_LEVEL_DEFAULT: Use database or driver default
Expand Down Expand Up @@ -179,6 +247,11 @@ gpointer gadbc_connection_get_table_schema(GADBCConnection* connection,
const gchar* table_name, GError** error);
GADBC_AVAILABLE_IN_0_4
gpointer gadbc_connection_get_table_types(GADBCConnection* connection, GError** error);
GADBC_AVAILABLE_IN_1_0
gpointer gadbc_connection_get_statistics(GADBCConnection* connection,
const gchar* catalog, const gchar* db_schema,
const gchar* table_name, gboolean approximate,
GError** error);
GADBC_AVAILABLE_IN_0_4
gboolean gadbc_connection_commit(GADBCConnection* connection, GError** error);
GADBC_AVAILABLE_IN_0_4
Expand Down
20 changes: 10 additions & 10 deletions glib/adbc-glib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ definition_headers = files(
'statement.h',
)

version_h_conf = configuration_data()
version_h_conf.set('GADBC_VERSION', meson.project_version())
version_h_conf.set('GADBC_VERSION_MAJOR', version_major)
version_h_conf.set('GADBC_VERSION_MINOR', version_minor)
version_h_conf.set('GADBC_VERSION_MICRO', version_micro)
version_h = configure_file(input: 'version.h.in',
output: 'version.h',
configuration: version_h_conf)
definition_headers += version_h

headers = definition_headers
headers += files(
'adbc-glib-raw.h',
Expand All @@ -41,16 +51,6 @@ headers += files(
'statement-raw.h',
)

version_h_conf = configuration_data()
version_h_conf.set('GADBC_VERSION', meson.project_version())
version_h_conf.set('GADBC_VERSION_MAJOR', version_major)
version_h_conf.set('GADBC_VERSION_MINOR', version_minor)
version_h_conf.set('GADBC_VERSION_MICRO', version_micro)
version_h = configure_file(input: 'version.h.in',
output: 'version.h',
configuration: version_h_conf)
headers += version_h

enums = gnome.mkenums_simple('enum-types',
identifier_prefix: 'GADBC',
sources: definition_headers,
Expand Down
23 changes: 23 additions & 0 deletions glib/adbc-glib/version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,15 @@
# define GADBC_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
#endif

/**
* GADBC_VERSION_1_0:
*
* You can use this macro value for compile time API version check.
*
* Since: 1.0.0
*/
#define GADBC_VERSION_1_0 G_ENCODE_VERSION(1, 0)

/**
* GADBC_VERSION_0_10:
*
Expand Down Expand Up @@ -174,6 +183,20 @@

#define GADBC_AVAILABLE_IN_ALL

#if GADBC_VERSION_MIN_REQUIRED >= GADBC_VERSION_1_0
# define GADBC_DEPRECATED_IN_1_0 GADBC_DEPRECATED
# define GADBC_DEPRECATED_IN_1_0_FOR(function) GADBC_DEPRECATED_FOR(function)
#else
# define GADBC_DEPRECATED_IN_1_0
# define GADBC_DEPRECATED_IN_1_0_FOR(function)
#endif

#if GADBC_VERSION_MAX_ALLOWED < GADBC_VERSION_1_0
# define GADBC_AVAILABLE_IN_1_0 GADBC_UNAVAILABLE(1, 0)
#else
# define GADBC_AVAILABLE_IN_1_0
#endif

#if GADBC_VERSION_MIN_REQUIRED >= GADBC_VERSION_0_10
# define GADBC_DEPRECATED_IN_0_10 GADBC_DEPRECATED
# define GADBC_DEPRECATED_IN_0_10_FOR(function) GADBC_DEPRECATED_FOR(function)
Expand Down
24 changes: 17 additions & 7 deletions glib/test/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,28 @@ def require_gi_bindings(major, minor, micro)
omit(message)
end

# Imports +c_abi_array_stream+ as an Arrow::RecordBatchReader and yields
# the reader to the given block.
#
# The reader is unreferenced once the block finishes, and the stream
# memory is released with GLib.free in every case, including when the
# import or the block raises.
def import_array_stream(c_abi_array_stream)
  reader = Arrow::RecordBatchReader.import(c_abi_array_stream)
  begin
    yield(reader)
  ensure
    reader.unref
  end
ensure
  GLib.free(c_abi_array_stream)
end

# Executes +statement+ and passes the outcome to the optional block.
#
# When +need_result+ is true, the returned C ABI array stream is imported
# through import_array_stream, read fully into a table, and the block
# receives (table, n_rows_affected). Otherwise the block receives only
# n_rows_affected.
def execute_statement(statement, need_result: true)
  _, c_abi_array_stream, n_rows_affected = statement.execute(need_result)
  if need_result
    # import_array_stream takes care of releasing the reader and the stream.
    import_array_stream(c_abi_array_stream) do |reader|
      table = reader.read_all
      yield(table, n_rows_affected) if block_given?
    end
  else
    yield(n_rows_affected) if block_given?
  end
end

Expand Down
Loading

0 comments on commit 0a8ede6

Please sign in to comment.