Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(glib): Add garrow_connection_get_statistics() #1744

Merged
merged 2 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/conda_env_glib.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ arrow-c-glib
glib
gobject-introspection
meson
postgresql
ruby
116 changes: 116 additions & 0 deletions glib/adbc-glib/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,35 @@
#define BOOLEAN_TO_OPTION_VALUE(boolean) \
((boolean) ? ADBC_OPTION_VALUE_ENABLED : ADBC_OPTION_VALUE_DISABLED)

/**
 * gadbc_statistic_key_to_string:
 * @key: A #GADBCStatisticKey.
 *
 * Looks up the ADBC statistic name that corresponds to @key. A value
 * that is not one of the #GADBCStatisticKey members yields the fixed
 * string "adbc.statistic.invalid".
 *
 * Returns: The name of @key.
 *
 * Since: 1.0.0
 */
const gchar* gadbc_statistic_key_to_string(GADBCStatisticKey key) {
  /* Start from the fallback so that unknown keys need no extra handling. */
  const gchar* name = "adbc.statistic.invalid";

  switch (key) {
    case GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH:
      name = ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_NAME;
      break;
    case GADBC_STATISTIC_KEY_DISTINCT_COUNT:
      name = ADBC_STATISTIC_DISTINCT_COUNT_NAME;
      break;
    case GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH:
      name = ADBC_STATISTIC_MAX_BYTE_WIDTH_NAME;
      break;
    case GADBC_STATISTIC_KEY_MAX_VALUE:
      name = ADBC_STATISTIC_MAX_VALUE_NAME;
      break;
    case GADBC_STATISTIC_KEY_MIN_VALUE:
      name = ADBC_STATISTIC_MIN_VALUE_NAME;
      break;
    case GADBC_STATISTIC_KEY_NULL_COUNT:
      name = ADBC_STATISTIC_NULL_COUNT_NAME;
      break;
    case GADBC_STATISTIC_KEY_ROW_COUNT:
      name = ADBC_STATISTIC_ROW_COUNT_NAME;
      break;
    default:
      break;
  }

  return name;
}

/**
* gadbc_isolation_level_to_string:
* @level: A #GADBCIsolationLevel.
Expand Down Expand Up @@ -538,6 +567,93 @@ gpointer gadbc_connection_get_table_types(GADBCConnection* connection, GError**
}
}

/**
* gadbc_connection_get_statistics:
* @connection: A #GADBCConnection.
* @catalog: (nullable): A catalog or %NULL if not applicable.
* @db_schema: (nullable): A database schema or %NULL if not applicable.
* @table_name: (nullable): A table name.
* @approximate: Whether approximate values are allowed or not. If
* this is %TRUE, best-effort, approximate or cached values may be
* returned. Otherwise, exact values are requested. Note that the
* database may return approximate values regardless as indicated
* in the result. Requesting exact values may be expensive or
* unsupported.
* @error: (nullable): Return location for a #GError or %NULL.
*
* The result is an Arrow dataset with the following schema:
*
* | Field Name | Field Type |
* |--------------------------|----------------------------------|
* | catalog_name | utf8 |
* | catalog_db_schemas | list<DB_SCHEMA_SCHEMA> not null |
*
* DB_SCHEMA_SCHEMA is a Struct with fields:
*
* | Field Name | Field Type |
* |--------------------------|----------------------------------|
* | db_schema_name | utf8 |
* | db_schema_statistics | list<STATISTICS_SCHEMA> not null |
*
* STATISTICS_SCHEMA is a Struct with fields:
*
* | Field Name | Field Type | Comments |
* |--------------------------|----------------------------------| -------- |
* | table_name | utf8 not null | |
* | column_name | utf8 | (1) |
* | statistic_key | int16 not null | (2) |
* | statistic_value | VALUE_SCHEMA not null | |
* | statistic_is_approximate | bool not null | (3) |
*
* 1. If null, then the statistic applies to the entire table.
* 2. A dictionary-encoded statistic name (although we do not use the Arrow
* dictionary type). Values in [0, 1024) are reserved for ADBC. Other
* values are for implementation-specific statistics. For the definitions
* of predefined statistic types, see #GADBCStatisticKey. To get
* driver-specific statistic names, use
* gadbc_connection_get_statistic_names().
* 3. If true, then the value is approximate or best-effort.
*
* VALUE_SCHEMA is a dense union with members:
*
* | Field Name | Field Type |
* |--------------------------|----------------------------------|
* | int64 | int64 |
* | uint64 | uint64 |
* | float64 | float64 |
* | binary | binary |
*
* Returns: The result set as `struct ArrowArrayStream *`. It should
* be freed with the `ArrowArrayStream:release` callback then
* g_free() when no longer needed.
*
* This GADBCConnection must outlive the returned stream.
*
* Since: 1.0.0
*/
gpointer gadbc_connection_get_statistics(GADBCConnection* connection,
                                         const gchar* catalog, const gchar* db_schema,
                                         const gchar* table_name, gboolean approximate,
                                         GError** error) {
  const gchar* context = "[adbc][connection][get-statistics]";

  /* Without a raw ADBC connection there is nothing to query. */
  struct AdbcConnection* raw_connection =
      gadbc_connection_get_raw(connection, context, error);
  if (raw_connection == NULL) {
    return NULL;
  }

  struct AdbcError adbc_error = {};
  /* Zero-filled stream that is handed to the driver call below. */
  struct ArrowArrayStream* stream = g_new0(struct ArrowArrayStream, 1);
  AdbcStatusCode status =
      AdbcConnectionGetStatistics(raw_connection, catalog, db_schema, table_name,
                                  approximate, stream, &adbc_error);

  /* A false result from gadbc_error_check() is treated as failure: the
   * stream allocated above is released and NULL is returned. */
  if (!gadbc_error_check(error, status, &adbc_error, context)) {
    g_free(stream);
    return NULL;
  }

  /* Success: the heap-allocated stream is handed to the caller. */
  return stream;
}

/**
* gadbc_connection_commit:
* @connection: A #GADBCConnection.
Expand Down
73 changes: 73 additions & 0 deletions glib/adbc-glib/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@

G_BEGIN_DECLS

/**
* GADBC_VERSION_1_0_0:
*
* ADBC revision 1.0.0.
*
* Since: 1.0.0
*/
#define GADBC_VERSION_1_0_0 1000000

/**
* GADBC_VERSION_1_1_0:
*
* ADBC revision 1.1.0.
*
* Since: 1.0.0
*/
#define GADBC_VERSION_1_1_0 1001000

/**
* GADBCInfo:
* @GADBC_INFO_VENDOR_NAME: The database vendor/product name (e.g. the
Expand All @@ -35,6 +53,8 @@ G_BEGIN_DECLS
* @GADBC_INFO_DRIVER_VERSION: The driver version (type: utf8).
* @GADBC_INFO_DRIVER_ARROW_VERSION: The driver Arrow library version
* (type: utf8).
* @GADBC_INFO_DRIVER_ADBC_VERSION: The driver ADBC version
* (type: int64).
*
* The information code that is used by gadbc_connection_get_info().
*
Expand All @@ -49,6 +69,7 @@ typedef enum {
GADBC_INFO_DRIVER_NAME = 100,
GADBC_INFO_DRIVER_VERSION = 101,
GADBC_INFO_DRIVER_ARROW_VERSION = 102,
GADBC_INFO_DRIVER_ADBC_VERSION = 103,
} GADBCInfo;

/**
Expand Down Expand Up @@ -76,6 +97,53 @@ typedef enum {
GADBC_OBJECT_DEPTH_TABLES = 3,
} GADBCObjectDepth;

/**
* GADBCStatisticKey:
* @GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH: The average byte
* width statistic. The average size in bytes of a row in the
* column. Value type is float64.
* For example, this is roughly the average length of a string for a
* string column.
* @GADBC_STATISTIC_KEY_DISTINCT_COUNT: The distinct value count
* (NDV) statistic. The number of distinct values in the column.
* Value type is int64 (when not approximate) or float64 (when
* approximate).
* @GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH: The max byte width statistic.
* The maximum size in bytes of a row in the column. Value type is
* int64 (when not approximate) or float64 (when approximate).
* For example, this is the maximum length of a string for a string
* column.
* @GADBC_STATISTIC_KEY_MAX_VALUE: The max value statistic. Value
* type is column-dependent.
* @GADBC_STATISTIC_KEY_MIN_VALUE: The min value statistic. Value
* type is column-dependent.
* @GADBC_STATISTIC_KEY_NULL_COUNT: The null count statistic. The
* number of values that are null in the column. Value type is
* int64 (when not approximate) or float64 (when approximate).
* @GADBC_STATISTIC_KEY_ROW_COUNT: The row count statistic. The
* number of rows in the column or table. Value type is int64 (when
* not approximate) or float64 (when approximate).
*
* Standard statistic keys for gadbc_connection_get_statistics().
*
* They correspond to the `ADBC_STATISTIC_*_KEY` values in `adbc.h`.
*
* Since: 1.0.0
*/
typedef enum {
GADBC_STATISTIC_KEY_AVERAGE_BYTE_WIDTH = 0,
GADBC_STATISTIC_KEY_DISTINCT_COUNT = 1,
GADBC_STATISTIC_KEY_MAX_BYTE_WIDTH = 2,
GADBC_STATISTIC_KEY_MAX_VALUE = 3,
GADBC_STATISTIC_KEY_MIN_VALUE = 4,
GADBC_STATISTIC_KEY_NULL_COUNT = 5,
GADBC_STATISTIC_KEY_ROW_COUNT = 6,
} GADBCStatisticKey;

GADBC_AVAILABLE_IN_1_0
const gchar* gadbc_statistic_key_to_string(GADBCStatisticKey key);

/**
* GADBCIsolationLevel:
* @GADBC_ISOLATION_LEVEL_DEFAULT: Use database or driver default
Expand Down Expand Up @@ -179,6 +247,11 @@ gpointer gadbc_connection_get_table_schema(GADBCConnection* connection,
const gchar* table_name, GError** error);
GADBC_AVAILABLE_IN_0_4
gpointer gadbc_connection_get_table_types(GADBCConnection* connection, GError** error);
GADBC_AVAILABLE_IN_1_0
gpointer gadbc_connection_get_statistics(GADBCConnection* connection,
const gchar* catalog, const gchar* db_schema,
const gchar* table_name, gboolean approximate,
GError** error);
GADBC_AVAILABLE_IN_0_4
gboolean gadbc_connection_commit(GADBCConnection* connection, GError** error);
GADBC_AVAILABLE_IN_0_4
Expand Down
20 changes: 10 additions & 10 deletions glib/adbc-glib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ definition_headers = files(
'statement.h',
)

version_h_conf = configuration_data()
version_h_conf.set('GADBC_VERSION', meson.project_version())
version_h_conf.set('GADBC_VERSION_MAJOR', version_major)
version_h_conf.set('GADBC_VERSION_MINOR', version_minor)
version_h_conf.set('GADBC_VERSION_MICRO', version_micro)
version_h = configure_file(input: 'version.h.in',
output: 'version.h',
configuration: version_h_conf)
definition_headers += version_h

headers = definition_headers
headers += files(
'adbc-glib-raw.h',
Expand All @@ -41,16 +51,6 @@ headers += files(
'statement-raw.h',
)

version_h_conf = configuration_data()
version_h_conf.set('GADBC_VERSION', meson.project_version())
version_h_conf.set('GADBC_VERSION_MAJOR', version_major)
version_h_conf.set('GADBC_VERSION_MINOR', version_minor)
version_h_conf.set('GADBC_VERSION_MICRO', version_micro)
version_h = configure_file(input: 'version.h.in',
output: 'version.h',
configuration: version_h_conf)
headers += version_h

enums = gnome.mkenums_simple('enum-types',
identifier_prefix: 'GADBC',
sources: definition_headers,
Expand Down
23 changes: 23 additions & 0 deletions glib/adbc-glib/version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,15 @@
# define GADBC_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
#endif

/**
* GADBC_VERSION_1_0:
*
* You can use this macro value for compile time API version check.
*
* Since: 1.0.0
*/
#define GADBC_VERSION_1_0 G_ENCODE_VERSION(1, 0)

/**
* GADBC_VERSION_0_10:
*
Expand Down Expand Up @@ -174,6 +183,20 @@

#define GADBC_AVAILABLE_IN_ALL

#if GADBC_VERSION_MIN_REQUIRED >= GADBC_VERSION_1_0
# define GADBC_DEPRECATED_IN_1_0 GADBC_DEPRECATED
# define GADBC_DEPRECATED_IN_1_0_FOR(function) GADBC_DEPRECATED_FOR(function)
#else
# define GADBC_DEPRECATED_IN_1_0
# define GADBC_DEPRECATED_IN_1_0_FOR(function)
#endif

#if GADBC_VERSION_MAX_ALLOWED < GADBC_VERSION_1_0
# define GADBC_AVAILABLE_IN_1_0 GADBC_UNAVAILABLE(1, 0)
#else
# define GADBC_AVAILABLE_IN_1_0
#endif

#if GADBC_VERSION_MIN_REQUIRED >= GADBC_VERSION_0_10
# define GADBC_DEPRECATED_IN_0_10 GADBC_DEPRECATED
# define GADBC_DEPRECATED_IN_0_10_FOR(function) GADBC_DEPRECATED_FOR(function)
Expand Down
24 changes: 17 additions & 7 deletions glib/test/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,28 @@ def require_gi_bindings(major, minor, micro)
omit(message)
end

def import_array_stream(c_abi_array_stream)
begin
reader = Arrow::RecordBatchReader.import(c_abi_array_stream)
begin
yield(reader)
ensure
reader.unref
end
ensure
GLib.free(c_abi_array_stream)
end
end

def execute_statement(statement, need_result: true)
_, c_abi_array_stream, n_rows_affected = statement.execute(need_result)
begin
if need_result
reader = Arrow::RecordBatchReader.import(c_abi_array_stream)
if need_result
import_array_stream(c_abi_array_stream) do |reader|
table = reader.read_all
yield(table, n_rows_affected) if block_given?
else
yield(n_rows_affected) if block_given?
end
ensure
GLib.free(c_abi_array_stream) if need_result
else
yield(n_rows_affected) if block_given?
end
end

Expand Down
Loading
Loading