From 04e41f0f269b08d1f1dd8e689848e0f8065a9f6d Mon Sep 17 00:00:00 2001 From: Egor Zudin Date: Tue, 9 Apr 2024 13:37:16 +0300 Subject: [PATCH] Fq gather stable 24-1 (#3517) Co-authored-by: Ilnaz Nizametdinov Co-authored-by: kruall Co-authored-by: Alexander Petrukhin Co-authored-by: Vitalii Gridnev Co-authored-by: vporyadke Co-authored-by: Nikolay Shestakov Co-authored-by: niksaveliev Co-authored-by: Vitaly Isaev Co-authored-by: uzhastik Co-authored-by: Pisarenko Grigoriy <79596613+GrigoriyPA@users.noreply.github.com> Co-authored-by: Yaroslav Plishan <80714170+MetaGigachad@users.noreply.github.com> Co-authored-by: Hor911 Co-authored-by: Dmitry Kardymon Co-authored-by: Oleg Doronin Co-authored-by: Ivan Blinkov Co-authored-by: uzhastik Co-authored-by: Daniil Cherednik Co-authored-by: Andrey Kulaga --- .github/config/muted_ya.txt | 8 +- ydb/core/base/events.h | 1 + ydb/core/external_sources/object_storage.cpp | 11 +- .../libs/actors/clusters_from_connections.cpp | 15 +- ydb/core/fq/libs/actors/database_resolver.cpp | 104 +- ydb/core/fq/libs/actors/proxy_private.h | 2 - ydb/core/fq/libs/actors/run_actor.cpp | 7 +- .../libs/actors/ut/database_resolver_ut.cpp | 188 +++- ydb/core/fq/libs/actors/ya.make | 1 - .../checkpointing/checkpoint_coordinator.cpp | 10 + .../checkpointing/checkpoint_coordinator.h | 7 +- ydb/core/fq/libs/compute/common/config.h | 2 +- ydb/core/fq/libs/compute/common/ut/ya.make | 4 + ydb/core/fq/libs/compute/common/utils.cpp | 144 ++- ydb/core/fq/libs/compute/common/utils.h | 33 + ydb/core/fq/libs/compute/common/ya.make | 1 + .../fq/libs/compute/ydb/actors_factory.cpp | 50 +- ydb/core/fq/libs/compute/ydb/actors_factory.h | 4 +- ...compute_database_control_plane_service.cpp | 47 +- .../compute_database_control_plane_service.h | 1 + .../ydb/control_plane/database_monitoring.cpp | 15 +- .../monitoring_rest_client_actor.cpp | 154 +++ .../fq/libs/compute/ydb/control_plane/ya.make | 4 + ydb/core/fq/libs/compute/ydb/events/events.h | 18 +- 
.../fq/libs/compute/ydb/executer_actor.cpp | 9 +- ydb/core/fq/libs/compute/ydb/executer_actor.h | 1 + .../compute/ydb/resources_cleaner_actor.cpp | 13 +- .../libs/compute/ydb/result_writer_actor.cpp | 14 +- .../fq/libs/compute/ydb/result_writer_actor.h | 1 + .../libs/compute/ydb/status_tracker_actor.cpp | 180 ++-- .../libs/compute/ydb/status_tracker_actor.h | 2 + .../fq/libs/compute/ydb/stopper_actor.cpp | 94 +- ydb/core/fq/libs/compute/ydb/stopper_actor.h | 3 + .../libs/compute/ydb/ydb_connector_actor.cpp | 65 +- .../fq/libs/compute/ydb/ydb_run_actor.cpp | 53 +- ydb/core/fq/libs/config/protos/compute.proto | 1 + .../actors/query_utils.cpp | 8 + .../control_plane_proxy.cpp | 2 +- .../libs/control_plane_proxy/events/events.h | 2 + .../control_plane_proxy/{ => utils}/utils.h | 0 .../fq/libs/control_plane_proxy/utils/ya.make | 9 + ydb/core/fq/libs/control_plane_proxy/ya.make | 1 + .../internal/task_ping.cpp | 3 +- .../ydb_control_plane_storage_bindings.cpp | 4 +- .../ydb_control_plane_storage_connections.cpp | 4 +- .../ydb_control_plane_storage_queries.cpp | 14 +- ydb/core/fq/libs/events/events.h | 2 + ydb/core/fq/libs/init/init.cpp | 5 +- ydb/core/fq/libs/protos/fq_private.proto | 1 + .../grpc_services/query/rpc_execute_query.cpp | 46 +- ydb/core/grpc_services/rpc_fq.cpp | 3 +- ydb/core/grpc_services/rpc_fq_internal.cpp | 1 - ydb/core/grpc_services/service_fq.h | 2 +- ydb/core/grpc_services/ya.make | 3 +- .../kqp/common/events/script_executions.h | 89 +- .../kqp/compile_service/kqp_compile_actor.cpp | 28 +- .../kqp/executer_actor/kqp_data_executer.cpp | 4 +- ydb/core/kqp/executer_actor/ya.make | 1 + .../kqp_federated_query_helpers.cpp | 5 +- .../kqp_finalize_script_actor.cpp | 22 +- .../kqp_finalize_script_service.cpp | 7 +- .../external_data_source/manager.cpp | 9 +- ydb/core/kqp/host/kqp_host.cpp | 55 +- ydb/core/kqp/host/kqp_host_impl.h | 6 +- ydb/core/kqp/host/kqp_runner.cpp | 8 +- ydb/core/kqp/node_service/ya.make | 1 + ydb/core/kqp/opt/logical/kqp_opt_log.cpp | 
25 +- .../proxy_service/kqp_script_executions.cpp | 256 ++--- .../kqp/proxy_service/kqp_script_executions.h | 4 +- .../kqp/query_compiler/kqp_query_compiler.cpp | 11 +- .../run_script_actor/kqp_run_script_actor.cpp | 4 - .../kqp/session_actor/kqp_session_actor.cpp | 15 +- .../generic/ch_recipe_ut_helpers.cpp | 51 - .../generic/ch_recipe_ut_helpers.h | 15 - .../generic/connector_recipe_ut_helpers.cpp | 33 - .../generic/connector_recipe_ut_helpers.h | 15 - .../generic/docker-compose.yml | 25 - .../generic/kqp_generic_plan_ut.cpp | 179 ---- .../generic/kqp_generic_provider_join_ut.cpp | 133 --- .../generic/mdb_mock_config.json | 13 - .../generic/pg_recipe_ut_helpers.cpp | 56 -- .../generic/pg_recipe_ut_helpers.h | 16 - .../generic_ut/kqp_generic_provider_ut.cpp | 52 +- ydb/core/kqp/ut/federated_query/ya.make | 1 - ydb/core/kqp/ut/opt/kqp_agg_ut.cpp | 101 ++ ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp | 48 + ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 23 + ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp | 31 +- ydb/core/testlib/test_client.cpp | 4 +- .../datamodel/external_data_source.md | 2 +- .../federated_query/_assets/architecture.png | Bin 0 -> 30414 bytes .../concepts/federated_query/architecture.md | 36 + .../ru/core/concepts/federated_query/index.md | 11 +- .../core/concepts/federated_query/toc_i.yaml | 1 + .../deploy/manual/_images/ydb_fq_onprem.png | Bin 0 -> 66987 bytes ydb/docs/ru/core/deploy/manual/connector.md | 185 ++++ .../manual/deploy-ydb-federated-query.md | 59 ++ ydb/docs/ru/core/deploy/manual/toc_i.yaml | 6 +- ydb/docs/ru/core/deploy/toc_i.yaml | 2 +- .../self_hosted/_images/ydb_fq_docker.png | Bin 0 -> 13418 bytes .../self_hosted/_includes/ydb_docker.md | 83 +- ydb/library/query_actor/query_actor.h | 101 +- ydb/library/yql/ast/yql_constraint.cpp | 3 +- .../yql/core/common_opt/yql_co_simple1.cpp | 4 +- .../compute/dq_compute_actor_checkpoints.cpp | 8 + .../compute/dq_compute_actor_checkpoints.h | 1 + .../yql/dq/actors/compute/retry_queue.cpp | 10 + 
.../yql/dq/actors/compute/retry_queue.h | 8 +- .../yql/dq/integration/yql_dq_integration.h | 2 +- ydb/library/yql/dq/opt/dq_opt_hopping.cpp | 793 +++++++++++++++ ydb/library/yql/dq/opt/dq_opt_hopping.h | 18 + ydb/library/yql/dq/opt/ya.make | 1 + .../minikql/comp_nodes/mkql_grace_join.cpp | 4 +- .../comp_nodes/mkql_grace_join_imp.cpp | 62 +- .../minikql/comp_nodes/mkql_grace_join_imp.h | 2 + .../comp_nodes/ut/mkql_grace_join_ut.cpp | 6 +- .../yql_clickhouse_dq_integration.cpp | 2 +- .../common/dq/yql_dq_integration_impl.cpp | 2 +- .../common/dq/yql_dq_integration_impl.h | 2 +- .../common/proto/gateways_config.proto | 28 +- ydb/library/yql/providers/dq/common/ya.make | 1 + .../yql/providers/dq/counters/counters.h | 4 + .../yql/providers/dq/opt/logical_optimize.cpp | 766 +------------- .../dq/planner/execution_planner.cpp | 2 +- .../dq/runtime/task_command_executor.cpp | 7 +- .../yql/providers/generic/actors/ya.make | 3 + .../generic/actors/yql_generic_read_actor.cpp | 43 +- .../actors/yql_generic_token_provider.cpp | 67 ++ .../actors/yql_generic_token_provider.h | 30 + .../connector/api/common/data_source.proto | 2 + .../api/service/protos/connector.proto | 6 +- .../generic/connector/libcpp/client.cpp | 18 +- .../generic/connector/libcpp/client.h | 2 - .../connector/libcpp/ut_helpers/defaults.cpp | 1 - .../connector/libcpp/ut_helpers/defaults.h | 1 - .../generic/connector/tests/README.md | 25 + .../{test_cases => common_test_cases}/base.py | 16 +- .../select_missing_database.py | 25 + .../common_test_cases/select_missing_table.py | 25 + .../select_positive_common.py | 34 +- .../{test_cases => common_test_cases}/ya.make | 8 - .../generic/connector/tests/conftest.py | 51 - .../tests/datasource/clickhouse/collection.py | 37 + .../tests/datasource/clickhouse/conftest.py | 22 + .../datasource/clickhouse/docker-compose.yml | 20 + .../clickhouse}/select_datetime.py | 127 +-- .../clickhouse/select_positive.py} | 4 +- .../tests/datasource/clickhouse/test.py | 104 ++ 
.../tests/datasource/clickhouse/ya.make | 66 ++ .../tests/datasource/postgresql/collection.py | 37 + .../tests/datasource/postgresql/conftest.py | 22 + .../datasource/postgresql/docker-compose.yml | 19 + .../datasource/postgresql/select_datetime.py | 163 +++ .../postgresql/select_positive.py} | 4 +- .../select_positive_with_schema.py} | 13 +- .../tests/datasource/postgresql/test.py | 133 +++ .../tests/datasource/postgresql/ya.make | 66 ++ .../connector/tests/datasource/ya.make | 5 + .../tests/datasource/ydb/collection.py | 32 + .../tests/datasource/ydb/conftest.py | 4 + .../tests/datasource/ydb/docker-compose.yml | 25 + .../tests/datasource/ydb/init/01_basic.sh | 47 + .../connector/tests/datasource/ydb/test.py | 50 + .../connector/tests/datasource/ydb}/ya.make | 49 +- .../fq-connector-go/fq-connector-go.yaml | 25 + .../connector/tests/join/collection.py | 26 + .../generic/connector/tests/join/conftest.py | 38 + .../tests/{ => join}/docker-compose.yml | 29 +- .../tests/{join.py => join/scenario.py} | 29 +- .../generic/connector/tests/join/test.py | 40 + .../{test_cases/join.py => join/test_case.py} | 12 +- .../generic/connector/tests/join/ya.make | 66 ++ .../providers/generic/connector/tests/test.py | 230 ----- .../connector/tests/test_cases/collection.py | 41 - .../test_cases/select_missing_database.py | 30 - .../tests/test_cases/select_missing_table.py | 30 - .../tests/utils/clients/clickhouse.py | 32 + .../tests/utils/clients/postgresql.py | 60 ++ .../connector/tests/utils/clients/ya.make | 16 + .../connector/tests/utils/clients/ydb.py | 10 + .../generic/connector/tests/utils/database.py | 4 + .../connector/tests/utils/docker_compose.py | 90 +- .../generic/connector/tests/utils/generate.py | 2 +- .../connector/tests/utils/postgresql.py | 176 ---- .../connector/tests/utils/{ => run}/dqrun.py | 29 +- .../connector/tests/utils/{ => run}/kqprun.py | 30 +- .../tests/utils/{runner.py => run/parent.py} | 14 +- .../connector/tests/utils/run/result.py | 15 + 
.../connector/tests/utils/run/runners.py | 23 + .../generic/connector/tests/utils/run/ya.make | 23 + .../tests/{ => utils/scenario}/clickhouse.py | 32 +- .../tests/{ => utils/scenario}/postgresql.py | 37 +- .../connector/tests/utils/scenario/ya.make | 17 + .../connector/tests/utils/scenario/ydb.py | 38 + .../generic/connector/tests/utils/schema.py | 8 +- .../generic/connector/tests/utils/settings.py | 110 +- .../tests/utils/{ => types}/clickhouse.py | 29 - .../connector/tests/utils/types/postgresql.py | 120 +++ .../connector/tests/utils/types/ya.make | 9 + .../connector/tests/utils/types/ydb.py | 79 ++ .../generic/connector/tests/utils/ya.make | 16 +- .../providers/generic/connector/tests/ya.make | 74 +- .../yql/providers/generic/proto/source.proto | 20 +- .../provider/ut/pushdown/pushdown_ut.cpp | 3 +- .../yql/providers/generic/provider/ya.make | 5 + .../provider/yql_generic_cluster_config.cpp | 119 ++- .../provider/yql_generic_dq_integration.cpp | 28 +- .../provider/yql_generic_io_discovery.cpp | 17 +- .../provider/yql_generic_load_meta.cpp | 117 ++- .../generic/provider/yql_generic_provider.cpp | 14 +- .../generic/provider/yql_generic_provider.h | 6 +- .../generic/provider/yql_generic_settings.cpp | 20 +- .../generic/provider/yql_generic_settings.h | 15 +- .../generic/provider/yql_generic_state.h | 18 +- .../generic/provider/yql_generic_utils.cpp | 22 + .../generic/provider/yql_generic_utils.h | 8 + .../pq/provider/yql_pq_dq_integration.cpp | 2 +- ydb/library/yql/providers/s3/actors/ya.make | 4 + .../s3/actors/yql_s3_applicator_actor.cpp | 31 +- .../providers/s3/actors/yql_s3_read_actor.cpp | 944 +++++++++++++----- .../providers/s3/actors/yql_s3_read_actor.h | 18 + .../yql/providers/s3/proto/file_queue.proto | 40 + .../yql/providers/s3/proto/source.proto | 1 + ydb/library/yql/providers/s3/proto/ya.make | 6 + .../yql/providers/s3/provider/ut/ya.make | 7 + ydb/library/yql/providers/s3/provider/ya.make | 1 + .../providers/s3/provider/yql_s3_datasink.cpp | 6 +- 
.../s3/provider/yql_s3_datasource.cpp | 8 +- .../s3/provider/yql_s3_dq_integration.cpp | 142 ++- .../s3/provider/yql_s3_io_discovery.cpp | 8 +- .../providers/s3/provider/yql_s3_provider.cpp | 6 +- .../providers/s3/provider/yql_s3_provider.h | 6 +- .../s3/provider/yql_s3_provider_impl.h | 2 +- .../providers/s3/provider/yql_s3_settings.cpp | 4 + .../providers/s3/provider/yql_s3_settings.h | 6 +- .../provider/yql_solomon_dq_integration.cpp | 2 +- .../ydb/provider/yql_ydb_dq_integration.cpp | 2 +- ydb/library/yql/sql/v1/query.cpp | 6 +- ydb/library/yql/sql/v1/sql_ut.cpp | 2 +- .../sql/dq_file/part3/canondata/result.json | 22 +- .../tests/sql/sql2yql/canondata/result.json | 18 +- .../yql/tests/sql/suites/join/grace_join2.sql | 2 +- .../nopushdown_filter_with_depends_on.sql | 1 + ydb/library/yql/tools/dqrun/dqrun.cpp | 39 +- ydb/library/yql/tools/dqrun/ya.make | 1 + .../Formats/Impl/CHColumnToArrowColumn.cpp | 61 ++ ydb/public/sdk/cpp/client/ydb_params/params.h | 2 + .../sdk/cpp/client/ydb_query/client.cpp | 15 +- ydb/public/sdk/cpp/client/ydb_query/client.h | 3 + .../cpp/client/ydb_query/impl/exec_query.cpp | 15 +- ydb/public/sdk/cpp/client/ydb_query/stats.cpp | 10 + ydb/public/sdk/cpp/client/ydb_query/stats.h | 1 + ydb/public/tools/lib/cmds/__init__.py | 31 + ydb/public/tools/lib/cmds/ut/test.py | 26 + ydb/public/tools/lib/cmds/ut/ya.make | 12 + ydb/public/tools/lib/cmds/ya.make | 5 +- ydb/public/tools/local_ydb/__main__.py | 2 +- ydb/public/tools/local_ydb/ya.make | 1 + ydb/services/fq/ut_integration/fq_ut.cpp | 160 --- ydb/tests/fq/s3/canondata/result.json | 36 + ..._time_format_common_simple_format_test.csv | 3 - ...time_format_common_simple_format_test.json | 2 - ...e_format_common_simple_format_test.parquet | Bin 1064 -> 0 bytes ..._time_format_common_simple_format_test.tsv | 3 - .../date_time_simple_iso_test.csv | 4 - .../date_time_simple_iso_test.json | 3 - .../date_time_simple_iso_test.parquet | Bin 1091 -> 0 bytes .../date_time_simple_iso_test.tsv | 4 - 
.../common_simple_posix_test.csv | 3 - .../common_simple_posix_test.json | 2 - .../common_simple_posix_test.tsv | 3 - ...stamp_format_common_simple_format_test.csv | 3 - ...tamp_format_common_simple_format_test.json | 2 - ...stamp_format_common_simple_format_test.tsv | 3 - ..._format_common_simple_format_test.parquet} | Bin 1085 -> 1060 bytes ...p_format_common_simple_format_test.parquet | Bin 0 -> 1060 bytes .../timestamp_simple_iso_test.csv | 5 - .../timestamp_simple_iso_test.json | 4 - .../timestamp_simple_iso_test.tsv | 5 - .../timestamp_simple_iso_test.parquet | Bin 0 -> 1078 bytes .../timestamp_simple_iso_test.parquet | Bin 0 -> 1078 bytes .../common_simple_posix_test.csv | 3 - .../common_simple_posix_test.json | 2 - .../common_simple_posix_test.tsv | 3 - .../common_simple_posix_test.parquet | Bin 0 -> 1046 bytes .../common_simple_posix_test.parquet | Bin 0 -> 1046 bytes ..._MICROSECONDS_timestamp_unix_time_test.csv | 4 - ..._MILLISECONDS_timestamp_unix_time_test.csv | 4 - ..._TIME_SECONDS_timestamp_unix_time_test.csv | 4 - ...MICROSECONDS_timestamp_unix_time_test.json | 3 - ...MILLISECONDS_timestamp_unix_time_test.json | 3 - ...TIME_SECONDS_timestamp_unix_time_test.json | 3 - ..._MICROSECONDS_timestamp_unix_time_test.tsv | 4 - ..._MILLISECONDS_timestamp_unix_time_test.tsv | 4 - ..._TIME_SECONDS_timestamp_unix_time_test.tsv | 4 - ...ROSECONDS_timestamp_unix_time_test.parquet | Bin 0 -> 1079 bytes ...LISECONDS_timestamp_unix_time_test.parquet | Bin 0 -> 1086 bytes ...E_SECONDS_timestamp_unix_time_test.parquet | Bin 0 -> 1079 bytes ...ROSECONDS_timestamp_unix_time_test.parquet | Bin 0 -> 1079 bytes ...LISECONDS_timestamp_unix_time_test.parquet | Bin 0 -> 1086 bytes ...E_SECONDS_timestamp_unix_time_test.parquet | Bin 0 -> 1079 bytes ydb/tests/fq/s3/test_bindings.py | 33 + ydb/tests/fq/s3/test_explicit_partitioning.py | 110 +- ydb/tests/fq/s3/test_format_setting.py | 15 +- ydb/tests/fq/s3/test_insert.py | 2 +- ydb/tests/fq/s3/test_s3.py | 217 ++-- 
ydb/tests/fq/s3/test_yq_v2.py | 6 +- ydb/tests/fq/yds/test_select_1.py | 8 +- ydb/tests/library/harness/kikimr_config.py | 39 +- ydb/tests/library/ut/kikimr_config.py | 23 + ydb/tests/library/ut/ya.make | 12 + ydb/tests/library/ya.make | 3 + ydb/tests/tools/kqprun/.gitignore | 4 + ydb/tests/tools/kqprun/kqprun.cpp | 79 +- ydb/tests/tools/kqprun/src/actors.cpp | 43 +- ydb/tests/tools/kqprun/src/actors.h | 4 +- ydb/tests/tools/kqprun/src/common.h | 14 +- ydb/tests/tools/kqprun/src/kqp_runner.cpp | 67 +- ydb/tests/tools/kqprun/src/kqp_runner.h | 4 +- ydb/tests/tools/kqprun/src/ydb_setup.cpp | 45 +- ydb/tests/tools/kqprun/src/ydb_setup.h | 8 +- 321 files changed, 7132 insertions(+), 3863 deletions(-) create mode 100644 ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp rename ydb/core/fq/libs/control_plane_proxy/{ => utils}/utils.h (100%) create mode 100644 ydb/core/fq/libs/control_plane_proxy/utils/ya.make delete mode 100644 ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp delete mode 100644 ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h delete mode 100644 ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp delete mode 100644 ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h delete mode 100644 ydb/core/kqp/ut/federated_query/generic/docker-compose.yml delete mode 100644 ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp delete mode 100644 ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp delete mode 100644 ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json delete mode 100644 ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp delete mode 100644 ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h create mode 100644 ydb/docs/ru/core/concepts/federated_query/_assets/architecture.png create mode 100644 ydb/docs/ru/core/concepts/federated_query/architecture.md create mode 100644 
ydb/docs/ru/core/deploy/manual/_images/ydb_fq_onprem.png create mode 100644 ydb/docs/ru/core/deploy/manual/connector.md create mode 100644 ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md create mode 100644 ydb/docs/ru/core/getting_started/self_hosted/_images/ydb_fq_docker.png create mode 100644 ydb/library/yql/dq/opt/dq_opt_hopping.cpp create mode 100644 ydb/library/yql/dq/opt/dq_opt_hopping.h create mode 100644 ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp create mode 100644 ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h create mode 100644 ydb/library/yql/providers/generic/connector/tests/README.md rename ydb/library/yql/providers/generic/connector/tests/{test_cases => common_test_cases}/base.py (76%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py rename ydb/library/yql/providers/generic/connector/tests/{test_cases => common_test_cases}/select_positive_common.py (90%) rename ydb/library/yql/providers/generic/connector/tests/{test_cases => common_test_cases}/ya.make (66%) delete mode 100644 ydb/library/yql/providers/generic/connector/tests/conftest.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml rename ydb/library/yql/providers/generic/connector/tests/{test_cases => datasource/clickhouse}/select_datetime.py (70%) rename ydb/library/yql/providers/generic/connector/tests/{test_cases/select_positive_clickhouse.py => datasource/clickhouse/select_positive.py} (98%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py 
create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py rename ydb/library/yql/providers/generic/connector/tests/{test_cases/select_positive_postgresql.py => datasource/postgresql/select_positive.py} (98%) rename ydb/library/yql/providers/generic/connector/tests/{test_cases/select_positive_postgresql_schema.py => datasource/postgresql/select_positive_with_schema.py} (79%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/ya.make create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml create mode 100755 ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh create mode 100644 ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py rename ydb/{core/kqp/ut/federated_query/generic => library/yql/providers/generic/connector/tests/datasource/ydb}/ya.make (54%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml create mode 100644 ydb/library/yql/providers/generic/connector/tests/join/collection.py create mode 100644 
ydb/library/yql/providers/generic/connector/tests/join/conftest.py rename ydb/library/yql/providers/generic/connector/tests/{ => join}/docker-compose.yml (58%) rename ydb/library/yql/providers/generic/connector/tests/{join.py => join/scenario.py} (60%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/join/test.py rename ydb/library/yql/providers/generic/connector/tests/{test_cases/join.py => join/test_case.py} (94%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/join/ya.make delete mode 100644 ydb/library/yql/providers/generic/connector/tests/test.py delete mode 100644 ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py delete mode 100644 ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py delete mode 100644 ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py delete mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py rename ydb/library/yql/providers/generic/connector/tests/utils/{ => run}/dqrun.py (91%) rename ydb/library/yql/providers/generic/connector/tests/utils/{ => run}/kqprun.py (93%) rename ydb/library/yql/providers/generic/connector/tests/utils/{runner.py => run/parent.py} (69%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/run/result.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make rename ydb/library/yql/providers/generic/connector/tests/{ => 
utils/scenario}/clickhouse.py (75%) rename ydb/library/yql/providers/generic/connector/tests/{ => utils/scenario}/postgresql.py (81%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py rename ydb/library/yql/providers/generic/connector/tests/utils/{ => types}/clickhouse.py (61%) create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make create mode 100644 ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py create mode 100644 ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp create mode 100644 ydb/library/yql/providers/generic/provider/yql_generic_utils.h create mode 100644 ydb/library/yql/providers/s3/proto/file_queue.proto create mode 100644 ydb/public/tools/lib/cmds/ut/test.py create mode 100644 ydb/public/tools/lib/cmds/ut/ya.make delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.parquet-parquet_/date_time_format_common_simple_format_test.parquet delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv delete mode 100644 
ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.parquet-parquet_/date_time_simple_iso_test.parquet delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv rename 
ydb/tests/fq/s3/canondata/{test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet => test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet} (51%) create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.csv-csv_with_names_/timestamp_simple_iso_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv 
create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.csv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv delete mode 100644 
ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.tsv delete mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.tsv create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet create mode 100644 ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet create mode 100644 ydb/tests/library/ut/kikimr_config.py create mode 100644 ydb/tests/library/ut/ya.make diff --git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index 46a896df34a4..30b0e9bda739 100644 --- 
a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -18,8 +18,11 @@ ydb/core/quoter/ut QuoterWithKesusTest.PrefetchCoefficient ydb/core/kafka_proxy/ut KafkaProtocol.CreatePartitionsScenario ydb/core/kafka_proxy/ut KafkaProtocol.ProduceScenario ydb/core/kqp/provider/ut KikimrIcGateway.TestLoadBasicSecretValueFromExternalDataSourceMetadata -ydb/core/kqp/ut/federated_query/generic * -ydb/core/kqp/ut/olap * +ydb/core/kqp/ut/olap KqpOlap.IndexesActualization +ydb/core/kqp/ut/olap KqpOlap.BlobsSharing* +ydb/core/kqp/ut/olap KqpOlap.ScanQueryOltpAndOlap +ydb/core/kqp/ut/olap KqpOlap.StatsUsageWithTTL +ydb/core/kqp/ut/olap KqpOlap.YqlScriptOltpAndOlap ydb/core/kqp/ut/pg KqpPg.CreateIndex ydb/core/kqp/ut/query KqpLimits.QueryReplySize ydb/core/kqp/ut/query KqpQuery.QueryTimeout @@ -29,7 +32,6 @@ ydb/core/kqp/ut/scheme KqpScheme.QueryWithAlter ydb/core/kqp/ut/scheme [44/50]* ydb/core/kqp/ut/service KqpQueryService.ExecuteQueryPgTableSelect ydb/core/kqp/ut/service KqpQueryService.QueryOnClosedSession -ydb/core/kqp/ut/service KqpQueryServiceScripts.ForgetScriptExecutionRace ydb/core/kqp/ut/service KqpService.CloseSessionsWithLoad ydb/core/kqp/ut/service [38/50]* ydb/core/tx/columnshard/ut_schema TColumnShardTestSchema.ForgetAfterFail diff --git a/ydb/core/base/events.h b/ydb/core/base/events.h index 97f89dc978a2..93c28aa34edf 100644 --- a/ydb/core/base/events.h +++ b/ydb/core/base/events.h @@ -173,6 +173,7 @@ struct TKikimrEvents : TEvents { ES_GRAPH, ES_REPLICATION_SERVICE, ES_CHANGE_EXCHANGE, + ES_S3_FILE_QUEUE, }; }; diff --git a/ydb/core/external_sources/object_storage.cpp b/ydb/core/external_sources/object_storage.cpp index e93f9603a2e0..b84712c95927 100644 --- a/ydb/core/external_sources/object_storage.cpp +++ b/ydb/core/external_sources/object_storage.cpp @@ -28,19 +28,20 @@ struct TObjectStorageExternalSource : public IExternalSource { const NKikimrExternalSources::TGeneral& general) const override { NKikimrExternalSources::TObjectStorage objectStorage; 
for (const auto& [key, value]: general.attributes()) { - if (key == "format") { + auto lowerKey = to_lower(key); + if (lowerKey == "format") { objectStorage.set_format(value); - } else if (key == "compression") { + } else if (lowerKey == "compression") { objectStorage.set_compression(value); } else if (key.StartsWith("projection.") || key == "storage.location.template") { objectStorage.mutable_projection()->insert({key, value}); - } else if (key == "partitioned_by") { + } else if (lowerKey == "partitioned_by") { auto json = NSc::TValue::FromJsonThrow(value); for (const auto& column: json.GetArray()) { *objectStorage.add_partitioned_by() = column; } - } else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "csv_delimiter"sv}, key)) { - objectStorage.mutable_format_setting()->insert({key, value}); + } else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "csv_delimiter"sv}, lowerKey)) { + objectStorage.mutable_format_setting()->insert({lowerKey, value}); } else { ythrow TExternalSourceException() << "Unknown attribute " << key; } diff --git a/ydb/core/fq/libs/actors/clusters_from_connections.cpp b/ydb/core/fq/libs/actors/clusters_from_connections.cpp index 8405473860f0..066668c574a9 100644 --- a/ydb/core/fq/libs/actors/clusters_from_connections.cpp +++ b/ydb/core/fq/libs/actors/clusters_from_connections.cpp @@ -216,17 +216,14 @@ void AddClustersFromConnections( switch (conn.content().setting().connection_case()) { case FederatedQuery::ConnectionSetting::kYdbDatabase: { const auto& db = conn.content().setting().ydb_database(); - auto* clusterCfg = gatewaysConfig.MutableYdb()->AddClusterMapping(); + auto* clusterCfg = gatewaysConfig.MutableGeneric()->AddClusterMapping(); + 
clusterCfg->SetKind(NYql::NConnector::NApi::EDataSourceKind::YDB); + clusterCfg->SetProtocol(NYql::NConnector::NApi::EProtocol::NATIVE); clusterCfg->SetName(connectionName); - clusterCfg->SetId(db.database_id()); - if (db.database()) - clusterCfg->SetDatabase(db.database()); - if (db.endpoint()) - clusterCfg->SetEndpoint(db.endpoint()); - clusterCfg->SetSecure(db.secure()); - clusterCfg->SetAddBearerToToken(common.GetUseBearerForYdb()); + clusterCfg->SetDatabaseId(db.database_id()); + clusterCfg->SetUseSsl(!common.GetDisableSslForGenericDataSources()); FillClusterAuth(*clusterCfg, db.auth(), authToken, accountIdSignatures); - clusters.emplace(connectionName, YdbProviderName); + clusters.emplace(connectionName, GenericProviderName); break; } case FederatedQuery::ConnectionSetting::kClickhouseCluster: { diff --git a/ydb/core/fq/libs/actors/database_resolver.cpp b/ydb/core/fq/libs/actors/database_resolver.cpp index b0858f65d46e..fe36ef9cf4d3 100644 --- a/ydb/core/fq/libs/actors/database_resolver.cpp +++ b/ydb/core/fq/libs/actors/database_resolver.cpp @@ -1,5 +1,6 @@ #include "database_resolver.h" +#include #include #include #include @@ -98,8 +99,6 @@ class TResponseProcessor : public TActorBootstrapped } void DieOnTtl() { - Success = false; - auto errorMsg = TStringBuilder() << "Could not resolve database ids: "; bool firstUnresolvedDbId = true; for (const auto& [_, params]: Requests) { @@ -112,46 +111,41 @@ class TResponseProcessor : public TActorBootstrapped } errorMsg << " in " << ResolvingTtl << " seconds."; LOG_E("ResponseProcessor::DieOnTtl: errorMsg=" << errorMsg); - - SendResolvedEndpointsAndDie(errorMsg); + Issues.AddIssue(errorMsg); + SendResolvedEndpointsAndDie(); } - void SendResolvedEndpointsAndDie(const TString& errorMsg) { - NYql::TIssues issues; - if (errorMsg) { - issues.AddIssue(errorMsg); - } - + void SendResolvedEndpointsAndDie() { Send(Sender, new TEvents::TEvEndpointResponse( - NYql::TDatabaseResolverResponse(std::move(DatabaseId2Description), 
Success, issues))); + NYql::TDatabaseResolverResponse(std::move(DatabaseId2Description), Issues.Empty(), Issues))); PassAway(); LOG_D("ResponseProcessor::SendResolvedEndpointsAndDie: passed away"); } void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev) { - TString errorMessage; TMaybe result; const auto requestIter = Requests.find(ev->Get()->Request); HandledIds++; - LOG_T("ResponseProcessor::Handle(HttpIncomingResponse): got MDB API response: code=" << ev->Get()->Response->Status); + LOG_T("ResponseProcessor::Handle(HttpIncomingResponse): got API response: code=" << ev->Get()->Response->Status); try { - HandleResponse(ev, requestIter, errorMessage, result); + HandleResponse(ev, requestIter, result); } catch (...) { const TString msg = TStringBuilder() << "error while response processing, params " << ((requestIter != Requests.end()) ? requestIter->second.ToDebugString() : TString{"unknown"}) << ", details: " << CurrentExceptionMessage(); LOG_E("ResponseProccessor::Handle(TEvHttpIncomingResponse): " << msg); + Issues.AddIssue(msg); } LOG_T("ResponseProcessor::Handle(HttpIncomingResponse): progress: " << DatabaseId2Description.size() << " of " << Requests.size() << " requests are done"); if (HandledIds == Requests.size()) { - SendResolvedEndpointsAndDie(errorMessage); + SendResolvedEndpointsAndDie(); } } @@ -160,18 +154,25 @@ class TResponseProcessor : public TActorBootstrapped void HandleResponse( NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev, const TRequestMap::const_iterator& requestIter, - TString& errorMessage, TMaybe& result) - { - if (ev->Get()->Error.empty() && (ev->Get()->Response && ev->Get()->Response->Status == "200")) { - errorMessage = HandleSuccessfulResponse(ev, requestIter, result); + { + TString errorMessage; + + if (requestIter == Requests.end()) { + // Requests are guaranteed to be kept in within TResponseProcessor until the response arrives. + // If there is no appropriate request, it's a fatal error. 
+ errorMessage = "Invariant violation: unknown request"; } else { - errorMessage = HandleFailedResponse(ev, requestIter); + if (ev->Get()->Error.empty() && (ev->Get()->Response && ev->Get()->Response->Status == "200")) { + errorMessage = HandleSuccessfulResponse(ev, *requestIter, result); + } else { + errorMessage = HandleFailedResponse(ev, *requestIter); + } } if (errorMessage) { + Issues.AddIssue(errorMessage); LOG_E("ResponseProcessor::Handle(HttpIncomingResponse): error=" << errorMessage); - Success = false; } else { const auto& params = requestIter->second; auto key = std::make_tuple(params.Id, params.DatabaseType, params.DatabaseAuth); @@ -191,17 +192,13 @@ class TResponseProcessor : public TActorBootstrapped TString HandleSuccessfulResponse( NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev, - const TRequestMap::const_iterator& requestIter, + const TRequestMap::value_type& requestWithParams, TMaybe& result ) { - if (requestIter == Requests.end()) { - return "unknown request"; - } - NJson::TJsonReaderConfig jsonConfig; NJson::TJsonValue databaseInfo; - const auto& params = requestIter->second; + const auto& params = requestWithParams.second; const bool parseJsonOk = NJson::ReadJsonTree(ev->Get()->Response->Body, &jsonConfig, &databaseInfo); TParsers::const_iterator parserIt; if (parseJsonOk && (parserIt = Parsers.find(params.DatabaseType)) != Parsers.end()) { @@ -232,37 +229,37 @@ class TResponseProcessor : public TActorBootstrapped TString HandleFailedResponse( NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev, - const TRequestMap::const_iterator& requestIter + const TRequestMap::value_type& requestWithParams ) const { - if (requestIter == Requests.end()) { - return "unknown request"; - } + auto sb = TStringBuilder() + << "Error while trying to resolve managed " << ToString(requestWithParams.second.DatabaseType) + << " database with id " << requestWithParams.second.Id << " via HTTP request to" + << ": endpoint '" << requestWithParams.first->Host << 
"'" + << ", url '" << requestWithParams.first->URL << "'" + << ": "; + + // Handle network error (when the response is empty) + if (!ev->Get()->Response) { + return sb << ev->Get()->Error; + } + // Handle unauthenticated error const auto& status = ev->Get()->Response->Status; - if (status == "403") { - return TStringBuilder() << "You have no permission to resolve database id into database endpoint. " + DetailedPermissionsError(requestIter->second); + return sb << "you have no permission to resolve database id into database endpoint." + DetailedPermissionsError(requestWithParams.second); } - auto errorMessage = ev->Get()->Error; - - const TString error = TStringBuilder() - << "Cannot resolve database id (status = " << status << "). " - << "Response body from " << ev->Get()->Request->URL << ": " << (ev->Get()->Response ? ev->Get()->Response->Body : "empty"); - if (!errorMessage.empty()) { - errorMessage += '\n'; - } - errorMessage += error; - - return errorMessage; + // Unexpected error. Add response body for debug + return sb << Endl + << "Status: " << status << Endl + << "Response body: " << ev->Get()->Response->Body; } TString DetailedPermissionsError(const TResolveParams& params) const { - if (params.DatabaseType == EDatabaseType::ClickHouse || params.DatabaseType == EDatabaseType::PostgreSQL) { auto mdbTypeStr = NYql::DatabaseTypeLowercase(params.DatabaseType); - return TStringBuilder() << "Please check that your service account has role " << + return TStringBuilder() << " Please check that your service account has role " << "`managed-" << mdbTypeStr << ".viewer`."; } return {}; @@ -275,7 +272,7 @@ class TResponseProcessor : public TActorBootstrapped const NYql::IMdbEndpointGenerator::TPtr MdbEndpointGenerator; TDatabaseResolverResponse::TDatabaseDescriptionMap DatabaseId2Description; size_t HandledIds = 0; - bool Success = true; + NYql::TIssues Issues; const TParsers& Parsers; TDuration ResolvingTtl = TDuration::Seconds(30); //TODO: Use cfg }; @@ -312,7 +309,12 
@@ class TDatabaseResolver: public TActor } Y_ENSURE(endpoint); - return TDatabaseDescription{endpoint, "", 0, database, secure}; + + TVector split = StringSplitter(endpoint).Split(':'); + + Y_ENSURE(split.size() == 2); + + return TDatabaseDescription{endpoint, split[0], FromString(split[1]), database, secure}; }; Parsers[NYql::EDatabaseType::Ydb] = ydbParser; Parsers[NYql::EDatabaseType::DataStreams] = [ydbParser]( @@ -327,9 +329,11 @@ class TDatabaseResolver: public TActor if (!isDedicatedDb && ret.Endpoint.StartsWith("ydb.")) { // Replace "ydb." -> "yds." ret.Endpoint[2] = 's'; + ret.Host[2] = 's'; } if (isDedicatedDb) { ret.Endpoint = "u-" + ret.Endpoint; + ret.Host = "u-" + ret.Host; } return ret; }; @@ -486,6 +490,7 @@ class TDatabaseResolver: public TActor try { TString url; if (IsIn({NYql::EDatabaseType::Ydb, NYql::EDatabaseType::DataStreams }, databaseType)) { + YQL_ENSURE(ev->Get()->YdbMvpEndpoint.Size() > 0, "empty YDB MVP Endpoint"); url = TUrlBuilder(ev->Get()->YdbMvpEndpoint + "/database") .AddUrlParam("databaseId", databaseId) .Build(); @@ -497,7 +502,6 @@ class TDatabaseResolver: public TActor .AddPathComponent("hosts") .Build(); } - LOG_D("ResponseProccessor::Handle(EndpointRequest): start GET request: " << url); NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet(url); @@ -507,6 +511,8 @@ class TDatabaseResolver: public TActor httpRequest->Set("Authorization", token); } + LOG_D("ResponseProccessor::Handle(EndpointRequest): start GET request: " << "url: " << httpRequest->URL); + requests[httpRequest] = TResolveParams{databaseId, databaseType, databaseAuth}; } catch (const std::exception& e) { const TString msg = TStringBuilder() << "error while preparing to resolve database id: " << databaseId diff --git a/ydb/core/fq/libs/actors/proxy_private.h b/ydb/core/fq/libs/actors/proxy_private.h index 4be79ef3277f..9a55b8a513ba 100644 --- a/ydb/core/fq/libs/actors/proxy_private.h +++ 
b/ydb/core/fq/libs/actors/proxy_private.h @@ -22,8 +22,6 @@ namespace NKikimr { namespace NFq { -NActors::TActorId MakeYqPrivateProxyId(); - NActors::IActor* CreateYqlAnalyticsPrivateProxy( const NConfig::TPrivateProxyConfig& privateProxyConfig, TIntrusivePtr timeProvider, diff --git a/ydb/core/fq/libs/actors/run_actor.cpp b/ydb/core/fq/libs/actors/run_actor.cpp index 4ead981b5004..80790ccb9f7e 100644 --- a/ydb/core/fq/libs/actors/run_actor.cpp +++ b/ydb/core/fq/libs/actors/run_actor.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1940,11 +1939,7 @@ class TRunActor : public NActors::TActorBootstrapped { } { - dataProvidersInit.push_back(GetYdbDataProviderInitializer(Params.YqSharedResources->UserSpaceYdbDriver, Params.CredentialsFactory, dbResolver)); - } - - { - dataProvidersInit.push_back(GetGenericDataProviderInitializer(Params.ConnectorClient, dbResolver)); + dataProvidersInit.push_back(GetGenericDataProviderInitializer(Params.ConnectorClient, dbResolver, Params.CredentialsFactory)); } { diff --git a/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp b/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp index 4055825c0c9b..50cff1b212a5 100644 --- a/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp +++ b/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp @@ -14,7 +14,22 @@ namespace { using namespace NKikimr; using namespace NFq; -TString NoPermissionStr = "You have no permission to resolve database id into database endpoint. 
"; +TString MakeErrorPrefix( + const TString& host, + const TString& url, + const TString& databaseId, + const NYql::EDatabaseType& databaseType) { + TStringBuilder ss; + + return TStringBuilder() + << "Error while trying to resolve managed " << ToString(databaseType) + << " database with id " << databaseId << " via HTTP request to" + << ": endpoint '" << host << "'" + << ", url '" << url << "'" + << ": "; +} + +TString NoPermissionStr = "you have no permission to resolve database id into database endpoint."; struct TTestBootstrap : public TTestActorRuntime { NConfig::TCheckpointCoordinatorConfig Settings; @@ -113,7 +128,9 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { const TString& status, const TString& responseBody, const NYql::TDatabaseResolverResponse::TDatabaseDescription& description, - const NYql::TIssues& issues) + const NYql::TIssues& issues, + const TString& error = "" + ) { TTestBootstrap bootstrap; @@ -122,16 +139,16 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { databaseAuth.Protocol = protocol; TString databaseId{"etn021us5r9rhld1vgbh"}; - auto requestIdAnddatabaseType = std::make_pair(databaseId, databaseType); + auto requestIdAndDatabaseType = std::make_pair(databaseId, databaseType); bootstrap.Send(new IEventHandle( bootstrap.DatabaseResolver, bootstrap.AsyncResolver, new NFq::TEvents::TEvEndpointRequest( NYql::IDatabaseAsyncResolver::TDatabaseAuthMap( - {std::make_pair(requestIdAnddatabaseType, databaseAuth)}), + {std::make_pair(requestIdAndDatabaseType, databaseAuth)}), TString("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod"), - TString("mdbGateway"), + TString("https://mdb.api.cloud.yandex.net:443"), TString("traceId"), NFq::MakeMdbEndpointGeneratorGeneric(true)))); @@ -144,18 +161,21 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { bootstrap.WaitForBootstrap(); - auto response = std::make_unique(nullptr); - response->Status = status; - response->Body = responseBody; + std::unique_ptr httpIncomingResponse; + if (!error) { + 
httpIncomingResponse = std::make_unique(nullptr); + httpIncomingResponse->Status = status; + httpIncomingResponse->Body = responseBody; + } bootstrap.Send(new IEventHandle( processorActorId, bootstrap.HttpProxy, - new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest->Request, response.release(), ""))); + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest->Request, httpIncomingResponse.release(), error))); NYql::TDatabaseResolverResponse::TDatabaseDescriptionMap result; if (status == "200") { - result[requestIdAnddatabaseType] = description; + result[requestIdAndDatabaseType] = description; } bootstrap.ExpectEvent(bootstrap.AsyncResolver, NFq::TEvents::TEvEndpointResponse( @@ -174,8 +194,8 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"ydb.serverless.yandexcloud.net:2135"}, - TString{""}, - 0, + TString{"ydb.serverless.yandexcloud.net"}, + 2135, TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), true }, @@ -183,6 +203,36 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { ); } + Y_UNIT_TEST(Ydb_Serverless_Timeout) { + NYql::TIssues issues{ + NYql::TIssue( + TStringBuilder{} << MakeErrorPrefix( + "ydbc.ydb.cloud.yandex.net:8789", + "/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::Ydb + ) << "Connection timeout" + ) + }; + + Test( + NYql::EDatabaseType::Ydb, + NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", + "", + "", + NYql::TDatabaseResolverResponse::TDatabaseDescription{ + TString{"ydb.serverless.yandexcloud.net:2135"}, + TString{"ydb.serverless.yandexcloud.net"}, + 2135, + TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), + true + }, + issues, + "Connection timeout" + ); + } + Y_UNIT_TEST(DataStreams_Serverless) { Test( NYql::EDatabaseType::DataStreams, @@ -195,12 
+245,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"yds.serverless.yandexcloud.net:2135"}, - TString{""}, - 0, + TString{"yds.serverless.yandexcloud.net"}, + 2135, TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), true - }, - {} + }, + {} ); } @@ -217,8 +267,8 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"u-lb.etn021us5r9rhld1vgbh.ydb.mdb.yandexcloud.net:2135"}, - TString{""}, - 0, + TString{"u-lb.etn021us5r9rhld1vgbh.ydb.mdb.yandexcloud.net"}, + 2135, TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), true }, @@ -297,7 +347,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(ClickHouse_PermissionDenied) { NYql::TIssues issues{ NYql::TIssue( - TStringBuilder{} << NoPermissionStr << "Please check that your service account has role `managed-clickhouse.viewer`." + TStringBuilder{} << MakeErrorPrefix( + "mdb.api.cloud.yandex.net:443", + "/managed-clickhouse/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::ClickHouse + ) << NoPermissionStr << " Please check that your service account has role `managed-clickhouse.viewer`." ) }; @@ -365,7 +420,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(PostgreSQL_PermissionDenied) { NYql::TIssues issues{ NYql::TIssue( - TStringBuilder{} << NoPermissionStr << "Please check that your service account has role `managed-postgresql.viewer`." + TStringBuilder{} << MakeErrorPrefix( + "mdb.api.cloud.yandex.net:443", + "/managed-postgresql/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::PostgreSQL + ) << NoPermissionStr << " Please check that your service account has role `managed-postgresql.viewer`." 
) }; @@ -395,7 +455,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(DataStreams_PermissionDenied) { NYql::TIssues issues{ NYql::TIssue( - NoPermissionStr + TStringBuilder{} << MakeErrorPrefix( + "ydbc.ydb.cloud.yandex.net:8789", + "/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::DataStreams + ) << NoPermissionStr ) }; Test( @@ -412,6 +477,87 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { issues ); } + + Y_UNIT_TEST(ResolveTwoDataStreamsFirstError) { + TTestBootstrap bootstrap; + + NYql::TDatabaseAuth databaseAuth; + databaseAuth.UseTls = true; + databaseAuth.Protocol = NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED; + + TString databaseId1{"etn021us5r9rhld1vgb1"}; + TString databaseId2{"etn021us5r9rhld1vgb2"}; + auto requestIdAndDatabaseType1 = std::make_pair(databaseId1, NYql::EDatabaseType::DataStreams); + auto requestIdAndDatabaseType2 = std::make_pair(databaseId2, NYql::EDatabaseType::DataStreams); + + bootstrap.Send(new IEventHandle( + bootstrap.DatabaseResolver, + bootstrap.AsyncResolver, + new NFq::TEvents::TEvEndpointRequest( + NYql::IDatabaseAsyncResolver::TDatabaseAuthMap({ + std::make_pair(requestIdAndDatabaseType1, databaseAuth), + std::make_pair(requestIdAndDatabaseType2, databaseAuth)}), + TString("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod"), + TString("https://mdb.api.cloud.yandex.net:443"), + TString("traceId"), + NFq::MakeMdbEndpointGeneratorGeneric(true)))); + + auto httpRequest1 = NHttp::THttpOutgoingRequest::CreateRequestGet("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb1"); + auto httpRequest2 = NHttp::THttpOutgoingRequest::CreateRequestGet("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb2"); + + NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr httpOutgoingRequestHolder1 = bootstrap.GrabEdgeEvent(bootstrap.HttpProxy, TDuration::Seconds(10)); + 
NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr httpOutgoingRequestHolder2 = bootstrap.GrabEdgeEvent(bootstrap.HttpProxy, TDuration::Seconds(10)); + NHttp::TEvHttpProxy::TEvHttpOutgoingRequest* httpOutgoingRequest1 = httpOutgoingRequestHolder1.Get()->Get(); + NHttp::TEvHttpProxy::TEvHttpOutgoingRequest* httpOutgoingRequest2 = httpOutgoingRequestHolder2.Get()->Get(); + if (httpOutgoingRequest1->Request->URL != httpRequest1->URL) { + std::swap(httpOutgoingRequest1, httpOutgoingRequest2); + } + + NActors::TActorId processorActorId = httpOutgoingRequestHolder1->Sender; + bootstrap.WaitForBootstrap(); + + auto response1 = std::make_unique(nullptr); + response1->Status = "404"; + response1->Body = R"({"message":"Database not found"})"; + + bootstrap.Send(new IEventHandle( + processorActorId, + bootstrap.HttpProxy, + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest1->Request, response1.release(), ""))); + + auto response2 = std::make_unique(nullptr); + response2->Status = "200"; + response2->Body = R"({"endpoint":"grpcs://ydb.serverless.yandexcloud.net:2135/?database=/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"})"; + + bootstrap.Send(new IEventHandle( + processorActorId, + bootstrap.HttpProxy, + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest2->Request, response2.release(), ""))); + + NYql::TDatabaseResolverResponse::TDatabaseDescriptionMap result; + result[requestIdAndDatabaseType2] = NYql::TDatabaseResolverResponse::TDatabaseDescription{ + TString{"yds.serverless.yandexcloud.net:2135"}, + TString{"yds.serverless.yandexcloud.net"}, + 2135, + TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), + true + }; + + NYql::TIssues issues{ + NYql::TIssue( + TStringBuilder() << MakeErrorPrefix( + "ydbc.ydb.cloud.yandex.net:8789", + "/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb1", + "etn021us5r9rhld1vgb1", + NYql::EDatabaseType::DataStreams)<< "\nStatus: 404\nResponse body: {\"message\":\"Database not 
found\"}" + ) + }; + + bootstrap.ExpectEvent(bootstrap.AsyncResolver, + NFq::TEvents::TEvEndpointResponse( + NYql::TDatabaseResolverResponse(std::move(result), false, issues))); + } + } } // namespace NFq diff --git a/ydb/core/fq/libs/actors/ya.make b/ydb/core/fq/libs/actors/ya.make index bcf906c56a51..42d277fbc98c 100644 --- a/ydb/core/fq/libs/actors/ya.make +++ b/ydb/core/fq/libs/actors/ya.make @@ -80,7 +80,6 @@ PEERDIR( ydb/library/yql/providers/pq/provider ydb/library/yql/providers/pq/task_meta ydb/library/yql/providers/s3/provider - ydb/library/yql/providers/ydb/provider ydb/library/yql/public/issue ydb/library/yql/public/issue/protos ydb/library/yql/sql/settings diff --git a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp index 5186054fdbbc..0bc2d773828f 100644 --- a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp +++ b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp @@ -571,6 +571,16 @@ void TCheckpointCoordinator::Handle(NActors::TEvInterconnect::TEvNodeConnected:: } } +void TCheckpointCoordinator::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + CC_LOG_D("Handle undelivered"); + + if (const auto actorIt = AllActors.find(ev->Sender); actorIt != AllActors.end()) { + actorIt->second->EventsQueue.HandleUndelivered(ev); + } + + NYql::TTaskControllerImpl::OnUndelivered(ev); +} + void TCheckpointCoordinator::Handle(NActors::TEvents::TEvPoison::TPtr& ev) { CC_LOG_D("Got TEvPoison"); Send(ev->Sender, new NActors::TEvents::TEvPoisonTaken(), 0, ev->Cookie); diff --git a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h index a778a707a954..0aa477f295fa 100644 --- a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h +++ b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h @@ -60,6 +60,7 @@ class TCheckpointCoordinator : public NYql::TTaskControllerImpl::OnUndelivered) hFunc(NActors::TEvents::TEvWakeup, 
NYql::TTaskControllerImpl::OnWakeup) hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle) - hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle), + hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle) + hFunc(NActors::TEvents::TEvUndelivered, Handle) - ExceptionFunc(std::exception, HandleException) + , ExceptionFunc(std::exception, HandleException) ) static constexpr char ActorName[] = "YQ_CHECKPOINT_COORDINATOR"; diff --git a/ydb/core/fq/libs/compute/common/config.h b/ydb/core/fq/libs/compute/common/config.h index a869f893ba96..b038f8d815a7 100644 --- a/ydb/core/fq/libs/compute/common/config.h +++ b/ydb/core/fq/libs/compute/common/config.h @@ -164,8 +164,8 @@ class TComputeConfig { case FederatedQuery::ConnectionSetting::kObjectStorage: case FederatedQuery::ConnectionSetting::kClickhouseCluster: case FederatedQuery::ConnectionSetting::kPostgresqlCluster: - return true; case FederatedQuery::ConnectionSetting::kYdbDatabase: + return true; case FederatedQuery::ConnectionSetting::kDataStreams: case FederatedQuery::ConnectionSetting::kMonitoring: case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: diff --git a/ydb/core/fq/libs/compute/common/ut/ya.make b/ydb/core/fq/libs/compute/common/ut/ya.make index 536d94ff54ae..ae170608a8eb 100644 --- a/ydb/core/fq/libs/compute/common/ut/ya.make +++ b/ydb/core/fq/libs/compute/common/ut/ya.make @@ -11,6 +11,10 @@ SRCS( utils_ut.cpp ) +PEERDIR( + ydb/library/yql/public/udf/service/stub +) + YQL_LAST_ABI_VERSION() END() diff --git a/ydb/core/fq/libs/compute/common/utils.cpp b/ydb/core/fq/libs/compute/common/utils.cpp index 127bd26f1ecc..b790d8d8a8b4 100644 --- a/ydb/core/fq/libs/compute/common/utils.cpp +++ b/ydb/core/fq/libs/compute/common/utils.cpp @@ -1,8 +1,11 @@ #include "utils.h" #include +#include #include +#include + namespace NFq { using TAggregates = std::map>; @@ -621,7 +624,7 @@ void EnumeratePlansV2(NYson::TYsonWriter& writer, NJson::TJsonValue& value, ui32 } } -TString 
GetV1StatFromV2PlanV2(const TString& plan) { +TString GetV1StatFromV2PlanV2(const TString& plan, double* cpuUsage) { TStringStream out; NYson::TYsonWriter writer(&out); writer.OnBeginMap(); @@ -655,6 +658,9 @@ TString GetV1StatFromV2PlanV2(const TString& plan) { if (totals.CpuTimeUs.Sum) { writer.OnKeyedItem("cpu"); writer.OnStringScalar(FormatDurationUs(totals.CpuTimeUs.Sum)); + if (cpuUsage) { + *cpuUsage = totals.CpuTimeUs.Sum / 1000000.0; + } } if (totals.SourceCpuTimeUs.Sum) { writer.OnKeyedItem("scpu"); @@ -750,4 +756,140 @@ TPublicStat GetPublicStat(const TString& statistics) { return counters; } +struct TNoneStatProcessor : IPlanStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_NONE; + } + + TString ConvertPlan(TString& plan) override { + return plan; + } + + TString GetQueryStat(TString&, double& cpuUsage) override { + cpuUsage = 0.0; + return ""; + } + + TPublicStat GetPublicStat(TString&) override { + return TPublicStat{}; + } +}; + +struct TBasicStatProcessor : TNoneStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_BASIC; + } +}; + +struct TFullStatProcessor : IPlanStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_FULL; + } + + TString ConvertPlan(TString& plan) override { + return plan; + } + + TString GetQueryStat(TString& plan, double& cpuUsage) override { + return GetV1StatFromV2Plan(plan, &cpuUsage); + } + + TPublicStat GetPublicStat(TString& stat) override { + return NFq::GetPublicStat(stat); + } +}; + +struct TProfileStatProcessor : TFullStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_PROFILE; + } +}; + +struct TProdStatProcessor : TFullStatProcessor { + TString GetQueryStat(TString& plan, double& cpuUsage) override { + return GetPrettyStatistics(GetV1StatFromV2Plan(plan, &cpuUsage)); + } +}; + +std::unique_ptr 
CreateStatProcessor(const TString& statViewName) { + // disallow none and basic stat since they do not support metering + // if (statViewName == "stat_none") return std::make_unique(); + // if (statViewName == "stat_basc") return std::make_unique(); + if (statViewName == "stat_full") return std::make_unique(); + if (statViewName == "stat_prof") return std::make_unique(); + if (statViewName == "stat_prod") return std::make_unique(); + return std::make_unique(); +} + +PingTaskRequestBuilder::PingTaskRequestBuilder(const NConfig::TCommonConfig& commonConfig, std::unique_ptr&& processor) + : Compressor(commonConfig.GetQueryArtifactsCompressionMethod(), commonConfig.GetQueryArtifactsCompressionMinSize()) + , Processor(std::move(processor)) +{} + +Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build( + const Ydb::TableStats::QueryStats& queryStats, + const NYql::TIssues& issues, + std::optional computeStatus, + std::optional pendingStatusCode +) { + Fq::Private::PingTaskRequest pingTaskRequest = Build(queryStats); + + if (issues) { + NYql::IssuesToMessage(issues, pingTaskRequest.mutable_issues()); + } + + if (computeStatus) { + pingTaskRequest.set_status(*computeStatus); + } + + if (pendingStatusCode) { + pingTaskRequest.set_pending_status_code(*pendingStatusCode); + } + + return pingTaskRequest; +} + + +Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(const Ydb::TableStats::QueryStats& queryStats) { + return Build(queryStats.query_plan(), queryStats.query_ast()); +} + +Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(const TString& queryPlan, const TString& queryAst) { + Fq::Private::PingTaskRequest pingTaskRequest; + + Issues.Clear(); + + auto plan = queryPlan; + try { + plan = Processor->ConvertPlan(plan); + } catch(const NJson::TJsonException& ex) { + Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Error plan conversion: " << ex.what())); + } + + if (Compressor.IsEnabled()) { + auto [astCompressionMethod, astCompressed] = 
Compressor.Compress(queryAst); + pingTaskRequest.mutable_ast_compressed()->set_method(astCompressionMethod); + pingTaskRequest.mutable_ast_compressed()->set_data(astCompressed); + + auto [planCompressionMethod, planCompressed] = Compressor.Compress(plan); + pingTaskRequest.mutable_plan_compressed()->set_method(planCompressionMethod); + pingTaskRequest.mutable_plan_compressed()->set_data(planCompressed); + } else { + pingTaskRequest.set_ast(queryAst); + pingTaskRequest.set_plan(plan); + } + + CpuUsage = 0.0; + try { + auto stat = Processor->GetQueryStat(plan, CpuUsage); + pingTaskRequest.set_statistics(stat); + pingTaskRequest.set_dump_raw_statistics(true); + PublicStat = Processor->GetPublicStat(stat); + } catch(const NJson::TJsonException& ex) { + Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Error stat conversion: " << ex.what())); + } + + return pingTaskRequest; +} + } // namespace NFq diff --git a/ydb/core/fq/libs/compute/common/utils.h b/ydb/core/fq/libs/compute/common/utils.h index 4a61a45bf61a..47387490162d 100644 --- a/ydb/core/fq/libs/compute/common/utils.h +++ b/ydb/core/fq/libs/compute/common/utils.h @@ -1,8 +1,12 @@ #pragma once +#include + +#include #include #include #include + #include namespace NFq { @@ -43,4 +47,33 @@ struct TPublicStat { TPublicStat GetPublicStat(const TString& statistics); +struct IPlanStatProcessor { + virtual ~IPlanStatProcessor() = default; + virtual Ydb::Query::StatsMode GetStatsMode() = 0; + virtual TString ConvertPlan(TString& plan) = 0; + virtual TString GetQueryStat(TString& plan, double& cpuUsage) = 0; + virtual TPublicStat GetPublicStat(TString& stat) = 0; +}; + +std::unique_ptr CreateStatProcessor(const TString& statViewName); + +class PingTaskRequestBuilder { +public: + PingTaskRequestBuilder(const NConfig::TCommonConfig& commonConfig, std::unique_ptr&& processor); + Fq::Private::PingTaskRequest Build( + const Ydb::TableStats::QueryStats& queryStats, + const NYql::TIssues& issues, + std::optional computeStatus = 
std::nullopt, + std::optional pendingStatusCode = std::nullopt + ); + Fq::Private::PingTaskRequest Build(const Ydb::TableStats::QueryStats& queryStats); + Fq::Private::PingTaskRequest Build(const TString& queryPlan, const TString& queryAst); + NYql::TIssues Issues; + double CpuUsage = 0.0; + TPublicStat PublicStat; +private: + const TCompressor Compressor; + std::unique_ptr Processor; +}; + } // namespace NFq diff --git a/ydb/core/fq/libs/compute/common/ya.make b/ydb/core/fq/libs/compute/common/ya.make index d4a0c6ab2dae..f2fc3e00a5a5 100644 --- a/ydb/core/fq/libs/compute/common/ya.make +++ b/ydb/core/fq/libs/compute/common/ya.make @@ -9,6 +9,7 @@ SRCS( PEERDIR( library/cpp/json/yson ydb/core/fq/libs/config/protos + ydb/core/fq/libs/control_plane_storage/internal ydb/core/fq/libs/db_id_async_resolver_impl ydb/core/fq/libs/grpc ydb/core/fq/libs/shared_resources diff --git a/ydb/core/fq/libs/compute/ydb/actors_factory.cpp b/ydb/core/fq/libs/compute/ydb/actors_factory.cpp index 9db333a97da9..aa7d38d00fcb 100644 --- a/ydb/core/fq/libs/compute/ydb/actors_factory.cpp +++ b/ydb/core/fq/libs/compute/ydb/actors_factory.cpp @@ -9,6 +9,7 @@ #include "ydb_connector_actor.h" #include +#include namespace NFq { @@ -16,6 +17,7 @@ struct TActorFactory : public IActorFactory { TActorFactory(const NFq::TRunActorParams& params, const ::NYql::NCommon::TServiceCounters& counters) : Params(params) , Counters(counters) + , StatViewName(GetStatViewName()) {} std::unique_ptr CreatePinger(const NActors::TActorId& parent) const override { @@ -46,21 +48,22 @@ struct TActorFactory : public IActorFactory { std::unique_ptr CreateExecuter(const NActors::TActorId &parent, const NActors::TActorId &connector, const NActors::TActorId &pinger) const override { - return CreateExecuterActor(Params, parent, connector, pinger, Counters); + return CreateExecuterActor(Params, CreateStatProcessor()->GetStatsMode(), parent, connector, pinger, Counters); } std::unique_ptr CreateStatusTracker(const 
NActors::TActorId &parent, const NActors::TActorId &connector, const NActors::TActorId &pinger, const NYdb::TOperation::TOperationId& operationId) const override { - return CreateStatusTrackerActor(Params, parent, connector, pinger, operationId, Counters); + return CreateStatusTrackerActor(Params, parent, connector, pinger, operationId, CreateStatProcessor(), Counters); } std::unique_ptr CreateResultWriter(const NActors::TActorId& parent, const NActors::TActorId& connector, const NActors::TActorId& pinger, - const NKikimr::NOperationId::TOperationId& operationId) const override { - return CreateResultWriterActor(Params, parent, connector, pinger, operationId, Counters); + const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected) const override { + return CreateResultWriterActor(Params, parent, connector, pinger, operationId, operationEntryExpected, Counters); } std::unique_ptr CreateResourcesCleaner(const NActors::TActorId& parent, @@ -79,13 +82,50 @@ struct TActorFactory : public IActorFactory { std::unique_ptr CreateStopper(const NActors::TActorId& parent, const NActors::TActorId& connector, + const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId) const override { - return CreateStopperActor(Params, parent, connector, operationId, Counters); + return CreateStopperActor(Params, parent, connector, pinger, operationId, CreateStatProcessor(), Counters); + } + + std::unique_ptr CreateStatProcessor() const { + return NFq::CreateStatProcessor(StatViewName); + } + + TString GetStatViewName() { + auto p = Params.Sql.find("--fq_dev_hint_"); + if (p != Params.Sql.npos) { + p += 14; + auto p1 = Params.Sql.find("\n", p); + TString mode = Params.Sql.substr(p, p1 == Params.Sql.npos ? 
Params.Sql.npos : p1 - p); + if (mode) { + return mode; + } + } + + if (!Params.Config.GetControlPlaneStorage().GetDumpRawStatistics()) { + return "stat_prod"; + } + + switch (Params.Config.GetControlPlaneStorage().GetStatsMode()) { + case Ydb::Query::StatsMode::STATS_MODE_UNSPECIFIED: + return "stat_full"; + case Ydb::Query::StatsMode::STATS_MODE_NONE: + return "stat_none"; + case Ydb::Query::StatsMode::STATS_MODE_BASIC: + return "stat_basc"; + case Ydb::Query::StatsMode::STATS_MODE_FULL: + return "stat_full"; + case Ydb::Query::StatsMode::STATS_MODE_PROFILE: + return "stat_prof"; + default: + return "stat_full"; + } } private: NFq::TRunActorParams Params; ::NYql::NCommon::TServiceCounters Counters; + TString StatViewName; }; IActorFactory::TPtr CreateActorFactory(const NFq::TRunActorParams& params, const ::NYql::NCommon::TServiceCounters& counters) { diff --git a/ydb/core/fq/libs/compute/ydb/actors_factory.h b/ydb/core/fq/libs/compute/ydb/actors_factory.h index ae85da060f7a..4abaed2178ab 100644 --- a/ydb/core/fq/libs/compute/ydb/actors_factory.h +++ b/ydb/core/fq/libs/compute/ydb/actors_factory.h @@ -28,7 +28,8 @@ struct IActorFactory : public TThrRefBase { virtual std::unique_ptr CreateResultWriter(const NActors::TActorId& parent, const NActors::TActorId& connector, const NActors::TActorId& pinger, - const NKikimr::NOperationId::TOperationId& operationId) const = 0; + const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected) const = 0; virtual std::unique_ptr CreateResourcesCleaner(const NActors::TActorId& parent, const NActors::TActorId& connector, const NYdb::TOperation::TOperationId& operationId) const = 0; @@ -39,6 +40,7 @@ struct IActorFactory : public TThrRefBase { FederatedQuery::QueryMeta::ComputeStatus status) const = 0; virtual std::unique_ptr CreateStopper(const NActors::TActorId& parent, const NActors::TActorId& connector, + const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId) const = 0; }; 
diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp index 08382442f6db..2eadd15e1ea6 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp +++ b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp @@ -343,7 +343,14 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap void CreateSingleClientActors(const NConfig::TYdbComputeControlPlane::TSingle& singleConfig) { auto globalLoadConfig = Config.GetYdb().GetLoadControlConfig(); if (globalLoadConfig.GetEnable()) { - auto clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(singleConfig.GetConnection()), CredentialsProviderFactory(GetYdbCredentialSettings(singleConfig.GetConnection()))->CreateProvider()).release()); + TActorId clientActor; + auto monitoringEndpoint = globalLoadConfig.GetMonitoringEndpoint(); + auto credentialsProvider = CredentialsProviderFactory(GetYdbCredentialSettings(singleConfig.GetConnection()))->CreateProvider(); + if (monitoringEndpoint) { + clientActor = Register(CreateMonitoringRestClientActor(monitoringEndpoint, singleConfig.GetConnection().GetDatabase(), credentialsProvider).release()); + } else { + clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(singleConfig.GetConnection()), credentialsProvider).release()); + } MonitoringActorId = Register(CreateDatabaseMonitoringActor(clientActor, globalLoadConfig, Counters).release()); } } @@ -352,15 +359,23 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap const auto& mapping = cmsConfig.GetDatabaseMapping(); auto globalLoadConfig = Config.GetYdb().GetLoadControlConfig(); for (const auto& config: mapping.GetCommon()) { + auto databaseCounters = Counters->GetSubgroup("database", 
config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateCmsGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); TActorId databaseMonitoringActor; const NConfig::TLoadControlConfig& loadConfig = config.GetLoadControlConfig().GetEnable() - ? Config.GetYdb().GetLoadControlConfig() + ? config.GetLoadControlConfig() : globalLoadConfig; if (loadConfig.GetEnable()) { - auto clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, Counters).release()); + TActorId clientActor; + auto monitoringEndpoint = loadConfig.GetMonitoringEndpoint(); + auto credentialsProvider = CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider(); + if (monitoringEndpoint) { + clientActor = Register(CreateMonitoringRestClientActor(monitoringEndpoint, config.GetControlPlaneConnection().GetDatabase(), credentialsProvider).release()); + } else { + clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), credentialsProvider).release()); + } + databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, databaseCounters).release()); } Clients->CommonDatabaseClients.push_back({clientActor, config, cacheActor, databaseMonitoringActor}); } @@ -368,15 +383,23 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap 
Y_ABORT_UNLESS(Clients->CommonDatabaseClients); for (const auto& [scope, config]: mapping.GetScopeToComputeDatabase()) { + auto databaseCounters = Counters->GetSubgroup("database", config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateCmsGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); TActorId databaseMonitoringActor; const NConfig::TLoadControlConfig& loadConfig = config.GetLoadControlConfig().GetEnable() - ? Config.GetYdb().GetLoadControlConfig() + ? config.GetLoadControlConfig() : globalLoadConfig; if (loadConfig.GetEnable()) { - auto clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, Counters).release()); + TActorId clientActor; + auto monitoringEndpoint = loadConfig.GetMonitoringEndpoint(); + auto credentialsProvider = CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider(); + if (monitoringEndpoint) { + clientActor = Register(CreateMonitoringRestClientActor(monitoringEndpoint, config.GetControlPlaneConnection().GetDatabase(), credentialsProvider).release()); + } else { + clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), credentialsProvider).release()); + } + databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, databaseCounters).release()); } 
Clients->ScopeToDatabaseClient[scope] = {clientActor, config, cacheActor, databaseMonitoringActor}; } @@ -385,16 +408,18 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap void CreateControlPlaneClientActors(const NConfig::TYdbComputeControlPlane::TYdbcp& controlPlaneConfig, const TString& databasesCacheReloadPeriod) { const auto& mapping = controlPlaneConfig.GetDatabaseMapping(); for (const auto& config: mapping.GetCommon()) { + auto databaseCounters = Counters->GetSubgroup("database", config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateYdbcpGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); Clients->CommonDatabaseClients.push_back({clientActor, config, cacheActor, {}}); } Y_ABORT_UNLESS(Clients->CommonDatabaseClients); for (const auto& [scope, config]: mapping.GetScopeToComputeDatabase()) { + auto databaseCounters = Counters->GetSubgroup("database", config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateYdbcpGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); Clients->ScopeToDatabaseClient[scope] = {clientActor, config, cacheActor, {}}; } } diff --git 
a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h index 509d72ada94b..07ef06c3d20e 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h +++ b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h @@ -29,6 +29,7 @@ std::unique_ptr CreateCmsGrpcClientActor(const NCloud::TGrpcCli std::unique_ptr CreateComputeDatabasesCacheActor(const NActors::TActorId& databaseClientActorId, const TString& databasesCacheReloadPeriod, const ::NMonitoring::TDynamicCounterPtr& counters); std::unique_ptr CreateMonitoringGrpcClientActor(const NCloud::TGrpcClientSettings& settings, const NYdb::TCredentialsProviderPtr& credentialsProvider); +std::unique_ptr CreateMonitoringRestClientActor(const TString& endpoint, const TString& database, const NYdb::TCredentialsProviderPtr& credentialsProvider); std::unique_ptr CreateDatabaseMonitoringActor(const NActors::TActorId& monitoringClientActorId, NFq::NConfig::TLoadControlConfig config, const ::NMonitoring::TDynamicCounterPtr& counters); diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp index 19963aa9fc28..57d7e5da3b92 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp +++ b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp @@ -34,6 +34,8 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrappedGetCounter("InstantLoadPercentage", false); AverageLoadPercentage = subComponent->GetCounter("AverageLoadPercentage", false); QuotedLoadPercentage = subComponent->GetCounter("QuotedLoadPercentage", false); + AvailableLoadPercentage = subComponent->GetCounter("AvailableLoadPercentage", false); + TargetLoadPercentage = subComponent->GetCounter("TargetLoadPercentage", false); PendingQueueSize = 
subComponent->GetCounter("PendingQueueSize", false); PendingQueueOverload = subComponent->GetCounter("PendingQueueOverload", true); } @@ -78,7 +82,10 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped(MaxClusterLoad * 100); + } static constexpr char ActorName[] = "FQ_COMPUTE_DATABASE_MONITORING_ACTOR"; @@ -113,6 +120,10 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped= AverageLoadInterval) { @@ -252,7 +263,7 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped PendingQueue; }; diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp new file mode 100644 index 000000000000..c7085478fda9 --- /dev/null +++ b/ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp @@ -0,0 +1,154 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#define LOG_E(stream) LOG_ERROR_S(*TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_W(stream) LOG_WARN_S( *TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_I(stream) LOG_INFO_S( *TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_D(stream) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_T(stream) LOG_TRACE_S(*TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) + +namespace NFq { + +using namespace NActors; + +class TMonitoringRestServiceActor : public NActors::TActor { +public: + using TBase = NActors::TActor; + + TMonitoringRestServiceActor(const TString& endpoint, const TString& database, const NYdb::TCredentialsProviderPtr& credentialsProvider) + : 
TBase(&TMonitoringRestServiceActor::StateFunc) + , Endpoint(endpoint) + , Database(database) + , CredentialsProvider(credentialsProvider) + {} + + STRICT_STFUNC(StateFunc, + hFunc(TEvYdbCompute::TEvCpuLoadRequest, Handle); + hFunc(NYql::NDq::TEvHttpBase::TEvSendResult, Handle); + ) + + void Handle(TEvYdbCompute::TEvCpuLoadRequest::TPtr& ev) { + if (Y_UNLIKELY(!HttpProxyId)) { + HttpProxyId = Register(NHttp::CreateHttpProxy(NMonitoring::TMetricRegistry::SharedInstance())); + } + + auto httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet( + NYql::TUrlBuilder(Endpoint) + .AddPathComponent("viewer") + .AddPathComponent("json") + .AddPathComponent("tenantinfo") + .AddUrlParam("path", Database) + .Build() + ); + auto ticket = CredentialsProvider->GetAuthInfo(); + LOG_D(httpRequest->GetRawData() << " using ticket " << NKikimr::MaskTicket(ticket)); + httpRequest->Set("Authorization", ticket); + + auto httpSenderId = Register(NYql::NDq::CreateHttpSenderActor(SelfId(), HttpProxyId, NYql::NDq::THttpSenderRetryPolicy::GetNoRetryPolicy())); + Send(httpSenderId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest), 0, Cookie); + Requests[Cookie++] = ev; + } + + void Handle(NYql::NDq::TEvHttpBase::TEvSendResult::TPtr& ev) { + auto it = Requests.find(ev->Cookie); + if (it == Requests.end()) { + LOG_E("Request doesn't exist (TEvSendResult). 
Need to fix this bug urgently"); + return; + } + auto request = it->second; + Requests.erase(it); + + const auto& result = *ev->Get(); + const auto& response = *result.HttpIncomingResponse->Get(); + + auto forwardResponse = std::make_unique(); + + const TString& error = response.GetError(); + if (!error.empty()) { + forwardResponse->Issues.AddIssue(error); + Send(request->Sender, forwardResponse.release(), 0, request->Cookie); + return; + } + + try { + NJson::TJsonReaderConfig jsonConfig; + NJson::TJsonValue info; + if (NJson::ReadJsonTree(response.Response->Body, &jsonConfig, &info)) { + bool usageFound = false; + if (auto* tenantNode = info.GetValueByPath("TenantInfo")) { + if (tenantNode->GetType() == NJson::JSON_ARRAY) { + for (auto tenantItem : tenantNode->GetArray()) { + if (auto* nameNode = tenantItem.GetValueByPath("Name")) { + if (nameNode->GetStringSafe() != Database) { + continue; + } + } + if (auto* poolNode = tenantItem.GetValueByPath("PoolStats")) { + if (poolNode->GetType() == NJson::JSON_ARRAY) { + for (auto poolItem : poolNode->GetArray()) { + if (auto* nameNode = poolItem.GetValueByPath("Name")) { + if (nameNode->GetStringSafe() == "User") { + if (auto* usageNode = poolItem.GetValueByPath("Usage")) { + forwardResponse->InstantLoad = usageNode->GetDoubleSafe(); + usageFound = true; + break; + } + if (auto* threadsNode = poolItem.GetValueByPath("Threads")) { + forwardResponse->CpuNumber = threadsNode->GetIntegerSafe(); + } + } + } + } + } + } + if (usageFound) { + break; + } + } + } + } + if (!usageFound) { + forwardResponse->Issues.AddIssue(TStringBuilder() << "MISSED User pool node load for database \"" << Database << '"'); + } + } else { + forwardResponse->Issues.AddIssue("Malformed JSON"); + } + } catch(const std::exception& e) { + forwardResponse->Issues.AddIssue(TStringBuilder() << "Error on JSON parsing: '" << e.what() << "'"); + } + + if (forwardResponse->Issues) { + LOG_E(response.Response->Body); + } + Send(request->Sender, 
forwardResponse.release(), 0, request->Cookie); + } + +private: + TString Endpoint; + TString Database; + TMap Requests; + NYdb::TCredentialsProviderPtr CredentialsProvider; + int64_t Cookie = 0; + TActorId HttpProxyId; +}; + +std::unique_ptr CreateMonitoringRestClientActor(const TString& endpoint, const TString& database, const NYdb::TCredentialsProviderPtr& credentialsProvider) { + return std::make_unique(endpoint, database, credentialsProvider); +} + +} diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make index daedf40cdffe..523a26c2bfe1 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make +++ b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make @@ -6,10 +6,12 @@ SRCS( compute_databases_cache.cpp database_monitoring.cpp monitoring_grpc_client_actor.cpp + monitoring_rest_client_actor.cpp ydbcp_grpc_client_actor.cpp ) PEERDIR( + library/cpp/json ydb/library/actors/core ydb/library/actors/protos ydb/core/fq/libs/compute/ydb/synchronization_service @@ -18,6 +20,8 @@ PEERDIR( ydb/core/protos ydb/library/db_pool/protos ydb/library/yql/public/issue + ydb/library/yql/utils + ydb/library/yql/utils/actors ydb/public/api/grpc ydb/public/api/grpc/draft ydb/public/lib/operation_id/protos diff --git a/ydb/core/fq/libs/compute/ydb/events/events.h b/ydb/core/fq/libs/compute/ydb/events/events.h index 6f5961b75462..3f19becf8203 100644 --- a/ydb/core/fq/libs/compute/ydb/events/events.h +++ b/ydb/core/fq/libs/compute/ydb/events/events.h @@ -71,13 +71,14 @@ struct TEvYdbCompute { // Events struct TEvExecuteScriptRequest : public NActors::TEventLocal { - TEvExecuteScriptRequest(TString sql, TString idempotencyKey, const TDuration& resultTtl, const TDuration& operationTimeout, Ydb::Query::Syntax syntax, Ydb::Query::ExecMode execMode, const TString& traceId) + TEvExecuteScriptRequest(TString sql, TString idempotencyKey, const TDuration& resultTtl, const TDuration& operationTimeout, Ydb::Query::Syntax syntax, 
Ydb::Query::ExecMode execMode, Ydb::Query::StatsMode statsMode, const TString& traceId) : Sql(std::move(sql)) , IdempotencyKey(std::move(idempotencyKey)) , ResultTtl(resultTtl) , OperationTimeout(operationTimeout) , Syntax(syntax) , ExecMode(execMode) + , StatsMode(statsMode) , TraceId(traceId) {} @@ -87,6 +88,7 @@ struct TEvYdbCompute { TDuration OperationTimeout; Ydb::Query::Syntax Syntax = Ydb::Query::SYNTAX_YQL_V1; Ydb::Query::ExecMode ExecMode = Ydb::Query::EXEC_MODE_EXECUTE; + Ydb::Query::StatsMode StatsMode = Ydb::Query::StatsMode::STATS_MODE_FULL; TString TraceId; }; @@ -117,18 +119,20 @@ struct TEvYdbCompute { }; struct TEvGetOperationResponse : public NActors::TEventLocal { - TEvGetOperationResponse(NYql::TIssues issues, NYdb::EStatus status) + TEvGetOperationResponse(NYql::TIssues issues, NYdb::EStatus status, bool ready) : Issues(std::move(issues)) , Status(status) + , Ready(ready) {} - TEvGetOperationResponse(NYdb::NQuery::EExecStatus execStatus, Ydb::StatusIds::StatusCode statusCode, const TVector& resultSetsMeta, const Ydb::TableStats::QueryStats& queryStats, NYql::TIssues issues) + TEvGetOperationResponse(NYdb::NQuery::EExecStatus execStatus, Ydb::StatusIds::StatusCode statusCode, const TVector& resultSetsMeta, const Ydb::TableStats::QueryStats& queryStats, NYql::TIssues issues, bool ready = true) : ExecStatus(execStatus) , StatusCode(statusCode) , ResultSetsMeta(resultSetsMeta) , QueryStats(queryStats) , Issues(std::move(issues)) , Status(NYdb::EStatus::SUCCESS) + , Ready(ready) {} NYdb::NQuery::EExecStatus ExecStatus = NYdb::NQuery::EExecStatus::Unspecified; @@ -137,6 +141,7 @@ struct TEvYdbCompute { Ydb::TableStats::QueryStats QueryStats; NYql::TIssues Issues; NYdb::EStatus Status; + bool Ready; }; struct TEvFetchScriptResultRequest : public NActors::TEventLocal { @@ -454,16 +459,17 @@ struct TEvYdbCompute { }; struct TEvCpuLoadResponse : public NActors::TEventLocal { - TEvCpuLoadResponse(double instantLoad = 0.0, double averageLoad = 0.0) - : 
InstantLoad(instantLoad), AverageLoad(averageLoad) + TEvCpuLoadResponse(double instantLoad = 0.0, double averageLoad = 0.0, ui32 cpuNumber = 0) + : InstantLoad(instantLoad), AverageLoad(averageLoad), CpuNumber(cpuNumber) {} TEvCpuLoadResponse(NYql::TIssues issues) - : InstantLoad(0.0), AverageLoad(0.0), Issues(std::move(issues)) + : InstantLoad(0.0), AverageLoad(0.0), CpuNumber(0), Issues(std::move(issues)) {} double InstantLoad; double AverageLoad; + ui32 CpuNumber; NYql::TIssues Issues; }; diff --git a/ydb/core/fq/libs/compute/ydb/executer_actor.cpp b/ydb/core/fq/libs/compute/ydb/executer_actor.cpp index 73a90ba51c6f..177fe00ded33 100644 --- a/ydb/core/fq/libs/compute/ydb/executer_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/executer_actor.cpp @@ -59,9 +59,10 @@ class TExecuterActor : public TBaseComputeActor { } }; - TExecuterActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const ::NYql::NCommon::TServiceCounters& queryCounters) + TExecuterActor(const TRunActorParams& params, Ydb::Query::StatsMode statsMode, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const ::NYql::NCommon::TServiceCounters& queryCounters) : TBaseComputeActor(queryCounters, "Executer") , Params(params) + , StatsMode(statsMode) , Parent(parent) , Connector(connector) , Pinger(pinger) @@ -114,7 +115,7 @@ class TExecuterActor : public TBaseComputeActor { } void SendExecuteScript() { - Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_EXECUTE_SCRIPT), SelfId(), Connector, Params.Sql, Params.JobId, Params.ResultTtl, Params.ExecutionTtl, GetSyntax(), GetExecuteMode(), Params.JobId + "_" + ToString(Params.RestartCount))); + Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_EXECUTE_SCRIPT), SelfId(), Connector, Params.Sql, Params.JobId, Params.ResultTtl, Params.ExecutionTtl, GetSyntax(), GetExecuteMode(), StatsMode, Params.JobId + "_" + ToString(Params.RestartCount))); } 
Ydb::Query::Syntax GetSyntax() const { @@ -162,6 +163,7 @@ class TExecuterActor : public TBaseComputeActor { private: TRunActorParams Params; + Ydb::Query::StatsMode StatsMode; TActorId Parent; TActorId Connector; TActorId Pinger; @@ -172,11 +174,12 @@ class TExecuterActor : public TBaseComputeActor { }; std::unique_ptr CreateExecuterActor(const TRunActorParams& params, + Ydb::Query::StatsMode statsMode, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, pinger, queryCounters); + return std::make_unique(params, statsMode, parent, connector, pinger, queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/executer_actor.h b/ydb/core/fq/libs/compute/ydb/executer_actor.h index 763501092489..c1a6c1d6478a 100644 --- a/ydb/core/fq/libs/compute/ydb/executer_actor.h +++ b/ydb/core/fq/libs/compute/ydb/executer_actor.h @@ -9,6 +9,7 @@ namespace NFq { std::unique_ptr CreateExecuterActor(const TRunActorParams& params, + Ydb::Query::StatsMode statsMode, const NActors::TActorId& parent, const NActors::TActorId& connector, const NActors::TActorId& pinger, diff --git a/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp b/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp index d2eeb18035b9..1053ef8e005f 100644 --- a/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -65,14 +66,19 @@ class TResourcesCleanerActor : public TBaseComputeActor , Connector(connector) , OperationId(operationId) , Counters(GetStepCountersSubgroup()) + , BackoffTimer(20, 1000) {} static constexpr char ActorName[] = "FQ_RESOURCES_CLEANER_ACTOR"; + void SendForgetOperation(const TDuration& delay = TDuration::Zero()) { + Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_FORGET_OPERATION), delay, SelfId(), 
Connector, OperationId)); + } + void Start() { LOG_I("Start resources cleaner actor. Compute state: " << FederatedQuery::QueryMeta::ComputeStatus_Name(Params.Status)); Become(&TResourcesCleanerActor::StateFunc); - Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_FORGET_OPERATION), SelfId(), Connector, OperationId)); + SendForgetOperation(); } STRICT_STFUNC(StateFunc, @@ -81,6 +87,10 @@ class TResourcesCleanerActor : public TBaseComputeActor void Handle(const TEvYdbCompute::TEvForgetOperationResponse::TPtr& ev) { const auto& response = *ev.Get()->Get(); + if (response.Status == NYdb::EStatus::TIMEOUT || response.Status == NYdb::EStatus::CLIENT_DEADLINE_EXCEEDED) { + SendForgetOperation(TDuration::MilliSeconds(BackoffTimer.NextBackoffMs())); + return; + } if (response.Status != NYdb::EStatus::SUCCESS && response.Status != NYdb::EStatus::NOT_FOUND) { LOG_E("Can't forget operation: " << ev->Get()->Issues.ToOneLineString()); Send(Parent, new TEvYdbCompute::TEvResourcesCleanerResponse(ev->Get()->Issues, ev->Get()->Status)); @@ -98,6 +108,7 @@ class TResourcesCleanerActor : public TBaseComputeActor TActorId Connector; NYdb::TOperation::TOperationId OperationId; TCounters Counters; + NKikimr::TBackoffTimer BackoffTimer; }; std::unique_ptr CreateResourcesCleanerActor(const TRunActorParams& params, diff --git a/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp b/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp index ef6da5653b4a..b6f0ff8efc05 100644 --- a/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp @@ -202,13 +202,14 @@ class TResultWriterActor : public TBaseComputeActor { } }; - TResultWriterActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, const ::NYql::NCommon::TServiceCounters& queryCounters) + TResultWriterActor(const TRunActorParams& params, const TActorId& parent, 
const TActorId& connector, const TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, bool operationEntryExpected, const ::NYql::NCommon::TServiceCounters& queryCounters) : TBaseComputeActor(queryCounters, "ResultWriter") , Params(params) , Parent(parent) , Connector(connector) , Pinger(pinger) , OperationId(operationId) + , OperationEntryExpected(operationEntryExpected) , Counters(GetStepCountersSubgroup()) {} @@ -246,6 +247,13 @@ class TResultWriterActor : public TBaseComputeActor { void Handle(const TEvYdbCompute::TEvGetOperationResponse::TPtr& ev) { const auto& response = *ev.Get()->Get(); + if (!OperationEntryExpected && response.Status == NYdb::EStatus::NOT_FOUND) { + LOG_I("Operation has been already removed"); + Send(Parent, new TEvYdbCompute::TEvResultWriterResponse({}, NYdb::EStatus::SUCCESS)); + CompleteAndPassAway(); + return; + } + if (response.Status != NYdb::EStatus::SUCCESS) { LOG_E("Can't get operation: " << ev->Get()->Issues.ToOneLineString()); Send(Parent, new TEvYdbCompute::TEvResultWriterResponse(ev->Get()->Issues, ev->Get()->Status)); @@ -314,6 +322,7 @@ class TResultWriterActor : public TBaseComputeActor { TActorId Connector; TActorId Pinger; NKikimr::NOperationId::TOperationId OperationId; + const bool OperationEntryExpected; TCounters Counters; TInstant StartTime; TString FetchToken; @@ -325,8 +334,9 @@ std::unique_ptr CreateResultWriterActor(const TRunActorParams& const TActorId& connector, const TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, pinger, operationId, queryCounters); + return std::make_unique(params, parent, connector, pinger, operationId, operationEntryExpected, queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/result_writer_actor.h b/ydb/core/fq/libs/compute/ydb/result_writer_actor.h index ee24d14772b1..ca6c1454d42b 100644 --- 
a/ydb/core/fq/libs/compute/ydb/result_writer_actor.h +++ b/ydb/core/fq/libs/compute/ydb/result_writer_actor.h @@ -13,6 +13,7 @@ std::unique_ptr CreateResultWriterActor(const TRunActorParams& const NActors::TActorId& connector, const NActors::TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected, const ::NYql::NCommon::TServiceCounters& queryCounters); } diff --git a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp index d6ef6600f05a..c27323eb5748 100644 --- a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp @@ -1,11 +1,10 @@ #include "base_compute_actor.h" +#include "status_tracker_actor.h" -#include #include #include #include #include -#include #include #include #include @@ -14,7 +13,6 @@ #include #include -#include #include #include @@ -41,6 +39,8 @@ class TStatusTrackerActor : public TBaseComputeActor { public: using IRetryPolicy = IRetryPolicy; + using TBase = TBaseComputeActor; + enum ERequestType { RT_GET_OPERATION, RT_PING, @@ -69,16 +69,16 @@ class TStatusTrackerActor : public TBaseComputeActor { } }; - TStatusTrackerActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, const ::NYql::NCommon::TServiceCounters& queryCounters) - : TBaseComputeActor(queryCounters, "StatusTracker") + TStatusTrackerActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) + : TBase(queryCounters, "StatusTracker") , Params(params) , Parent(parent) , Connector(connector) , Pinger(pinger) , OperationId(operationId) + , Builder(params.Config.GetCommon(), std::move(processor)) , Counters(GetStepCountersSubgroup()) , 
BackoffTimer(20, 1000) - , Compressor(params.Config.GetCommon().GetQueryArtifactsCompressionMethod(), params.Config.GetCommon().GetQueryArtifactsCompressionMinSize()) {} static constexpr char ActorName[] = "FQ_STATUS_TRACKER"; @@ -97,19 +97,23 @@ class TStatusTrackerActor : public TBaseComputeActor { void Handle(const TEvents::TEvForwardPingResponse::TPtr& ev) { auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); pingCounters->InFly->Dec(); + pingCounters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); + + if (ev.Get()->Get()->Success) { + pingCounters->Ok->Inc(); + } else { + pingCounters->Error->Inc(); + } if (ev->Cookie) { return; } - pingCounters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); if (ev.Get()->Get()->Success) { - pingCounters->Ok->Inc(); LOG_I("Information about the status of operation is stored"); Send(Parent, new TEvYdbCompute::TEvStatusTrackerResponse(Issues, Status, ExecStatus, ComputeStatus)); CompleteAndPassAway(); } else { - pingCounters->Error->Inc(); LOG_E("Error saving information about the status of operation"); Send(Parent, new TEvYdbCompute::TEvStatusTrackerResponse(NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Error saving information about the status of operation: " << ProtoToString(OperationId)}}, NYdb::EStatus::INTERNAL_ERROR, ExecStatus, ComputeStatus)); FailedAndPassAway(); @@ -133,8 +137,6 @@ class TStatusTrackerActor : public TBaseComputeActor { return; } - ReportPublicCounters(response.QueryStats); - StartTime = TInstant::Now(); LOG_D("Execution status: " << static_cast(response.ExecStatus)); switch (response.ExecStatus) { case NYdb::NQuery::EExecStatus::Unspecified: @@ -163,47 +165,42 @@ class TStatusTrackerActor : public TBaseComputeActor { } } - void ReportPublicCounters(const Ydb::TableStats::QueryStats& stats) { - try { - auto stat = GetPublicStat(GetV1StatFromV2Plan(stats.query_plan())); - auto publicCounters = GetPublicCounters(); + void ReportPublicCounters(const 
TPublicStat& stat) { + auto publicCounters = GetPublicCounters(); - if (stat.MemoryUsageBytes) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.memory_usage_bytes"); - counter = *stat.MemoryUsageBytes; - } + if (stat.MemoryUsageBytes) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.memory_usage_bytes"); + counter = *stat.MemoryUsageBytes; + } - if (stat.CpuUsageUs) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.cpu_usage_us", true); - counter = *stat.CpuUsageUs; - } + if (stat.CpuUsageUs) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.cpu_usage_us", true); + counter = *stat.CpuUsageUs; + } - if (stat.InputBytes) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.input_bytes", true); - counter = *stat.InputBytes; - } + if (stat.InputBytes) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.input_bytes", true); + counter = *stat.InputBytes; + } - if (stat.OutputBytes) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.output_bytes", true); - counter = *stat.OutputBytes; - } + if (stat.OutputBytes) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.output_bytes", true); + counter = *stat.OutputBytes; + } - if (stat.SourceInputRecords) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.source_input_records", true); - counter = *stat.SourceInputRecords; - } + if (stat.SourceInputRecords) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.source_input_records", true); + counter = *stat.SourceInputRecords; + } - if (stat.SinkOutputRecords) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.sink_output_records", true); - counter = *stat.SinkOutputRecords; - } + if (stat.SinkOutputRecords) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.sink_output_records", true); + counter = *stat.SinkOutputRecords; + } - if (stat.RunningTasks) { - auto& counter = 
*publicCounters->GetNamedCounter("name", "query.running_tasks"); - counter = *stat.RunningTasks; - } - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + if (stat.RunningTasks) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.running_tasks"); + counter = *stat.RunningTasks; } } @@ -211,75 +208,57 @@ class TStatusTrackerActor : public TBaseComputeActor { Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_GET_OPERATION), delay, SelfId(), Connector, OperationId)); } - void UpdateProgress() { + void OnPingRequestStart() { + StartTime = TInstant::Now(); auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); pingCounters->InFly->Inc(); - Fq::Private::PingTaskRequest pingTaskRequest; - PrepareAstAndPlan(pingTaskRequest, QueryStats.query_plan(), QueryStats.query_ast()); - try { - pingTaskRequest.set_statistics(GetV1StatFromV2Plan(QueryStats.query_plan())); - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + } + + void UpdateProgress() { + OnPingRequestStart(); + + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); } + ReportPublicCounters(Builder.PublicStat); Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest), 0, 1); } + void UpdateCpuQuota(double cpuUsage) { + TDuration duration = TDuration::MicroSeconds(QueryStats.total_duration_us()); + if (cpuUsage && duration) { + Send(NFq::ComputeDatabaseControlPlaneServiceActorId(), new TEvYdbCompute::TEvCpuQuotaAdjust(Params.Scope.ToString(), duration, cpuUsage)); + } + } + void Failed() { LOG_I("Execution status: Failed, Status: " << Status << ", StatusCode: " << NYql::NDqProto::StatusIds::StatusCode_Name(StatusCode) << " Issues: " << Issues.ToOneLineString()); - auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); - pingCounters->InFly->Inc(); - 
Fq::Private::PingTaskRequest pingTaskRequest; - NYql::IssuesToMessage(Issues, pingTaskRequest.mutable_issues()); - pingTaskRequest.set_pending_status_code(StatusCode); - PrepareAstAndPlan(pingTaskRequest, QueryStats.query_plan(), QueryStats.query_ast()); - try { - TDuration duration = TDuration::MicroSeconds(QueryStats.total_duration_us()); - double cpuUsage = 0.0; - pingTaskRequest.set_statistics(GetV1StatFromV2Plan(QueryStats.query_plan(), &cpuUsage)); - if (duration && cpuUsage) { - Send(NFq::ComputeDatabaseControlPlaneServiceActorId(), new TEvYdbCompute::TEvCpuQuotaAdjust(Params.Scope.ToString(), duration, cpuUsage)); - } - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + OnPingRequestStart(); + + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues, std::nullopt, StatusCode); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); } + ReportPublicCounters(Builder.PublicStat); + UpdateCpuQuota(Builder.CpuUsage); + Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); } void Complete() { LOG_I("Execution status: Complete " << Status << ", StatusCode: " << NYql::NDqProto::StatusIds::StatusCode_Name(StatusCode) << " Issues: " << Issues.ToOneLineString()); - auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); - pingCounters->InFly->Inc(); - Fq::Private::PingTaskRequest pingTaskRequest; - NYql::IssuesToMessage(Issues, pingTaskRequest.mutable_issues()); + OnPingRequestStart(); + ComputeStatus = ::FederatedQuery::QueryMeta::COMPLETING; - pingTaskRequest.set_status(ComputeStatus); - PrepareAstAndPlan(pingTaskRequest, QueryStats.query_plan(), QueryStats.query_ast()); - try { - TDuration duration = TDuration::MicroSeconds(QueryStats.total_duration_us()); - double cpuUsage = 0.0; - pingTaskRequest.set_statistics(GetV1StatFromV2Plan(QueryStats.query_plan(), &cpuUsage)); - if (duration && cpuUsage) { - Send(NFq::ComputeDatabaseControlPlaneServiceActorId(), new 
TEvYdbCompute::TEvCpuQuotaAdjust(Params.Scope.ToString(), duration, cpuUsage)); - } - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues, ComputeStatus, std::nullopt); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); } - Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); - } + ReportPublicCounters(Builder.PublicStat); + UpdateCpuQuota(Builder.CpuUsage); - void PrepareAstAndPlan(Fq::Private::PingTaskRequest& request, const TString& plan, const TString& expr) const { - if (Compressor.IsEnabled()) { - auto [astCompressionMethod, astCompressed] = Compressor.Compress(expr); - request.mutable_ast_compressed()->set_method(astCompressionMethod); - request.mutable_ast_compressed()->set_data(astCompressed); - - auto [planCompressionMethod, planCompressed] = Compressor.Compress(plan); - request.mutable_plan_compressed()->set_method(planCompressionMethod); - request.mutable_plan_compressed()->set_data(planCompressed); - } else { - request.set_ast(expr); - request.set_plan(plan); - } + Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); } private: @@ -288,16 +267,16 @@ class TStatusTrackerActor : public TBaseComputeActor { TActorId Connector; TActorId Pinger; NYdb::TOperation::TOperationId OperationId; + PingTaskRequestBuilder Builder; TCounters Counters; - TInstant StartTime; NYql::TIssues Issues; NYdb::EStatus Status = NYdb::EStatus::SUCCESS; NYdb::NQuery::EExecStatus ExecStatus = NYdb::NQuery::EExecStatus::Unspecified; NYql::NDqProto::StatusIds::StatusCode StatusCode = NYql::NDqProto::StatusIds::StatusCode::StatusIds_StatusCode_UNSPECIFIED; Ydb::TableStats::QueryStats QueryStats; NKikimr::TBackoffTimer BackoffTimer; - const TCompressor Compressor; FederatedQuery::QueryMeta::ComputeStatus ComputeStatus = FederatedQuery::QueryMeta::RUNNING; + TInstant StartTime; }; std::unique_ptr 
CreateStatusTrackerActor(const TRunActorParams& params, @@ -305,8 +284,9 @@ std::unique_ptr CreateStatusTrackerActor(const TRunActorParams& const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, pinger, operationId, queryCounters); + return std::make_unique(params, parent, connector, pinger, operationId, std::move(processor), queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h index a453e2d4d341..f9fc469202c0 100644 --- a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h +++ b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -13,6 +14,7 @@ std::unique_ptr CreateStatusTrackerActor(const TRunActorParams& const NActors::TActorId& connector, const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters); } diff --git a/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp b/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp index de66c3c1c167..c876bcd4422d 100644 --- a/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp @@ -1,14 +1,18 @@ #include "base_compute_actor.h" -#include "resources_cleaner_actor.h" +#include "stopper_actor.h" +#include #include #include #include #include +#include #include #include #include +#include + #include #include @@ -32,14 +36,21 @@ using namespace NFq; class TStopperActor : public TBaseComputeActor { public: + + using TBase = TBaseComputeActor; + enum ERequestType { RT_CANCEL_OPERATION, + RT_GET_OPERATION, + RT_PING, RT_MAX }; class TCounters: public virtual TThrRefBase { std::array Requests = CreateArray({ - { MakeIntrusive("CancelOperation") } + { 
MakeIntrusive("CancelOperation") }, + { MakeIntrusive("GetOperation") }, + { MakeIntrusive("Ping") } }); ::NMonitoring::TDynamicCounterPtr Counters; @@ -58,12 +69,14 @@ class TStopperActor : public TBaseComputeActor { } }; - TStopperActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const NYdb::TOperation::TOperationId& operationId, const ::NYql::NCommon::TServiceCounters& queryCounters) - : TBaseComputeActor(queryCounters, "Stopper") + TStopperActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) + : TBase(queryCounters, "Stopper") , Params(params) , Parent(parent) , Connector(connector) + , Pinger(pinger) , OperationId(operationId) + , Builder(params.Config.GetCommon(), std::move(processor)) , Counters(GetStepCountersSubgroup()) {} @@ -77,17 +90,77 @@ class TStopperActor : public TBaseComputeActor { STRICT_STFUNC(StateFunc, hFunc(TEvYdbCompute::TEvCancelOperationResponse, Handle); + hFunc(TEvYdbCompute::TEvGetOperationResponse, Handle); + hFunc(TEvents::TEvForwardPingResponse, Handle); ) void Handle(const TEvYdbCompute::TEvCancelOperationResponse::TPtr& ev) { const auto& response = *ev.Get()->Get(); if (response.Status != NYdb::EStatus::SUCCESS && response.Status != NYdb::EStatus::NOT_FOUND && response.Status != NYdb::EStatus::PRECONDITION_FAILED) { - LOG_E("Can't cancel operation: " << ev->Get()->Issues.ToOneLineString()); - Send(Parent, new TEvYdbCompute::TEvStopperResponse(response.Issues, response.Status)); - FailedAndPassAway(); + LOG_E("Can't cancel operation: " << response.Issues.ToOneLineString()); + Failed(response.Status, response.Issues); return; } + + if (response.Status == NYdb::EStatus::NOT_FOUND) { + LOG_I("Operation successfully canceled and already removed"); + Complete(); + return; + } + LOG_I("Operation successfully 
canceled: " << response.Status); + Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_GET_OPERATION), SelfId(), Connector, OperationId)); + } + + void Handle(const TEvYdbCompute::TEvGetOperationResponse::TPtr& ev) { + const auto& response = *ev.Get()->Get(); + if (response.Status != NYdb::EStatus::SUCCESS && response.Status != NYdb::EStatus::NOT_FOUND) { + LOG_E("Can't get operation: " << response.Issues.ToOneLineString()); + Failed(response.Status, response.Issues); + return; + } + + if (response.Status == NYdb::EStatus::NOT_FOUND) { + LOG_I("Operation has been already removed"); + Complete(); + return; + } + + auto statusCode = NYql::NDq::YdbStatusToDqStatus(response.StatusCode); + LOG_I("Operation successfully fetched, Status: " << response.Status << ", StatusCode: " << NYql::NDqProto::StatusIds::StatusCode_Name(statusCode) << " Issues: " << response.Issues.ToOneLineString()); + + StartTime = TInstant::Now(); + auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); + pingCounters->InFly->Inc(); + + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(response.QueryStats, response.Issues, FederatedQuery::QueryMeta::ABORTING_BY_USER, statusCode); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); + } + Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); + } + + void Handle(const TEvents::TEvForwardPingResponse::TPtr& ev) { + auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); + pingCounters->InFly->Dec(); + pingCounters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); + + if (ev.Get()->Get()->Success) { + pingCounters->Ok->Inc(); + LOG_I("Information about the status of operation is updated"); + } else { + pingCounters->Error->Inc(); + LOG_E("Error updating information about the status of operation"); + } + Complete(); + } + + void Failed(NYdb::EStatus status, NYql::TIssues issues) { + Send(Parent, new TEvYdbCompute::TEvStopperResponse(issues, status)); + 
FailedAndPassAway(); + } + + void Complete() { Send(Parent, new TEvYdbCompute::TEvStopperResponse({}, NYdb::EStatus::SUCCESS)); CompleteAndPassAway(); } @@ -96,16 +169,21 @@ class TStopperActor : public TBaseComputeActor { TRunActorParams Params; TActorId Parent; TActorId Connector; + TActorId Pinger; NYdb::TOperation::TOperationId OperationId; + PingTaskRequestBuilder Builder; TCounters Counters; + TInstant StartTime; }; std::unique_ptr CreateStopperActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, + const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, operationId, queryCounters); + return std::make_unique(params, parent, connector, pinger, operationId, std::move(processor), queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/stopper_actor.h b/ydb/core/fq/libs/compute/ydb/stopper_actor.h index e4046dc176f6..f078664566c2 100644 --- a/ydb/core/fq/libs/compute/ydb/stopper_actor.h +++ b/ydb/core/fq/libs/compute/ydb/stopper_actor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -11,7 +12,9 @@ namespace NFq { std::unique_ptr CreateStopperActor(const TRunActorParams& params, const NActors::TActorId& parent, const NActors::TActorId& connector, + const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters); } diff --git a/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp b/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp index db8319ec5a1d..a576908d779f 100644 --- a/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp @@ -24,12 +24,7 @@ class TYdbConnectorActor : public NActors::TActorBootstrapped(ComputeConnection, CredentialsProviderFactory); @@ -55,7 +50,7 @@ class 
TYdbConnectorActor : public NActors::TActorBootstrappedExecuteScript(event.Sql, settings) @@ -68,16 +63,18 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.Status().GetIssues(), - response.Status().GetStatus(), database), + response.Status().GetStatus()), 0, cookie); } } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); @@ -90,21 +87,34 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend(recipient, new TEvYdbCompute::TEvGetOperationResponse(response.Metadata().ExecStatus, static_cast(response.Status().GetStatus()), response.Metadata().ResultSetsMeta, response.Metadata().ExecStats, RemoveDatabaseFromIssues(response.Status().GetIssues(), database)), 0, cookie); + actorSystem->Send( + recipient, + new TEvYdbCompute::TEvGetOperationResponse( + response.Metadata().ExecStatus, + static_cast(response.Status().GetStatus()), + response.Metadata().ResultSetsMeta, + response.Metadata().ExecStats, + RemoveDatabaseFromIssues(response.Status().GetIssues(), database), + response.Ready()), + 0, cookie); } else { actorSystem->Send( recipient, MakeResponse( + database, response.Status().GetIssues(), - response.Status().GetStatus(), database), + response.Status().GetStatus(), + true), 0, cookie); } } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR, + true), 0, cookie); } }); @@ -124,16 +134,18 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.GetIssues(), - response.GetStatus(), database), + response.GetStatus()), 0, cookie); } } catch (...) 
{ actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); @@ -148,15 +160,17 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.GetIssues(), - response.GetStatus(), database), + response.GetStatus()), 0, cookie); } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); @@ -171,28 +185,30 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.GetIssues(), - response.GetStatus(), database), + response.GetStatus()), 0, cookie); } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); } - template - static TResponse* MakeResponse(TString msg, NYdb::EStatus status, TString databasePath) { - return new TResponse(NYql::TIssues{NYql::TIssue{RemoveDatabaseFromStr(msg, databasePath)}}, status); + template + static TResponse* MakeResponse(TString databasePath, TString msg, TArgs&&... args) { + return new TResponse(NYql::TIssues{NYql::TIssue{RemoveDatabaseFromStr(msg, databasePath)}}, std::forward(args)...); } - template - static TResponse* MakeResponse(const NYql::TIssues& issues, NYdb::EStatus status, TString databasePath) { - return new TResponse(RemoveDatabaseFromIssues(issues, databasePath), status); + template + static TResponse* MakeResponse(TString databasePath, const NYql::TIssues& issues, TArgs&&... 
args) { + return new TResponse(RemoveDatabaseFromIssues(issues, databasePath), std::forward(args)...); } private: @@ -201,7 +217,6 @@ class TYdbConnectorActor : public NActors::TActorBootstrapped QueryClient; std::unique_ptr OperationClient; - Ydb::Query::StatsMode StatsMode; }; std::unique_ptr CreateConnectorActor(const TRunActorParams& params) { diff --git a/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp b/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp index e382035e183a..89392e15bda2 100644 --- a/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp @@ -98,10 +98,14 @@ class TYdbRunActor : public NActors::TActorBootstrapped { } void Handle(const TEvYdbCompute::TEvStatusTrackerResponse::TPtr& ev) { + if (CancelOperationIsRunning("StatusTrackerResponse (aborting). ")) { + return; + } + auto& response = *ev->Get(); if (response.Status == NYdb::EStatus::NOT_FOUND) { // FAILING / ABORTING_BY_USER / ABORTING_BY_SYSTEM LOG_I("StatusTrackerResponse (not found). 
Status: " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, Params.Status).release()); + CreateFinalizer(Params.Status); return; } @@ -114,13 +118,17 @@ class TYdbRunActor : public NActors::TActorBootstrapped { Params.Status = response.ComputeStatus; LOG_I("StatusTrackerResponse (success) " << response.Status << " ExecStatus: " << static_cast(response.ExecStatus) << " Issues: " << response.Issues.ToOneLineString()); if (response.ExecStatus == NYdb::NQuery::EExecStatus::Completed) { - Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId).release()); + Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId, true).release()); } else { - Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + CreateResourcesCleaner(); } } void Handle(const TEvYdbCompute::TEvResultWriterResponse::TPtr& ev) { + if (CancelOperationIsRunning("ResultWriterResponse (aborting). ")) { + return; + } + auto& response = *ev->Get(); if (response.Status != NYdb::EStatus::SUCCESS) { LOG_I("ResultWriterResponse (failed). 
Status: " << response.Status << " Issues: " << response.Issues.ToOneLineString()); @@ -128,7 +136,7 @@ class TYdbRunActor : public NActors::TActorBootstrapped { return; } LOG_I("ResultWriterResponse (success) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + CreateResourcesCleaner(); } void Handle(const TEvYdbCompute::TEvResourcesCleanerResponse::TPtr& ev) { @@ -139,22 +147,23 @@ class TYdbRunActor : public NActors::TActorBootstrapped { return; } LOG_I("ResourcesCleanerResponse (success) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, IsAborted ? FederatedQuery::QueryMeta::ABORTING_BY_USER : Params.Status).release()); + CreateFinalizer(IsAborted ? FederatedQuery::QueryMeta::ABORTING_BY_USER : Params.Status); } void Handle(const TEvYdbCompute::TEvFinalizerResponse::TPtr ev) { // Pinger is no longer available at this place. // The query can be restarted only after the expiration of lease in case of error auto& response = *ev->Get(); - LOG_I("FinalizerResponse ( " << (response.Status == NYdb::EStatus::SUCCESS ? "success" : "failed") << ") " << response.Status << " Issues: " << response.Issues.ToOneLineString()); + LOG_I("FinalizerResponse ( " << (response.Status == NYdb::EStatus::SUCCESS ? 
"success" : "failed") << " ) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); FinishAndPassAway(); } void Handle(TEvents::TEvQueryActionResult::TPtr& ev) { LOG_I("QueryActionResult: " << FederatedQuery::QueryAction_Name(ev->Get()->Action)); - if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED && !IsAborted) { + // Start cancel operation only when StatusTracker or ResultWriter is running + if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED && !IsAborted && !FinalizationStarted) { IsAborted = true; - Register(ActorFactory->CreateStopper(SelfId(), Connector, Params.OperationId).release()); + Register(ActorFactory->CreateStopper(SelfId(), Connector, Pinger, Params.OperationId).release()); } } @@ -166,7 +175,7 @@ class TYdbRunActor : public NActors::TActorBootstrapped { return; } LOG_I("StopperResponse (success) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + CreateResourcesCleaner(); } void Run() { // recover points @@ -183,9 +192,9 @@ class TYdbRunActor : public NActors::TActorBootstrapped { break; case FederatedQuery::QueryMeta::COMPLETING: if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED) { - Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId).release()); + Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId, false).release()); } else { - Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, Params.Status).release()); + CreateFinalizer(Params.Status); } break; case FederatedQuery::QueryMeta::FAILING: @@ -194,7 +203,7 @@ class TYdbRunActor : public NActors::TActorBootstrapped { if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED) { Register(ActorFactory->CreateStatusTracker(SelfId(), Connector, Pinger, Params.OperationId).release()); } else { - 
Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, Params.Status).release()); + CreateFinalizer(Params.Status); } break; default: @@ -220,8 +229,28 @@ class TYdbRunActor : public NActors::TActorBootstrapped { PassAway(); } + void CreateResourcesCleaner() { + FinalizationStarted = true; + Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + } + + void CreateFinalizer(FederatedQuery::QueryMeta::ComputeStatus status) { + FinalizationStarted = true; + Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, status).release()); + } + + bool CancelOperationIsRunning(const TString& stage) const { + if (!IsAborted) { + return false; + } + + LOG_I(stage << "Stop task execution, cancel operation now is running"); + return true; + } + private: bool IsAborted = false; + bool FinalizationStarted = false; TActorId FetcherId; NYdb::NQuery::EExecStatus ExecStatus = NYdb::NQuery::EExecStatus::Unspecified; TRunActorParams Params; diff --git a/ydb/core/fq/libs/config/protos/compute.proto b/ydb/core/fq/libs/config/protos/compute.proto index 6084dc6cede6..c4d941fc0862 100644 --- a/ydb/core/fq/libs/config/protos/compute.proto +++ b/ydb/core/fq/libs/config/protos/compute.proto @@ -26,6 +26,7 @@ message TLoadControlConfig { uint32 PendingQueueSize = 6; // default 0 == instant decline if overloaded bool Strict = 7; // default false, whether to deny execution in load level unavailable uint32 CpuNumber = 8; + string MonitoringEndpoint = 9; // if defined, will be used as REST API instead of default GRPC } message TComputeDatabaseConfig { diff --git a/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp b/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp index 420c7743be3e..7d73bcee6855 100644 --- a/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp +++ b/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp @@ -177,6 +177,14 @@ TString 
MakeCreateExternalDataSourceQuery( switch (connectionContent.setting().connection_case()) { case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: case FederatedQuery::ConnectionSetting::kYdbDatabase: + properties = fmt::format( + R"( + SOURCE_TYPE="Ydb", + DATABASE_ID={database_id}, + USE_TLS="{use_tls}" + )", + "database_id"_a = EncloseAndEscapeString(connectionContent.setting().ydb_database().database_id(), '"'), + "use_tls"_a = common.GetDisableSslForGenericDataSources() ? "false" : "true"); break; case FederatedQuery::ConnectionSetting::kClickhouseCluster: properties = fmt::format( diff --git a/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp b/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp index 1ddbb70aa802..66dd314978c9 100644 --- a/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp +++ b/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp @@ -1,7 +1,6 @@ #include "config.h" #include "control_plane_proxy.h" #include "probes.h" -#include "utils.h" #include #include @@ -23,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/ydb/core/fq/libs/control_plane_proxy/events/events.h b/ydb/core/fq/libs/control_plane_proxy/events/events.h index 397a43a3a8ee..46091413dae6 100644 --- a/ydb/core/fq/libs/control_plane_proxy/events/events.h +++ b/ydb/core/fq/libs/control_plane_proxy/events/events.h @@ -394,4 +394,6 @@ struct TEvControlPlaneProxy { }; }; +NActors::TActorId ControlPlaneProxyActorId(); + } diff --git a/ydb/core/fq/libs/control_plane_proxy/utils.h b/ydb/core/fq/libs/control_plane_proxy/utils/utils.h similarity index 100% rename from ydb/core/fq/libs/control_plane_proxy/utils.h rename to ydb/core/fq/libs/control_plane_proxy/utils/utils.h diff --git a/ydb/core/fq/libs/control_plane_proxy/utils/ya.make b/ydb/core/fq/libs/control_plane_proxy/utils/ya.make new file mode 100644 index 000000000000..9fe9e6e570bf --- /dev/null +++ b/ydb/core/fq/libs/control_plane_proxy/utils/ya.make @@ -0,0 
+1,9 @@ +LIBRARY() + +PEERDIR( + ydb/public/api/protos +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/fq/libs/control_plane_proxy/ya.make b/ydb/core/fq/libs/control_plane_proxy/ya.make index 4bd32ad74c0c..bb6fe225efa7 100644 --- a/ydb/core/fq/libs/control_plane_proxy/ya.make +++ b/ydb/core/fq/libs/control_plane_proxy/ya.make @@ -33,6 +33,7 @@ END() RECURSE( actors events + utils ) RECURSE_FOR_TESTS( diff --git a/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp b/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp index 246d3f3852d1..c0802446d019 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp +++ b/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp @@ -256,7 +256,8 @@ TPingTaskParams ConstructHardPingTask( internal.clear_statistics(); PackStatisticsToProtobuf(*internal.mutable_statistics(), statistics); - if (!dumpRawStatistics) { + // global dumpRawStatistics will be removed with YQv1 + if (!dumpRawStatistics && !request.dump_raw_statistics()) { try { statistics = GetPrettyStatistics(statistics); } catch (const std::exception&) { diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp index 5bc2b76e7986..dd3f014e851f 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp @@ -200,7 +200,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListBinding queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" BINDING_ID_COLUMN_NAME "`, `" BINDING_COLUMN_NAME "` FROM `" BINDINGS_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" BINDING_ID_COLUMN_NAME "`, `" BINDING_COLUMN_NAME "` FROM `" BINDINGS_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" BINDING_ID_COLUMN_NAME "` >= $last_binding\n" ); @@ -241,7 
+241,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListBinding } queryBuilder.AddText( - "ORDER BY `" BINDING_ID_COLUMN_NAME "`\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" BINDING_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp index c999bbae6177..1b539d3c8a77 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp @@ -216,7 +216,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListConnect queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" CONNECTION_ID_COLUMN_NAME "`, `" CONNECTION_COLUMN_NAME "` FROM `" CONNECTIONS_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" CONNECTION_ID_COLUMN_NAME "`, `" CONNECTION_COLUMN_NAME "` FROM `" CONNECTIONS_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" CONNECTION_ID_COLUMN_NAME "` >= $last_connection\n" ); @@ -257,7 +257,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListConnect } queryBuilder.AddText( - "ORDER BY `" CONNECTION_ID_COLUMN_NAME "`\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" CONNECTION_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp index 7ab56126f135..32026450cafa 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp @@ -28,7 +28,7 @@ FederatedQuery::IamAuth::IdentityCase GetIamAuth(const FederatedQuery::Connectio const auto& setting = connection.content().setting(); switch (setting.connection_case()) { case 
FederatedQuery::ConnectionSetting::kYdbDatabase: - return setting.data_streams().auth().identity_case(); + return setting.ydb_database().auth().identity_case(); case FederatedQuery::ConnectionSetting::kClickhouseCluster: return setting.clickhouse_cluster().auth().identity_case(); case FederatedQuery::ConnectionSetting::kObjectStorage: @@ -391,7 +391,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListQueries queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" QUERY_ID_COLUMN_NAME "`, `" QUERY_COLUMN_NAME "` FROM `" QUERIES_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`, `" QUERY_COLUMN_NAME "` FROM `" QUERIES_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` >= $last_query AND (`" EXPIRE_AT_COLUMN_NAME "` is NULL OR `" EXPIRE_AT_COLUMN_NAME "` > $now)" ); @@ -461,7 +461,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListQueries } queryBuilder.AddText( - "ORDER BY " QUERY_ID_COLUMN_NAME "\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); @@ -1519,9 +1519,9 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetResultDa "SELECT `" QUERY_COLUMN_NAME "`, `" USER_COLUMN_NAME "`, `" VISIBILITY_COLUMN_NAME "`, `" STATUS_COLUMN_NAME "`, `" RESULT_SETS_EXPIRE_AT_COLUMN_NAME "` FROM $query_info;\n" "$result_id = SELECT `" RESULT_ID_COLUMN_NAME "` FROM $query_info\n" "WHERE `" RESULT_SETS_EXPIRE_AT_COLUMN_NAME "` >= $now;\n" - "SELECT `" RESULT_SET_ID_COLUMN_NAME "`, `" RESULT_SET_COLUMN_NAME "`, `" ROW_ID_COLUMN_NAME "` FROM `" RESULT_SETS_TABLE_NAME "`\n" + "SELECT `" RESULT_ID_COLUMN_NAME "`, `" RESULT_SET_ID_COLUMN_NAME "`, `" RESULT_SET_COLUMN_NAME "`, `" ROW_ID_COLUMN_NAME "` FROM `" RESULT_SETS_TABLE_NAME "`\n" "WHERE `" RESULT_ID_COLUMN_NAME "` = $result_id AND `" RESULT_SET_ID_COLUMN_NAME "` = $result_set_index AND `" ROW_ID_COLUMN_NAME "` >= $offset\n" - "ORDER 
BY `" ROW_ID_COLUMN_NAME "`\n" + "ORDER BY `" RESULT_ID_COLUMN_NAME "`, `" RESULT_SET_ID_COLUMN_NAME "`, `" ROW_ID_COLUMN_NAME "`\n" "LIMIT $limit;\n" ); @@ -1653,7 +1653,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListJobsReq queryBuilder.AddTimestamp("now", TInstant::Now()); queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" JOB_ID_COLUMN_NAME "`, `" JOB_COLUMN_NAME "` FROM `" JOBS_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`, `" JOB_ID_COLUMN_NAME "`, `" JOB_COLUMN_NAME "` FROM `" JOBS_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` >= $last_query\n" "AND `" JOB_ID_COLUMN_NAME "` >= $last_job AND (`" EXPIRE_AT_COLUMN_NAME "` is NULL OR `" EXPIRE_AT_COLUMN_NAME "` > $now) " ); @@ -1681,7 +1681,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListJobsReq } queryBuilder.AddText( - "ORDER BY `" JOB_ID_COLUMN_NAME "`\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`, `" JOB_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); diff --git a/ydb/core/fq/libs/events/events.h b/ydb/core/fq/libs/events/events.h index e0a84beb4d4c..73fa33dc4d6d 100644 --- a/ydb/core/fq/libs/events/events.h +++ b/ydb/core/fq/libs/events/events.h @@ -250,6 +250,8 @@ struct TEvents { }; }; +NActors::TActorId MakeYqPrivateProxyId(); + } // namespace NFq template<> diff --git a/ydb/core/fq/libs/init/init.cpp b/ydb/core/fq/libs/init/init.cpp index 0714425a6eb5..d045af7b0a02 100644 --- a/ydb/core/fq/libs/init/init.cpp +++ b/ydb/core/fq/libs/init/init.cpp @@ -177,7 +177,10 @@ void Init( &protoConfig.GetGateways().GetHttpGateway(), yqCounters->GetSubgroup("subcomponent", "http_gateway")); - const auto connectorClient = NYql::NConnector::MakeClientGRPC(protoConfig.GetGateways().GetGeneric().GetConnector()); + NYql::NConnector::IClient::TPtr connectorClient = nullptr; + if (protoConfig.GetGateways().GetGeneric().HasConnector()) { + 
connectorClient = NYql::NConnector::MakeClientGRPC(protoConfig.GetGateways().GetGeneric().GetConnector()); + } if (protoConfig.GetTokenAccessor().GetEnabled()) { const auto& tokenAccessorConfig = protoConfig.GetTokenAccessor(); diff --git a/ydb/core/fq/libs/protos/fq_private.proto b/ydb/core/fq/libs/protos/fq_private.proto index bba6f0ffdb40..61d9f4925658 100644 --- a/ydb/core/fq/libs/protos/fq_private.proto +++ b/ydb/core/fq/libs/protos/fq_private.proto @@ -162,6 +162,7 @@ message PingTaskRequest { string operation_id = 35; string execution_id = 36; NYql.NDqProto.StatusIds.StatusCode pending_status_code = 37; + bool dump_raw_statistics = 38; } message PingTaskResult { diff --git a/ydb/core/grpc_services/query/rpc_execute_query.cpp b/ydb/core/grpc_services/query/rpc_execute_query.cpp index 6865cb8379e5..ba8b42cf88bc 100644 --- a/ydb/core/grpc_services/query/rpc_execute_query.cpp +++ b/ydb/core/grpc_services/query/rpc_execute_query.cpp @@ -394,12 +394,18 @@ class TExecuteQueryRPC : public TActorBootstrapped { return; } - if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - Request_->SetRuHeader(record.GetConsumedRu()); + Ydb::Query::ExecuteQueryResponsePart response; - auto& kqpResponse = record.GetResponse(); + if (NeedReportStats(*Request_->GetProtoRequest())) { + hasTrailingMessage = true; + FillQueryStats(*response.mutable_exec_stats(), kqpResponse); + if (NeedReportAst(*Request_->GetProtoRequest())) { + response.mutable_exec_stats()->set_query_ast(kqpResponse.GetQueryAst()); + } + } - Ydb::Query::ExecuteQueryResponsePart response; + if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { + Request_->SetRuHeader(record.GetConsumedRu()); if (QueryAction == NKikimrKqp::QUERY_ACTION_EXECUTE) { for(int i = 0; i < kqpResponse.GetYdbResults().size(); i++) { @@ -415,27 +421,15 @@ class TExecuteQueryRPC : public TActorBootstrapped { hasTrailingMessage = true; response.mutable_tx_meta()->set_id(kqpResponse.GetTxMeta().id()); } - - if 
(NeedReportStats(*Request_->GetProtoRequest())) { - hasTrailingMessage = true; - FillQueryStats(*response.mutable_exec_stats(), kqpResponse); - if (NeedReportAst(*Request_->GetProtoRequest())) { - response.mutable_exec_stats()->set_query_ast(kqpResponse.GetQueryAst()); - } - } - - if (hasTrailingMessage) { - response.set_status(Ydb::StatusIds::SUCCESS); - response.mutable_issues()->CopyFrom(issueMessage); - TString out; - Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); - const auto finishStreamFlag = NYdbGrpc::IRequestContextBase::EStreamCtrl::FINISH; - Request_->SendSerializedResult(std::move(out), record.GetYdbStatus(), finishStreamFlag); - this->PassAway(); - } } - if (!hasTrailingMessage) { + if (hasTrailingMessage) { + response.set_status(record.GetYdbStatus()); + response.mutable_issues()->CopyFrom(issueMessage); + TString out; + Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); + ReplySerializedAndFinishStream(record.GetYdbStatus(), std::move(out)); + } else { NYql::TIssues issues; NYql::IssuesFromMessage(issueMessage, issues); ReplyFinishStream(record.GetYdbStatus(), issueMessage); @@ -454,6 +448,12 @@ class TExecuteQueryRPC : public TActorBootstrapped { ReplyFinishStream(Ydb::StatusIds::INTERNAL_ERROR, issue); } + void ReplySerializedAndFinishStream(Ydb::StatusIds::StatusCode status, TString&& buf) { + const auto finishStreamFlag = NYdbGrpc::IRequestContextBase::EStreamCtrl::FINISH; + Request_->SendSerializedResult(std::move(buf), status, finishStreamFlag); + this->PassAway(); + } + void ReplyFinishStream(Ydb::StatusIds::StatusCode status, const NYql::TIssue& issue) { google::protobuf::RepeatedPtrField issuesMessage; NYql::IssueToMessage(issue, issuesMessage.Add()); diff --git a/ydb/core/grpc_services/rpc_fq.cpp b/ydb/core/grpc_services/rpc_fq.cpp index 49701c1b80b6..8d165726f340 100644 --- a/ydb/core/grpc_services/rpc_fq.cpp +++ b/ydb/core/grpc_services/rpc_fq.cpp @@ -4,9 +4,8 @@ #include #include #include -#include #include 
-#include +#include #include #include diff --git a/ydb/core/grpc_services/rpc_fq_internal.cpp b/ydb/core/grpc_services/rpc_fq_internal.cpp index 96dd18b34492..02c80ba4d125 100644 --- a/ydb/core/grpc_services/rpc_fq_internal.cpp +++ b/ydb/core/grpc_services/rpc_fq_internal.cpp @@ -3,7 +3,6 @@ #include "rpc_deferrable.h" #include -#include #include #include diff --git a/ydb/core/grpc_services/service_fq.h b/ydb/core/grpc_services/service_fq.h index f632bb2ee9cc..85b226f8ea02 100644 --- a/ydb/core/grpc_services/service_fq.h +++ b/ydb/core/grpc_services/service_fq.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace NKikimr { namespace NGRpcService { diff --git a/ydb/core/grpc_services/ya.make b/ydb/core/grpc_services/ya.make index c3fd88321a38..6af54cac7e44 100644 --- a/ydb/core/grpc_services/ya.make +++ b/ydb/core/grpc_services/ya.make @@ -95,8 +95,7 @@ PEERDIR( ydb/core/discovery ydb/core/engine ydb/core/formats - ydb/core/fq/libs/actors - ydb/core/fq/libs/control_plane_proxy + ydb/core/fq/libs/events ydb/core/fq/libs/control_plane_proxy/events ydb/core/grpc_services/base ydb/core/grpc_services/counters diff --git a/ydb/core/kqp/common/events/script_executions.h b/ydb/core/kqp/common/events/script_executions.h index 6b2b331e368e..f5157a1a10b2 100644 --- a/ydb/core/kqp/common/events/script_executions.h +++ b/ydb/core/kqp/common/events/script_executions.h @@ -221,20 +221,28 @@ struct TEvFetchScriptResultsQueryResponse : public NActors::TEventLocal { + struct TDescription { + TDescription(const TString& executionId, const TString& database, const TString& customerSuppliedId, const TString& userToken) + : ExecutionId(executionId) + , Database(database) + , CustomerSuppliedId(customerSuppliedId) + , UserToken(userToken) + {} + + TString ExecutionId; + TString Database; + + TString CustomerSuppliedId; + TString UserToken; + std::vector Sinks; + std::vector SecretNames; + }; + TEvSaveScriptExternalEffectRequest(const TString& executionId, const TString& 
database, const TString& customerSuppliedId, const TString& userToken) - : ExecutionId(executionId) - , Database(database) - , CustomerSuppliedId(customerSuppliedId) - , UserToken(userToken) + : Description(executionId, database, customerSuppliedId, userToken) {} - TString ExecutionId; - TString Database; - - TString CustomerSuppliedId; - TString UserToken; - std::vector Sinks; - std::vector SecretNames; + TDescription Description; }; struct TEvSaveScriptExternalEffectResponse : public NActors::TEventLocal { @@ -248,31 +256,41 @@ struct TEvSaveScriptExternalEffectResponse : public NActors::TEventLocal { + struct TDescription { + TDescription(EFinalizationStatus finalizationStatus, const TString& executionId, const TString& database, + Ydb::StatusIds::StatusCode operationStatus, Ydb::Query::ExecStatus execStatus, NYql::TIssues issues, std::optional queryStats, + std::optional queryPlan, std::optional queryAst, std::optional leaseGeneration) + : FinalizationStatus(finalizationStatus) + , ExecutionId(executionId) + , Database(database) + , OperationStatus(operationStatus) + , ExecStatus(execStatus) + , Issues(std::move(issues)) + , QueryStats(std::move(queryStats)) + , QueryPlan(std::move(queryPlan)) + , QueryAst(std::move(queryAst)) + , LeaseGeneration(leaseGeneration) + {} + + EFinalizationStatus FinalizationStatus; + TString ExecutionId; + TString Database; + Ydb::StatusIds::StatusCode OperationStatus; + Ydb::Query::ExecStatus ExecStatus; + NYql::TIssues Issues; + std::optional QueryStats; + std::optional QueryPlan; + std::optional QueryAst; + std::optional LeaseGeneration; + }; + TEvScriptFinalizeRequest(EFinalizationStatus finalizationStatus, const TString& executionId, const TString& database, Ydb::StatusIds::StatusCode operationStatus, Ydb::Query::ExecStatus execStatus, NYql::TIssues issues = {}, std::optional queryStats = std::nullopt, std::optional queryPlan = std::nullopt, std::optional queryAst = std::nullopt, std::optional leaseGeneration = std::nullopt) - 
: FinalizationStatus(finalizationStatus) - , ExecutionId(executionId) - , Database(database) - , OperationStatus(operationStatus) - , ExecStatus(execStatus) - , Issues(std::move(issues)) - , QueryStats(std::move(queryStats)) - , QueryPlan(std::move(queryPlan)) - , QueryAst(std::move(queryAst)) - , LeaseGeneration(leaseGeneration) + : Description(finalizationStatus, executionId, database, operationStatus, execStatus, issues, queryStats, queryPlan, queryAst, leaseGeneration) {} - EFinalizationStatus FinalizationStatus; - TString ExecutionId; - TString Database; - Ydb::StatusIds::StatusCode OperationStatus; - Ydb::Query::ExecStatus ExecStatus; - NYql::TIssues Issues; - std::optional QueryStats; - std::optional QueryPlan; - std::optional QueryAst; - std::optional LeaseGeneration; + TDescription Description; }; struct TEvScriptFinalizeResponse : public NActors::TEventLocal { @@ -284,15 +302,14 @@ struct TEvScriptFinalizeResponse : public NActors::TEventLocal { - TEvSaveScriptFinalStatusResponse(const TString& customerSuppliedId, const TString& userToken) - : CustomerSuppliedId(customerSuppliedId) - , UserToken(userToken) - {} - + bool ApplicateScriptExternalEffectRequired = false; + bool OperationAlreadyFinalized = false; TString CustomerSuppliedId; TString UserToken; std::vector Sinks; std::vector SecretNames; + Ydb::StatusIds::StatusCode Status; + NYql::TIssues Issues; }; struct TEvDescribeSecretsResponse : public NActors::TEventLocal { diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 2d1daba6b006..b6468d870b5a 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -372,6 +372,18 @@ class TKqpCompileActor : public TActorBootstrapped { PassAway(); } + void FillCompileResult(std::unique_ptr preparingQuery, NKikimrKqp::EQueryType queryType) { + auto preparedQueryHolder = std::make_shared( + preparingQuery.release(), 
AppData()->FunctionRegistry); + preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType); + KqpCompileResult->PreparedQuery = preparedQueryHolder; + KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()); + + if (AstResult) { + KqpCompileResult->Ast = AstResult->Ast; + } + } + void Handle(TEvKqp::TEvContinueProcess::TPtr &ev, const TActorContext &ctx) { Y_ENSURE(!ev->Get()->QueryId); @@ -403,17 +415,7 @@ class TKqpCompileActor : public TActorBootstrapped { if (status == Ydb::StatusIds::SUCCESS) { YQL_ENSURE(kqpResult.PreparingQuery); - { - auto preparedQueryHolder = std::make_shared( - kqpResult.PreparingQuery.release(), AppData()->FunctionRegistry); - preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType); - KqpCompileResult->PreparedQuery = preparedQueryHolder; - KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()); - - if (AstResult) { - KqpCompileResult->Ast = AstResult->Ast; - } - } + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType); auto now = TInstant::Now(); auto duration = now - StartTime; @@ -423,6 +425,10 @@ class TKqpCompileActor : public TActorBootstrapped { << ", self: " << ctx.SelfID << ", duration: " << duration); } else { + if (kqpResult.PreparingQuery) { + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType); + } + LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_ACTOR, "Compilation failed" << ", self: " << ctx.SelfID << ", status: " << Ydb::StatusIds_StatusCode_Name(status) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 4ff23bfa0165..eeca1aa36882 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -1615,13 +1615,13 @@ class TKqpDataExecuter : public TKqpExecuterBaseSinks.push_back(sink.GetExternalSink()); + 
scriptExternalEffect->Description.Sinks.push_back(sink.GetExternalSink()); } } } } } - scriptExternalEffect->SecretNames = SecretNames; + scriptExternalEffect->Description.SecretNames = SecretNames; if (!WaitRequired()) { return Execute(); diff --git a/ydb/core/kqp/executer_actor/ya.make b/ydb/core/kqp/executer_actor/ya.make index 9cb7618afb0d..9b0e374f5b7a 100644 --- a/ydb/core/kqp/executer_actor/ya.make +++ b/ydb/core/kqp/executer_actor/ya.make @@ -26,6 +26,7 @@ PEERDIR( ydb/core/client/minikql_compile ydb/core/formats ydb/core/kqp/common + ydb/core/kqp/compute_actor ydb/core/kqp/query_compiler ydb/core/kqp/rm_service ydb/core/kqp/topics diff --git a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp index 6104e99ef9d0..aa6f8b3ac855 100644 --- a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp +++ b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp @@ -105,11 +105,12 @@ namespace NKikimr::NKqp { GenericGatewaysConfig}; // Init DatabaseAsyncResolver only if all requirements are met - if (DatabaseResolverActorId && GenericGatewaysConfig.HasMdbGateway() && MdbEndpointGenerator) { + if (DatabaseResolverActorId && MdbEndpointGenerator && + (GenericGatewaysConfig.HasMdbGateway() || GenericGatewaysConfig.HasYdbMvpEndpoint())) { result.DatabaseAsyncResolver = std::make_shared( actorSystem, DatabaseResolverActorId.value(), - "", // TODO: use YDB Gateway endpoint? 
+ GenericGatewaysConfig.GetYdbMvpEndpoint(), GenericGatewaysConfig.GetMdbGateway(), MdbEndpointGenerator); } diff --git a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp index 03d2a475bbe3..6ffc4b58b3d2 100644 --- a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp +++ b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp @@ -22,9 +22,9 @@ class TScriptFinalizerActor : public TActorBootstrapped { const NKikimrConfig::TMetadataProviderConfig& metadataProviderConfig, const std::optional& federatedQuerySetup) : ReplyActor_(request->Sender) - , ExecutionId_(request->Get()->ExecutionId) - , Database_(request->Get()->Database) - , FinalizationStatus_(request->Get()->FinalizationStatus) + , ExecutionId_(request->Get()->Description.ExecutionId) + , Database_(request->Get()->Description.Database) + , FinalizationStatus_(request->Get()->Description.FinalizationStatus) , Request_(std::move(request)) , FinalizationTimeout_(TDuration::Seconds(finalizeScriptServiceConfig.GetScriptFinalizationTimeoutSeconds())) , MaximalSecretsSnapshotWaitTime_(2 * TDuration::Seconds(metadataProviderConfig.GetRefreshPeriodSeconds())) @@ -32,16 +32,20 @@ class TScriptFinalizerActor : public TActorBootstrapped { {} void Bootstrap() { - Register(CreateSaveScriptFinalStatusActor(std::move(Request_))); + Register(CreateSaveScriptFinalStatusActor(SelfId(), std::move(Request_))); Become(&TScriptFinalizerActor::FetchState); } STRICT_STFUNC(FetchState, hFunc(TEvSaveScriptFinalStatusResponse, Handle); - hFunc(TEvScriptExecutionFinished, Handle); ) void Handle(TEvSaveScriptFinalStatusResponse::TPtr& ev) { + if (!ev->Get()->ApplicateScriptExternalEffectRequired || ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + Reply(ev->Get()->OperationAlreadyFinalized, ev->Get()->Status, std::move(ev->Get()->Issues)); + return; + } + Schedule(FinalizationTimeout_, new TEvents::TEvWakeup()); 
Become(&TScriptFinalizerActor::PrepareState); @@ -168,7 +172,7 @@ class TScriptFinalizerActor : public TActorBootstrapped { ) void FinishScriptFinalization(std::optional status, NYql::TIssues issues) { - Register(CreateScriptFinalizationFinisherActor(ExecutionId_, Database_, status, std::move(issues))); + Register(CreateScriptFinalizationFinisherActor(SelfId(), ExecutionId_, Database_, status, std::move(issues))); Become(&TScriptFinalizerActor::FinishState); } @@ -181,7 +185,11 @@ class TScriptFinalizerActor : public TActorBootstrapped { } void Handle(TEvScriptExecutionFinished::TPtr& ev) { - Send(ReplyActor_, ev->Release()); + Reply(ev->Get()->OperationAlreadyFinalized, ev->Get()->Status, std::move(ev->Get()->Issues)); + } + + void Reply(bool operationAlreadyFinalized, Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) { + Send(ReplyActor_, new TEvScriptExecutionFinished(operationAlreadyFinalized, status, std::move(issues))); Send(MakeKqpFinalizeScriptServiceId(SelfId().NodeId()), new TEvScriptFinalizeResponse(ExecutionId_)); PassAway(); diff --git a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp index cf6c66d5b597..6c0868e4c42b 100644 --- a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp +++ b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp @@ -27,9 +27,10 @@ class TKqpFinalizeScriptService : public TActorBootstrappedGet()->Sinks = FilterExternalSinks(ev->Get()->Sinks); + auto& description = ev->Get()->Description; + description.Sinks = FilterExternalSinks(description.Sinks); - if (!ev->Get()->Sinks.empty()) { + if (!description.Sinks.empty()) { Register(CreateSaveScriptExternalEffectActor(std::move(ev))); } else { Send(ev->Sender, new TEvSaveScriptExternalEffectResponse(Ydb::StatusIds::SUCCESS, {})); @@ -37,7 +38,7 @@ class TKqpFinalizeScriptService : public TActorBootstrappedGet()->ExecutionId; + TString executionId = 
ev->Get()->Description.ExecutionId; if (!FinalizationRequestsQueue_.contains(executionId)) { WaitingFinalizationExecutions_.push(executionId); diff --git a/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp b/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp index ad65bde5da0f..ad491f0d9e4c 100644 --- a/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp @@ -66,11 +66,12 @@ void FillCreateExternalDataSourceDesc(NKikimrSchemeOp::TExternalDataSourceDescri } static const TSet properties { - "database_name", - "protocol", - "mdb_cluster_id", + "database_name", + "protocol", // managed PG, CH + "mdb_cluster_id", // managed PG, CH + "database_id", // managed YDB "use_tls", - "schema" + "schema", // managed PG }; for (const auto& property: properties) { diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index 750fe8120dbc..b194950eee7b 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -182,6 +182,10 @@ class TAsyncValidateYqlResult : public TKqpAsyncResultBaseQuery().PrepareOnly); validateResult.PreparedQuery.reset(SessionCtx->Query().PreparingQuery.release()); validateResult.SqlVersion = SqlVersion; @@ -211,6 +215,10 @@ class TAsyncExplainYqlResult : public TKqpAsyncResultBase plans; for (auto id : SessionCtx->Query().ExecutionOrder) { @@ -253,6 +261,10 @@ class TAsyncExecuteYqlResult : public TKqpAsyncResultBase(queryResult.ProtobufArenaPtr.get())); @@ -300,6 +312,10 @@ class TAsyncExecuteKqlResult : public TKqpAsyncResultBaseGetPhysicalQuery().GetQueryPlan(); @@ -320,13 +336,28 @@ class TAsyncPrepareYqlResult : public TKqpAsyncResultBase queryCtx, const TKqpQueryRef& query, TMaybe sqlVersion) + TIntrusivePtr queryCtx, const TKqpQueryRef& query, TMaybe sqlVersion, + TIntrusivePtr transformCtx) : TKqpAsyncResultBase(queryRoot, exprCtx, transformer) , QueryCtx(queryCtx) + , ExprCtx(exprCtx) + , 
TransformCtx(transformCtx) , QueryText(query.Text) , SqlVersion(sqlVersion) {} void FillResult(TResult& prepareResult) const override { + if (!prepareResult.Success()) { + auto exprRoot = GetExprRoot(); + if (TransformCtx && TransformCtx->ExplainTransformerInput) { + exprRoot = TransformCtx->ExplainTransformerInput; + } + if (exprRoot) { + prepareResult.PreparingQuery = std::move(QueryCtx->PreparingQuery); + prepareResult.PreparingQuery->MutablePhysicalQuery()->SetQueryAst(KqpExprToPrettyString(*exprRoot, ExprCtx)); + } + return; + } + YQL_ENSURE(QueryCtx->PrepareOnly); YQL_ENSURE(QueryCtx->PreparingQuery); @@ -344,6 +375,8 @@ class TAsyncPrepareYqlResult : public TKqpAsyncResultBase QueryCtx; + NYql::TExprContext& ExprCtx; + TIntrusivePtr TransformCtx; TString QueryText; TMaybe SqlVersion; }; @@ -933,6 +966,7 @@ class TKqpHost : public IKqpHost { , IsInternalCall(isInternalCall) , FederatedQuerySetup(federatedQuerySetup) , SessionCtx(new TKikimrSessionContext(funcRegistry, config, TAppData::TimeProvider, TAppData::RandomProvider, userToken)) + , Config(config) , TypesCtx(MakeIntrusive()) , PlanBuilder(CreatePlanBuilder(*TypesCtx)) , FakeWorld(ExprCtx->NewWorld(TPosition())) @@ -1265,7 +1299,7 @@ class TKqpHost : public IKqpHost { } return MakeIntrusive(queryExpr.Get(), ctx, *YqlTransformer, SessionCtx->QueryPtr(), - query.Text, sqlVersion); + query.Text, sqlVersion, TransformCtx); } IAsyncQueryResultPtr PrepareDataQueryAstInternal(const TKqpQueryRef& queryAst, const TPrepareSettings& settings, @@ -1327,7 +1361,7 @@ class TKqpHost : public IKqpHost { } return MakeIntrusive(queryExpr.Get(), ctx, *YqlTransformer, SessionCtx->QueryPtr(), - query.Text, sqlVersion); + query.Text, sqlVersion, TransformCtx); } IAsyncQueryResultPtr PrepareScanQueryInternal(const TKqpQueryRef& query, bool isSql, TExprContext& ctx, @@ -1354,7 +1388,7 @@ class TKqpHost : public IKqpHost { } return MakeIntrusive(queryExpr.Get(), ctx, *YqlTransformer, SessionCtx->QueryPtr(), - query.Text, 
sqlVersion); + query.Text, sqlVersion, TransformCtx); } IAsyncQueryResultPtr PrepareScanQueryAstInternal(const TKqpQueryRef& queryAst, TExprContext& ctx) { @@ -1474,11 +1508,12 @@ class TKqpHost : public IKqpHost { state->CredentialsFactory = FederatedQuerySetup->CredentialsFactory; state->Configuration->WriteThroughDqIntegration = true; state->Configuration->AllowAtomicUploadCommit = queryType == EKikimrQueryType::Script; - state->Configuration->Init(FederatedQuerySetup->S3GatewayConfig, TypesCtx); + state->Gateway = FederatedQuerySetup->HttpGateway; + state->ExecutorPoolId = AppData()->UserPoolId; - auto dataSource = NYql::CreateS3DataSource(state, FederatedQuerySetup->HttpGateway); - auto dataSink = NYql::CreateS3DataSink(state, FederatedQuerySetup->HttpGateway); + auto dataSource = NYql::CreateS3DataSource(state); + auto dataSink = NYql::CreateS3DataSink(state); TypesCtx->AddDataSource(NYql::S3ProviderName, std::move(dataSource)); TypesCtx->AddDataSink(NYql::S3ProviderName, std::move(dataSink)); @@ -1493,6 +1528,7 @@ class TKqpHost : public IKqpHost { TypesCtx.Get(), FuncRegistry, FederatedQuerySetup->DatabaseAsyncResolver, + FederatedQuerySetup->CredentialsFactory, FederatedQuerySetup->ConnectorClient, FederatedQuerySetup->GenericGatewayConfig ); @@ -1502,7 +1538,8 @@ class TKqpHost : public IKqpHost { } void Init(EKikimrQueryType queryType) { - KqpRunner = CreateKqpRunner(Gateway, Cluster, TypesCtx, SessionCtx, *FuncRegistry); + TransformCtx = MakeIntrusive(Config, SessionCtx->QueryPtr(), SessionCtx->TablesPtr()); + KqpRunner = CreateKqpRunner(Gateway, Cluster, TypesCtx, SessionCtx, TransformCtx, *FuncRegistry); ExprCtx->NodesAllocationLimit = SessionCtx->Config()._KqpExprNodesAllocationLimit.Get().GetRef(); ExprCtx->StringsAllocationLimit = SessionCtx->Config()._KqpExprStringsAllocationLimit.Get().GetRef(); @@ -1635,6 +1672,7 @@ class TKqpHost : public IKqpHost { std::optional FederatedQuerySetup; TIntrusivePtr SessionCtx; + TKikimrConfiguration::TPtr 
Config; TIntrusivePtr FuncRegistryHolder; const NKikimr::NMiniKQL::IFunctionRegistry* FuncRegistry; @@ -1648,6 +1686,7 @@ class TKqpHost : public IKqpHost { TExprNode::TPtr FakeWorld; TIntrusivePtr ExecuteCtx; + TIntrusivePtr TransformCtx; TIntrusivePtr KqpRunner; NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory{NExternalSource::CreateExternalSourceFactory({})}; diff --git a/ydb/core/kqp/host/kqp_host_impl.h b/ydb/core/kqp/host/kqp_host_impl.h index 550f9e2776d3..17110a986926 100644 --- a/ydb/core/kqp/host/kqp_host_impl.h +++ b/ydb/core/kqp/host/kqp_host_impl.h @@ -34,7 +34,9 @@ class TKqpAsyncResultBase : public NYql::IKikimrAsyncResult { YQL_ENSURE(HasResult()); if (Status.GetValue() == NYql::IGraphTransformer::TStatus::Error) { - return NYql::NCommon::ResultFromErrors(ExprCtx.IssueManager.GetIssues()); + TResult result = NYql::NCommon::ResultFromErrors(ExprCtx.IssueManager.GetIssues()); + FillResult(result); + return result; } YQL_ENSURE(Status.GetValue() == NYql::IGraphTransformer::TStatus::Ok); @@ -244,7 +246,7 @@ class IKqpRunner : public TThrRefBase { TIntrusivePtr CreateKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const NMiniKQL::IFunctionRegistry& funcRegistry); + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry); TAutoPtr CreateKqpExplainPreparedTransformer(TIntrusivePtr gateway, const TString& cluster, TIntrusivePtr transformCtx, const NMiniKQL::IFunctionRegistry* funcRegistry, diff --git a/ydb/core/kqp/host/kqp_runner.cpp b/ydb/core/kqp/host/kqp_runner.cpp index 6e0d9b7f98bc..8e113670b9ca 100644 --- a/ydb/core/kqp/host/kqp_runner.cpp +++ b/ydb/core/kqp/host/kqp_runner.cpp @@ -137,14 +137,14 @@ class TKqpRunner : public IKqpRunner { public: TKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const NMiniKQL::IFunctionRegistry& funcRegistry) + const 
TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry) : Gateway(gateway) , Cluster(cluster) , TypesCtx(*typesCtx) , SessionCtx(sessionCtx) , FunctionRegistry(funcRegistry) , Config(sessionCtx->ConfigPtr()) - , TransformCtx(MakeIntrusive(Config, sessionCtx->QueryPtr(), sessionCtx->TablesPtr())) + , TransformCtx(transformCtx) , OptimizeCtx(MakeIntrusive(cluster, Config, sessionCtx->QueryPtr(), sessionCtx->TablesPtr())) , BuildQueryCtx(MakeIntrusive()) @@ -377,9 +377,9 @@ class TKqpRunner : public IKqpRunner { TIntrusivePtr CreateKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const NMiniKQL::IFunctionRegistry& funcRegistry) + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry) { - return new TKqpRunner(gateway, cluster, typesCtx, sessionCtx, funcRegistry); + return new TKqpRunner(gateway, cluster, typesCtx, sessionCtx, transformCtx, funcRegistry); } } // namespace NKqp diff --git a/ydb/core/kqp/node_service/ya.make b/ydb/core/kqp/node_service/ya.make index 7dc91a19f358..8ffe88e4925e 100644 --- a/ydb/core/kqp/node_service/ya.make +++ b/ydb/core/kqp/node_service/ya.make @@ -10,6 +10,7 @@ PEERDIR( ydb/core/base ydb/core/cms/console ydb/core/kqp/common + ydb/core/kqp/compute_actor ydb/core/kqp/counters ydb/core/mind ydb/core/protos diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index e53cc909468c..cbe50d797ad3 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -111,8 +112,28 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { } TMaybeNode RewriteAggregate(TExprBase node, TExprContext& ctx) { - TExprBase output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown() || KqpCtx.Config->HasOptUseFinalizeByKey(), 
KqpCtx.Config->HasOptUseFinalizeByKey()); - DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Ptr(), ctx); + TMaybeNode output; + auto aggregate = node.Cast(); + auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); + if (hopSetting) { + auto input = aggregate.Input().Maybe(); + if (!input) { + return node; + } + output = NHopping::RewriteAsHoppingWindow( + node, + ctx, + input.Cast(), + false, // analyticsHopping + TDuration::MilliSeconds(TDqSettings::TDefault::WatermarksLateArrivalDelayMs), + true, // defaultWatermarksMode + true); // syncActor + } else { + output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown() || KqpCtx.Config->HasOptUseFinalizeByKey(), KqpCtx.Config->HasOptUseFinalizeByKey()); + } + if (output) { + DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Cast().Ptr(), ctx); + } return output; } diff --git a/ydb/core/kqp/proxy_service/kqp_script_executions.cpp b/ydb/core/kqp/proxy_service/kqp_script_executions.cpp index 56a9dae3b58a..a2abb043fc36 100644 --- a/ydb/core/kqp/proxy_service/kqp_script_executions.cpp +++ b/ydb/core/kqp/proxy_service/kqp_script_executions.cpp @@ -478,7 +478,7 @@ class TScriptLeaseUpdater : public TQueryBase { class TScriptLeaseUpdateActor : public TActorBootstrapped { public: - using IRetryPolicy = IRetryPolicy; + using TLeaseUpdateRetryActor = TQueryRetryActor; TScriptLeaseUpdateActor(const TActorId& runScriptActorId, const TString& database, const TString& executionId, TDuration leaseDuration, TIntrusivePtr counters) : RunScriptActorId(runScriptActorId) @@ -489,44 +489,20 @@ class TScriptLeaseUpdateActor : public TActorBootstrapped::max(), LeaseDuration / 2), + Database, ExecutionId, LeaseDuration + )); Become(&TScriptLeaseUpdateActor::StateFunc); } STRICT_STFUNC(StateFunc, hFunc(TEvScriptLeaseUpdateResponse, Handle); - hFunc(NActors::TEvents::TEvWakeup, Wakeup); ) - void Wakeup(NActors::TEvents::TEvWakeup::TPtr&) { - CreateScriptLeaseUpdater(); - } - void 
Handle(TEvScriptLeaseUpdateResponse::TPtr& ev) { - auto queryStatus = ev->Get()->Status; - if (!ev->Get()->ExecutionEntryExists && queryStatus == Ydb::StatusIds::BAD_REQUEST || queryStatus == Ydb::StatusIds::SUCCESS) { - Reply(std::move(ev)); - return; - } - - if (RetryState == nullptr) { - CreateRetryState(); - } - - const TMaybe delay = RetryState->GetNextRetryDelay(queryStatus); - if (delay) { - Schedule(*delay, new NActors::TEvents::TEvWakeup()); - } else { - Reply(std::move(ev)); - } - } - - void Reply(TEvScriptLeaseUpdateResponse::TPtr&& ev) { if (Counters) { Counters->ReportLeaseUpdateLatency(TInstant::Now() - LeaseUpdateStartTime); } @@ -534,33 +510,6 @@ class TScriptLeaseUpdateActor : public TActorBootstrapped::max(), LeaseDuration / 2); - RetryState = policy->CreateRetryState(); - } - private: TActorId RunScriptActorId; TString Database; @@ -568,7 +517,6 @@ class TScriptLeaseUpdateActor : public TActorBootstrapped Counters; TInstant LeaseUpdateStartTime; - IRetryPolicy::IRetryState::TPtr RetryState = nullptr; }; class TCheckLeaseStatusActorBase : public TActorBootstrapped { @@ -646,9 +594,9 @@ class TCheckLeaseStatusActorBase : public TActorBootstrappedOperationStatus; - FinalExecStatus = ScriptFinalizeRequest->ExecStatus; - FinalIssues = ScriptFinalizeRequest->Issues; + FinalOperationStatus = ScriptFinalizeRequest->Description.OperationStatus; + FinalExecStatus = ScriptFinalizeRequest->Description.ExecStatus; + FinalIssues = ScriptFinalizeRequest->Description.Issues; Send(MakeKqpFinalizeScriptServiceId(SelfId().NodeId()), ScriptFinalizeRequest.release()); } @@ -884,7 +832,6 @@ class TCheckLeaseStatusActor : public TCheckLeaseStatusActorBase { class TForgetScriptExecutionOperationQueryActor : public TQueryBase { static constexpr i64 MAX_NUMBER_ROWS_IN_BATCH = 100000; - static constexpr TDuration MINIMAL_DEADLINE_TIME = TDuration::Seconds(1); struct TResultSetDescription { i64 MaxRowId; @@ -895,7 +842,7 @@ class TForgetScriptExecutionOperationQueryActor : 
public TQueryBase { TForgetScriptExecutionOperationQueryActor(const TString& executionId, const TString& database, TInstant operationDeadline) : ExecutionId(executionId) , Database(database) - , Deadline(operationDeadline - MINIMAL_DEADLINE_TIME) + , Deadline(operationDeadline) {} void OnRunQuery() override { @@ -1022,10 +969,14 @@ class TForgetScriptExecutionOperationQueryActor : public TQueryBase { Send(Owner, new TEvForgetScriptExecutionOperationResponse(status, std::move(issues))); } + static NYql::TIssues ForgetOperationTimeoutIssues() { + return { NYql::TIssue("Forget script execution operation timeout") }; + } + private: bool CheckDeadline() { if (TInstant::Now() >= Deadline) { - Finish(Ydb::StatusIds::TIMEOUT, "Forget script execution operation timeout"); + Finish(Ydb::StatusIds::TIMEOUT, ForgetOperationTimeoutIssues()); return false; } return true; @@ -1040,6 +991,8 @@ class TForgetScriptExecutionOperationQueryActor : public TQueryBase { class TForgetScriptExecutionOperationActor : public TActorBootstrapped { public: + using TForgetOperationRetryActor = TQueryRetryActor; + explicit TForgetScriptExecutionOperationActor(TEvForgetScriptExecutionOperation::TPtr ev) : Request(std::move(ev)) {} @@ -1075,7 +1028,18 @@ class TForgetScriptExecutionOperationActor : public TActorBootstrappedGet()->Database, Request->Get()->Deadline)); + TDuration minDelay = TDuration::MilliSeconds(10); + TDuration maxTime = Request->Get()->Deadline - TInstant::Now() - TDuration::Seconds(1); + if (maxTime <= minDelay) { + Reply(Ydb::StatusIds::TIMEOUT, TForgetScriptExecutionOperationQueryActor::ForgetOperationTimeoutIssues()); + return; + } + + Register(new TForgetOperationRetryActor( + SelfId(), + TForgetOperationRetryActor::IRetryPolicy::GetExponentialBackoffPolicy(TForgetOperationRetryActor::Retryable, minDelay, TDuration::MilliSeconds(200), TDuration::Seconds(1), std::numeric_limits::max(), maxTime), + ExecutionId, Request->Get()->Database, TInstant::Now() + maxTime + )); } void 
Handle(TEvForgetScriptExecutionOperationResponse::TPtr& ev) { @@ -1315,6 +1279,7 @@ class TGetScriptExecutionOperationActor : public TCheckLeaseStatusActorBase { Response->Get()->Ready = false; Response->Get()->Status = Ydb::StatusIds::SUCCESS; Response->Get()->Issues.Clear(); + Response->Get()->Metadata.set_exec_status(Ydb::Query::ExecStatus::EXEC_STATUS_UNSPECIFIED); } else { Response->Get()->Ready = true; Response->Get()->Status = GetOperationStatus(); @@ -1756,43 +1721,9 @@ class TSaveScriptExecutionResultMetaQuery : public TQueryBase { const TString SerializedMetas; }; -class TSaveScriptExecutionResultMetaActor : public TActorBootstrapped { -public: - TSaveScriptExecutionResultMetaActor(const NActors::TActorId& replyActorId, const TString& database, const TString& executionId, const TString& serializedMetas) - : ReplyActorId(replyActorId), Database(database), ExecutionId(executionId), SerializedMetas(serializedMetas) - { - } - - void Bootstrap() { - Register(new TSaveScriptExecutionResultMetaQuery(Database, ExecutionId, SerializedMetas)); - - Become(&TSaveScriptExecutionResultMetaActor::StateFunc); - } - - STRICT_STFUNC(StateFunc, - hFunc(TEvSaveScriptResultMetaFinished, Handle); - ) - - void Handle(TEvSaveScriptResultMetaFinished::TPtr& ev) { - if (ev->Get()->Status == Ydb::StatusIds::ABORTED) { - Register(new TSaveScriptExecutionResultMetaQuery(Database, ExecutionId, SerializedMetas)); - return; - } - - Send(ev->Forward(ReplyActorId)); - PassAway(); - } - -private: - const NActors::TActorId ReplyActorId; - const TString Database; - const TString ExecutionId; - const TString SerializedMetas; -}; - class TSaveScriptExecutionResultQuery : public TQueryBase { public: - TSaveScriptExecutionResultQuery(const TString& database, const TString& executionId, i32 resultSetId, TMaybe expireAt, i64 firstRow, Ydb::ResultSet&& resultSet) + TSaveScriptExecutionResultQuery(const TString& database, const TString& executionId, i32 resultSetId, TMaybe expireAt, i64 firstRow, 
Ydb::ResultSet resultSet) : Database(database), ExecutionId(executionId), ResultSetId(resultSetId), ExpireAt(expireAt), FirstRow(firstRow), ResultSet(std::move(resultSet)) { } @@ -1895,7 +1826,7 @@ class TSaveScriptExecutionResultActor : public TActorBootstrapped, i64, Ydb::ResultSet>(SelfId(), Database, ExecutionId, ResultSetId, ExpireAt, FirstRow, ResultSets.back())); FirstRow += numberRows; ResultSets.pop_back(); @@ -1977,13 +1908,13 @@ class TGetScriptExecutionResultQuery : public TQueryBase { AND execution_id = $execution_id AND (expire_at > CurrentUtcTimestamp() OR expire_at IS NULL); - SELECT row_id, result_set + SELECT database, execution_id, result_set_id, row_id, result_set FROM `.metadata/result_sets` WHERE database = $database AND execution_id = $execution_id AND result_set_id = $result_set_id AND row_id >= $offset - ORDER BY row_id + ORDER BY database, execution_id, result_set_id, row_id LIMIT $limit; )"; @@ -2203,8 +2134,8 @@ class TGetScriptExecutionResultActor : public TActorBootstrappedGet()->Database) + .Utf8(Request.Database) .Build() .AddParam("$execution_id") - .Utf8(Request->Get()->ExecutionId) + .Utf8(Request.ExecutionId) .Build() .AddParam("$customer_supplied_id") - .Utf8(Request->Get()->CustomerSuppliedId) + .Utf8(Request.CustomerSuppliedId) .Build() .AddParam("$user_token") - .Utf8(Request->Get()->UserToken) + .Utf8(Request.UserToken) .Build() .AddParam("$script_sinks") - .JsonDocument(SerializeSinks(Request->Get()->Sinks)) + .JsonDocument(SerializeSinks(Request.Sinks)) .Build() .AddParam("$script_secret_names") - .JsonDocument(SerializeSecretNames(Request->Get()->SecretNames)) + .JsonDocument(SerializeSecretNames(Request.SecretNames)) .Build(); RunDataQuery(sql, ¶ms); @@ -2255,7 +2186,7 @@ class TSaveScriptExternalEffectActor : public TQueryBase { } void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { - Send(Request->Sender, new TEvSaveScriptExternalEffectResponse(status, std::move(issues))); + Send(Owner, 
new TEvSaveScriptExternalEffectResponse(status, std::move(issues))); } private: @@ -2292,14 +2223,16 @@ class TSaveScriptExternalEffectActor : public TQueryBase { } private: - TEvSaveScriptExternalEffectRequest::TPtr Request; + TEvSaveScriptExternalEffectRequest::TDescription Request; }; class TSaveScriptFinalStatusActor : public TQueryBase { public: - explicit TSaveScriptFinalStatusActor(TEvScriptFinalizeRequest::TPtr ev) - : Request(ev) - {} + explicit TSaveScriptFinalStatusActor(const TEvScriptFinalizeRequest::TDescription& request) + : Request(request) + { + Response = std::make_unique(); + } void OnRunQuery() override { TString sql = R"( @@ -2328,10 +2261,10 @@ class TSaveScriptFinalStatusActor : public TQueryBase { NYdb::TParamsBuilder params; params .AddParam("$database") - .Utf8(Request->Get()->Database) + .Utf8(Request.Database) .Build() .AddParam("$execution_id") - .Utf8(Request->Get()->ExecutionId) + .Utf8(Request.ExecutionId) .Build(); RunDataQuery(sql, ¶ms, TTxControl::BeginTx()); @@ -2354,16 +2287,16 @@ class TSaveScriptFinalStatusActor : public TQueryBase { TMaybe finalizationStatus = result.ColumnParser("finalization_status").GetOptionalInt32(); if (finalizationStatus) { - if (Request->Get()->FinalizationStatus != *finalizationStatus) { + if (Request.FinalizationStatus != *finalizationStatus) { Finish(Ydb::StatusIds::PRECONDITION_FAILED, "Execution already have different finalization status"); return; } - ApplicateScriptExternalEffectRequired = true; + Response->ApplicateScriptExternalEffectRequired = true; } TMaybe operationStatus = result.ColumnParser("operation_status").GetOptionalInt32(); - if (Request->Get()->LeaseGeneration && !operationStatus) { + if (Request.LeaseGeneration && !operationStatus) { NYdb::TResultSetParser leaseResult(ResultSets[1]); if (leaseResult.RowsCount() == 0) { Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected operation state"); @@ -2378,7 +2311,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { return; } - if 
(*Request->Get()->LeaseGeneration != static_cast(*leaseGenerationInDatabase)) { + if (*Request.LeaseGeneration != static_cast(*leaseGenerationInDatabase)) { Finish(Ydb::StatusIds::PRECONDITION_FAILED, "Lease was lost"); return; } @@ -2386,12 +2319,12 @@ class TSaveScriptFinalStatusActor : public TQueryBase { TMaybe customerSuppliedId = result.ColumnParser("customer_supplied_id").GetOptionalUtf8(); if (customerSuppliedId) { - CustomerSuppliedId = *customerSuppliedId; + Response->CustomerSuppliedId = *customerSuppliedId; } TMaybe userToken = result.ColumnParser("user_token").GetOptionalUtf8(); if (userToken) { - UserToken = *userToken; + Response->UserToken = *userToken; } SerializedSinks = result.ColumnParser("script_sinks").GetOptionalJsonDocument(); @@ -2408,7 +2341,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { NKqpProto::TKqpExternalSink sink; NProtobufJson::Json2Proto(*serializedSink, sink); - Sinks.push_back(sink); + Response->Sinks.push_back(sink); } } @@ -2424,7 +2357,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { const NJson::TJsonValue* serializedSecretName; value.GetValuePointer(i, &serializedSecretName); - SecretNames.push_back(serializedSecretName->GetString()); + Response->SecretNames.push_back(serializedSecretName->GetString()); } } @@ -2443,12 +2376,12 @@ class TSaveScriptFinalStatusActor : public TQueryBase { if (operationStatus) { FinalStatusAlreadySaved = true; - OperationAlreadyFinalized = !finalizationStatus; + Response->OperationAlreadyFinalized = !finalizationStatus; CommitTransaction(); return; } - ApplicateScriptExternalEffectRequired = ApplicateScriptExternalEffectRequired || HasExternalEffect(); + Response->ApplicateScriptExternalEffectRequired = Response->ApplicateScriptExternalEffectRequired || HasExternalEffect(); FinishScriptExecution(); } @@ -2493,10 +2426,10 @@ class TSaveScriptFinalStatusActor : public TQueryBase { )"; TString serializedStats = "{}"; - if (Request->Get()->QueryStats) { + if 
(Request.QueryStats) { NJson::TJsonValue statsJson; Ydb::TableStats::QueryStats queryStats; - NGRpcService::FillQueryStats(queryStats, *Request->Get()->QueryStats); + NGRpcService::FillQueryStats(queryStats, *Request.QueryStats); NProtobufJson::Proto2Json(queryStats, statsJson, NProtobufJson::TProto2JsonConfig()); serializedStats = NJson::WriteJson(statsJson); } @@ -2504,40 +2437,40 @@ class TSaveScriptFinalStatusActor : public TQueryBase { NYdb::TParamsBuilder params; params .AddParam("$database") - .Utf8(Request->Get()->Database) + .Utf8(Request.Database) .Build() .AddParam("$execution_id") - .Utf8(Request->Get()->ExecutionId) + .Utf8(Request.ExecutionId) .Build() .AddParam("$operation_status") - .Int32(Request->Get()->OperationStatus) + .Int32(Request.OperationStatus) .Build() .AddParam("$execution_status") - .Int32(Request->Get()->ExecStatus) + .Int32(Request.ExecStatus) .Build() .AddParam("$finalization_status") - .Int32(Request->Get()->FinalizationStatus) + .Int32(Request.FinalizationStatus) .Build() .AddParam("$issues") - .JsonDocument(SerializeIssues(Request->Get()->Issues)) + .JsonDocument(SerializeIssues(Request.Issues)) .Build() .AddParam("$plan") - .JsonDocument(Request->Get()->QueryPlan.value_or("{}")) + .JsonDocument(Request.QueryPlan.value_or("{}")) .Build() .AddParam("$stats") .JsonDocument(serializedStats) .Build() .AddParam("$ast") - .Utf8(Request->Get()->QueryAst.value_or("")) + .Utf8(Request.QueryAst.value_or("")) .Build() .AddParam("$operation_ttl") .Interval(static_cast(OperationTtl.MicroSeconds())) .Build() .AddParam("$customer_supplied_id") - .Utf8(CustomerSuppliedId) + .Utf8(Response->CustomerSuppliedId) .Build() .AddParam("$user_token") - .Utf8(UserToken) + .Utf8(Response->UserToken) .Build() .AddParam("$script_sinks") .OptionalJsonDocument(SerializedSinks) @@ -2546,7 +2479,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { .OptionalJsonDocument(SerializedSecretNames) .Build() 
.AddParam("$applicate_script_external_effect_required") - .Bool(ApplicateScriptExternalEffectRequired) + .Bool(Response->ApplicateScriptExternalEffectRequired) .Build(); RunDataQuery(sql, ¶ms, TTxControl::ContinueAndCommitTx()); @@ -2559,42 +2492,31 @@ class TSaveScriptFinalStatusActor : public TQueryBase { void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { if (!FinalStatusAlreadySaved) { - KQP_PROXY_LOG_D("Finish script execution operation. ExecutionId: " << Request->Get()->ExecutionId - << ". " << Ydb::StatusIds::StatusCode_Name(Request->Get()->OperationStatus) - << ". Issues: " << Request->Get()->Issues.ToOneLineString() << ". Plan: " << Request->Get()->QueryPlan.value_or("")); - } - - if (!ApplicateScriptExternalEffectRequired || status != Ydb::StatusIds::SUCCESS) { - Send(Owner, new TEvScriptExecutionFinished(OperationAlreadyFinalized, status, issues)); - return; + KQP_PROXY_LOG_D("Finish script execution operation. ExecutionId: " << Request.ExecutionId + << ". " << Ydb::StatusIds::StatusCode_Name(Request.OperationStatus) + << ". Issues: " << Request.Issues.ToOneLineString() << ". 
Plan: " << Request.QueryPlan.value_or("")); } - auto response = std::make_unique(CustomerSuppliedId, UserToken); - response->Sinks = std::move(Sinks); - response->SecretNames = std::move(SecretNames); + Response->Status = status; + Response->Issues = std::move(issues); - Send(Owner, response.release()); + Send(Owner, Response.release()); } private: bool HasExternalEffect() const { - return !Sinks.empty(); + return !Response->Sinks.empty(); } private: - TEvScriptFinalizeRequest::TPtr Request; + TEvScriptFinalizeRequest::TDescription Request; + std::unique_ptr Response; - bool OperationAlreadyFinalized = false; bool FinalStatusAlreadySaved = false; - bool ApplicateScriptExternalEffectRequired = false; TDuration OperationTtl; - TString CustomerSuppliedId; - TString UserToken; TMaybe SerializedSinks; - std::vector Sinks; TMaybe SerializedSecretNames; - std::vector SecretNames; }; class TScriptFinalizationFinisherActor : public TQueryBase { @@ -2830,7 +2752,7 @@ NActors::IActor* CreateScriptLeaseUpdateActor(const TActorId& runScriptActorId, } NActors::IActor* CreateSaveScriptExecutionResultMetaActor(const NActors::TActorId& runScriptActorId, const TString& database, const TString& executionId, const TString& serializedMeta) { - return new TSaveScriptExecutionResultMetaActor(runScriptActorId, database, executionId, serializedMeta); + return new TQueryRetryActor(runScriptActorId, database, executionId, serializedMeta); } NActors::IActor* CreateSaveScriptExecutionResultActor(const NActors::TActorId& runScriptActorId, const TString& database, const TString& executionId, i32 resultSetId, TMaybe expireAt, i64 firstRow, Ydb::ResultSet&& resultSet) { @@ -2842,15 +2764,15 @@ NActors::IActor* CreateGetScriptExecutionResultActor(const NActors::TActorId& re } NActors::IActor* CreateSaveScriptExternalEffectActor(TEvSaveScriptExternalEffectRequest::TPtr ev) { - return new TSaveScriptExternalEffectActor(std::move(ev)); + return new TQueryRetryActor(ev->Sender, ev->Get()->Description); 
} -NActors::IActor* CreateSaveScriptFinalStatusActor(TEvScriptFinalizeRequest::TPtr ev) { - return new TSaveScriptFinalStatusActor(std::move(ev)); +NActors::IActor* CreateSaveScriptFinalStatusActor(const NActors::TActorId& finalizationActorId, TEvScriptFinalizeRequest::TPtr ev) { + return new TQueryRetryActor(finalizationActorId, ev->Get()->Description); } -NActors::IActor* CreateScriptFinalizationFinisherActor(const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues) { - return new TScriptFinalizationFinisherActor(executionId, database, operationStatus, std::move(operationIssues)); +NActors::IActor* CreateScriptFinalizationFinisherActor(const NActors::TActorId& finalizationActorId, const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues) { + return new TQueryRetryActor, NYql::TIssues>(finalizationActorId, executionId, database, operationStatus, operationIssues); } NActors::IActor* CreateScriptProgressActor(const TString& executionId, const TString& database, const TString& queryPlan, const TString& queryStats) { diff --git a/ydb/core/kqp/proxy_service/kqp_script_executions.h b/ydb/core/kqp/proxy_service/kqp_script_executions.h index 5781046a1df7..ea4fae00e842 100644 --- a/ydb/core/kqp/proxy_service/kqp_script_executions.h +++ b/ydb/core/kqp/proxy_service/kqp_script_executions.h @@ -33,8 +33,8 @@ NActors::IActor* CreateGetScriptExecutionResultActor(const NActors::TActorId& re // Compute external effects and updates status in database NActors::IActor* CreateSaveScriptExternalEffectActor(TEvSaveScriptExternalEffectRequest::TPtr ev); -NActors::IActor* CreateSaveScriptFinalStatusActor(TEvScriptFinalizeRequest::TPtr ev); -NActors::IActor* CreateScriptFinalizationFinisherActor(const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues); +NActors::IActor* CreateSaveScriptFinalStatusActor(const 
NActors::TActorId& finalizationActorId, TEvScriptFinalizeRequest::TPtr ev); +NActors::IActor* CreateScriptFinalizationFinisherActor(const NActors::TActorId& finalizationActorId, const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues); NActors::IActor* CreateScriptProgressActor(const TString& executionId, const TString& database, const TString& queryPlan, const TString& queryStats); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 3e75c1b9ba42..608147bd557b 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -947,11 +947,6 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { NYql::IDqIntegration* dqIntegration = provider->second->GetDqIntegration(); YQL_ENSURE(dqIntegration, "Unsupported dq source for provider: \"" << dataSourceCategory << "\""); auto& externalSource = *protoSource->MutableExternalSource(); - google::protobuf::Any& settings = *externalSource.MutableSettings(); - TString& sourceType = *externalSource.MutableType(); - dqIntegration->FillSourceSettings(source.Ref(), settings, sourceType); - YQL_ENSURE(!settings.type_url().empty(), "Data source provider \"" << dataSourceCategory << "\" did't fill dq source settings for its dq source node"); - YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceCategory << "\" did't fill dq source settings type for its dq source node"); // Partitioning TVector partitionParams; @@ -976,6 +971,12 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { externalSource.SetAuthInfo(CreateStructuredTokenParser(token).ToBuilder().RemoveSecrets().ToJson()); CreateStructuredTokenParser(token).ListReferences(SecretNames); } + + google::protobuf::Any& settings = *externalSource.MutableSettings(); + TString& sourceType = *externalSource.MutableType(); + 
dqIntegration->FillSourceSettings(source.Ref(), settings, sourceType, maxTasksPerStage); + YQL_ENSURE(!settings.type_url().empty(), "Data source provider \"" << dataSourceCategory << "\" didn't fill dq source settings for its dq source node"); + YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceCategory << "\" didn't fill dq source settings type for its dq source node"); } } diff --git a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp index 76c06908c638..e346725ef5ea 100644 --- a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp +++ b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp @@ -206,10 +206,6 @@ class TRunScriptActor : public NActors::TActorBootstrapped { WaitFinalizationRequest = true; RunState = IsExecuting() ? ERunState::Finishing : RunState; - if (RunState == ERunState::Cancelling) { - Issues.AddIssue("Script execution is cancelled"); - } - auto scriptFinalizeRequest = std::make_unique( GetFinalizationStatusFromRunState(), ExecutionId, Database, Status, GetExecStatusFromStatusCode(Status), Issues, std::move(QueryStats), std::move(QueryPlan), std::move(QueryAst), LeaseGeneration diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 2d43845429eb..6bf41c2962ee 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1758,10 +1758,17 @@ class TKqpSessionActor : public TActorBootstrapped { const auto& phyQuery = QueryState->PreparedQuery->GetPhysicalQuery(); FillColumnsMeta(phyQuery, response); - } else if (compileResult->Status == Ydb::StatusIds::TIMEOUT && QueryState->QueryDeadlines.CancelAt) { - // The compile timeout cause cancelation execution of request. 
- // So in case of cancel after we can reply with canceled status - ev.SetYdbStatus(Ydb::StatusIds::CANCELLED); + } else { + if (compileResult->Status == Ydb::StatusIds::TIMEOUT && QueryState->QueryDeadlines.CancelAt) { + // The compile timeout cause cancelation execution of request. + // So in case of cancel after we can reply with canceled status + ev.SetYdbStatus(Ydb::StatusIds::CANCELLED); + } + + auto& preparedQuery = compileResult->PreparedQuery; + if (preparedQuery && QueryState->ReportStats() && QueryState->GetStatsMode() >= Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL) { + response.SetQueryAst(preparedQuery->GetPhysicalQuery().GetQueryAst()); + } } } diff --git a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp b/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp deleted file mode 100644 index 7b426cbdd468..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "ch_recipe_ut_helpers.h" - -#include -#include - -namespace NTestUtils { - - TString GetChHost() { - return "localhost"; - } - - ui32 GetChPort() { - return 19000; - } - - TString GetChUser() { - return "user"; - } - - TString GetChPassword() { - return "password"; - } - - TString GetChDatabase() { - return "default"; - } - - NClickHouse::TClient CreateClickhouseClient() { - NClickHouse::TClientOptions opt; - opt - .SetHost(GetChHost()) - .SetPort(GetChPort()) - .SetUser(GetChUser()) - .SetPassword(GetChPassword()); - - TInstant start = TInstant::Now(); - ui32 attempt = 0; - while ((TInstant::Now() - start).Seconds() < 60) { - attempt += 1; - try { - return NClickHouse::TClient(opt); - } catch (const TSystemError& e) { - Cerr << "Attempt " << attempt << ": " << e.what() << Endl; - Sleep(TDuration::MilliSeconds(100)); - } - } - - throw yexception() << "Failed to connect ClickHouse in " << attempt << " attempt(s)"; - } - -} // namespace NTestUtils diff --git 
a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h b/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h deleted file mode 100644 index c8f573979b51..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -namespace NTestUtils { - - TString GetChHost(); - ui32 GetChPort(); - TString GetChUser(); - TString GetChPassword(); - TString GetChDatabase(); - - NClickHouse::TClient CreateClickhouseClient(); - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp b/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp deleted file mode 100644 index 9b1ea42d4be5..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "connector_recipe_ut_helpers.h" - -#include -#include - -namespace NTestUtils { - - TString GetConnectorHost() { - return "localhost"; - } - - ui32 GetConnectorPort() { - return 50051; - } - - std::shared_ptr MakeKikimrRunnerWithConnector() { - NYql::TGenericConnectorConfig clientCfg; - clientCfg.MutableEndpoint()->set_host(GetConnectorHost()); - clientCfg.MutableEndpoint()->set_port(GetConnectorPort()); - - NKikimrConfig::TAppConfig appCfg; - appCfg.MutableFeatureFlags()->SetEnableExternalDataSources(true); - - auto kikimr = NKikimr::NKqp::NFederatedQueryTest::MakeKikimrRunner( - NYql::IHTTPGateway::Make(), - NYql::NConnector::MakeClientGRPC(clientCfg), - nullptr, - appCfg); - kikimr->GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableExternalDataSources(true); - return kikimr; - } - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h b/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h deleted file mode 100644 index e0ff53b228c6..000000000000 --- 
a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include -#include -#include - -#include - -namespace NTestUtils { - - TString GetConnectorHost(); - ui32 GetConnectorPort(); - - std::shared_ptr MakeKikimrRunnerWithConnector(); - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/docker-compose.yml b/ydb/core/kqp/ut/federated_query/generic/docker-compose.yml deleted file mode 100644 index 2991e0e883a1..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/docker-compose.yml +++ /dev/null @@ -1,25 +0,0 @@ -version: '3.4' -services: - postgresql: - image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 - environment: - POSTGRES_DB: db - POSTGRES_USER: user - POSTGRES_PASSWORD: password - ports: - - 15432:5432 - clickhouse: - image: clickhouse/clickhouse-server:23-alpine@sha256:b078c1cd294632afa2aeba3530e7ba2e568513da23304354f455a25fab575c06 - environment: - CLICKHOUSE_DB: db - CLICKHOUSE_USER: user - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 - CLICKHOUSE_PASSWORD: password - ports: - - 19000:9000 - - 18123:8123 - fq-connector-go: - image: ghcr.io/ydb-platform/fq-connector-go:v0.0.6-rc.8@sha256:74ebae0530d916c1842a7fddfbddc6c018763a0401f2f627a44e8829692fe41f - ports: - - 50051:50051 - network_mode: host diff --git a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp b/ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp deleted file mode 100644 index e849c7251c04..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp +++ /dev/null @@ -1,179 +0,0 @@ -#include "ch_recipe_ut_helpers.h" -#include "connector_recipe_ut_helpers.h" -#include "pg_recipe_ut_helpers.h" -#include -#include -#include - -#include - -#include -#include - -#include - -using namespace NYdb; -using namespace NYdb::NQuery; -using namespace NTestUtils; -using namespace fmt::literals; - 
-Y_UNIT_TEST_SUITE(KqpGenericPlanTest) { - Y_UNIT_TEST(PgSource) { - pqxx::connection pgConnection = CreatePostgresqlConnection(); - - { - pqxx::work work{pgConnection}; - const TString sql = R"sql( - CREATE TABLE pg_table_plan_test ( - key INT4 PRIMARY KEY, - name TEXT, - value INT4 - ) - )sql"; - work.exec(sql); - work.commit(); - } - - std::shared_ptr kikimr = MakeKikimrRunnerWithConnector(); - - auto tableCLient = kikimr->GetTableClient(); - auto session = tableCLient.CreateSession().GetValueSync().GetSession(); - - // external tables to pg/ch - { - const TString sql = fmt::format( - R"sql( - CREATE OBJECT pg_password_obj (TYPE SECRET) WITH (value="{pg_password}"); - CREATE EXTERNAL DATA SOURCE pg_data_source WITH ( - SOURCE_TYPE="PostgreSQL", - LOCATION="{pg_host}:{pg_port}", - DATABASE_NAME="{pg_database}", - USE_TLS="FALSE", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{pg_user}", - PASSWORD_SECRET_NAME="pg_password_obj" - ); - )sql", - "pg_host"_a = GetPgHost(), - "pg_port"_a = GetPgPort(), - "pg_user"_a = GetPgUser(), - "pg_password"_a = GetPgPassword(), - "pg_database"_a = GetPgDatabase()); - auto result = session.ExecuteSchemeQuery(sql).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } - - const TString sql = R"sql( - PRAGMA generic.UsePredicatePushdown="true"; - SELECT * FROM pg_data_source.pg_table_plan_test - WHERE key > 42 AND value <> 0 - )sql"; - - auto queryClient = kikimr->GetQueryClient(); - TExecuteQueryResult queryResult = queryClient.ExecuteQuery( - sql, - TTxControl::BeginTx().CommitTx(), - TExecuteQuerySettings().ExecMode(EExecMode::Explain)) - .GetValueSync(); - - UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); - UNIT_ASSERT(queryResult.GetStats()); - UNIT_ASSERT(queryResult.GetStats()->GetPlan()); - Cerr << "Plan: " << *queryResult.GetStats()->GetPlan() << Endl; - NJson::TJsonValue plan; - UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); 
- - const auto& stagePlan = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]; - UNIT_ASSERT_VALUES_EQUAL(stagePlan["Node Type"].GetStringSafe(), "Source"); - const auto& sourceOp = stagePlan["Operators"].GetArraySafe()[0]; - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ExternalDataSource"].GetStringSafe(), "pg_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Database"].GetStringSafe(), GetPgDatabase()); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Protocol"].GetStringSafe(), "Native"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Table"].GetStringSafe(), "pg_table_plan_test"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Name"].GetStringSafe(), "Read pg_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["SourceType"].GetStringSafe(), "PostgreSql"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[0].GetStringSafe(), "key"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[1].GetStringSafe(), "name"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[2].GetStringSafe(), "value"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Filter"].GetStringSafe(), "item.key > 42 And item.value != 0"); - } - - Y_UNIT_TEST(ChSource) { - NClickHouse::TClient chClient = CreateClickhouseClient(); - - // ch_table_plan_test - { - const TString sql = R"sql( - CREATE TABLE ch_table_plan_test ( - key INT PRIMARY KEY, - name TEXT NULL - ) - ENGINE = MergeTree - )sql"; - chClient.Execute(sql); - } - - std::shared_ptr kikimr = MakeKikimrRunnerWithConnector(); - - auto tableCLient = kikimr->GetTableClient(); - auto session = tableCLient.CreateSession().GetValueSync().GetSession(); - - // external tables to pg/ch - { - const TString sql = fmt::format( - R"sql( - CREATE OBJECT ch_password_obj (TYPE SECRET) WITH (value="{ch_password}"); - CREATE EXTERNAL DATA SOURCE ch_data_source WITH ( - SOURCE_TYPE="ClickHouse", - LOCATION="{ch_host}:{ch_port}", - DATABASE_NAME="{ch_database}", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{ch_user}", - 
PASSWORD_SECRET_NAME="ch_password_obj" - ); - )sql", - "ch_host"_a = GetChHost(), - "ch_port"_a = GetChPort(), - "ch_database"_a = GetChDatabase(), - "ch_user"_a = GetChUser(), - "ch_password"_a = GetChPassword()); - auto result = session.ExecuteSchemeQuery(sql).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } - - const TString sql = R"sql( - PRAGMA generic.UsePredicatePushdown="true"; - SELECT * FROM ch_data_source.ch_table_plan_test - WHERE name IS NOT NULL - )sql"; - - auto queryClient = kikimr->GetQueryClient(); - TExecuteQueryResult queryResult = queryClient.ExecuteQuery( - sql, - TTxControl::BeginTx().CommitTx(), - TExecuteQuerySettings().ExecMode(EExecMode::Explain)) - .GetValueSync(); - - UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); - UNIT_ASSERT(queryResult.GetStats()); - UNIT_ASSERT(queryResult.GetStats()->GetPlan()); - Cerr << "Plan: " << *queryResult.GetStats()->GetPlan() << Endl; - NJson::TJsonValue plan; - UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); - - const auto& stagePlan = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]; - UNIT_ASSERT_VALUES_EQUAL(stagePlan["Node Type"].GetStringSafe(), "Source"); - const auto& sourceOp = stagePlan["Operators"].GetArraySafe()[0]; - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ExternalDataSource"].GetStringSafe(), "ch_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Database"].GetStringSafe(), GetChDatabase()); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Protocol"].GetStringSafe(), "Native"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Table"].GetStringSafe(), "ch_table_plan_test"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Name"].GetStringSafe(), "Read ch_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["SourceType"].GetStringSafe(), "ClickHouse"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[0].GetStringSafe(), "key"); - 
UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[1].GetStringSafe(), "name"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Filter"].GetStringSafe(), "Exist(item.name)"); - } -} diff --git a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp b/ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp deleted file mode 100644 index 4c88715f2af6..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include "ch_recipe_ut_helpers.h" -#include "connector_recipe_ut_helpers.h" -#include "pg_recipe_ut_helpers.h" - -#include - -#include - -#include - -using namespace NTestUtils; -using namespace fmt::literals; - -Y_UNIT_TEST_SUITE(FederatedQueryJoin) { - Y_UNIT_TEST(InnerJoinChPg) { - pqxx::connection pgConnection = CreatePostgresqlConnection(); - NClickHouse::TClient chClient = CreateClickhouseClient(); - - // pg_table_inner_join_test - { - pqxx::work work{pgConnection}; - const TString sql = R"sql( - CREATE TABLE pg_table_inner_join_test ( - key INT PRIMARY KEY, - name TEXT - ) - )sql"; - work.exec(sql); - - const TString insertData = R"sql( - INSERT INTO pg_table_inner_join_test - (key, name) - VALUES - (1, 'A'), - (2, 'B'), - (1000, 'C'); - )sql"; - work.exec(insertData); - - work.commit(); - } - - // ch_table_inner_join_test - { - const TString sql = R"sql( - CREATE TABLE ch_table_inner_join_test ( - key INT PRIMARY KEY, - name TEXT - ) - ENGINE = MergeTree - )sql"; - chClient.Execute(sql); - - const TString insertData = R"sql( - INSERT INTO ch_table_inner_join_test - (key, name) - VALUES - (1, 'X'), - (3, 'Y'), - (1000, 'Z'); - )sql"; - chClient.Execute(insertData); - } - - std::shared_ptr kikimr = MakeKikimrRunnerWithConnector(); - auto queryClient = kikimr->GetQueryClient(); - - // external tables to pg/ch - { - const TString sql = fmt::format( - R"sql( - CREATE OBJECT pg_password_obj (TYPE SECRET) WITH (value="{pg_password}"); - CREATE EXTERNAL DATA 
SOURCE pg_data_source WITH ( - SOURCE_TYPE="PostgreSQL", - LOCATION="{pg_host}:{pg_port}", - DATABASE_NAME="{pg_database}", - USE_TLS="FALSE", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{pg_user}", - PASSWORD_SECRET_NAME="pg_password_obj" - ); - - CREATE OBJECT ch_password_obj (TYPE SECRET) WITH (value="{ch_password}"); - CREATE EXTERNAL DATA SOURCE ch_data_source WITH ( - SOURCE_TYPE="ClickHouse", - LOCATION="{ch_host}:{ch_port}", - DATABASE_NAME="{ch_database}", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{ch_user}", - PASSWORD_SECRET_NAME="ch_password_obj" - ); - )sql", - "pg_host"_a = GetPgHost(), - "pg_port"_a = GetPgPort(), - "pg_user"_a = GetPgUser(), - "pg_password"_a = GetPgPassword(), - "pg_database"_a = GetPgDatabase(), - "ch_host"_a = GetChHost(), - "ch_port"_a = GetChPort(), - "ch_database"_a = GetChDatabase(), - "ch_user"_a = GetChUser(), - "ch_password"_a = GetChPassword()); - auto result = queryClient.ExecuteQuery(sql, NYdb::NQuery::TTxControl::NoTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } - - // join - const TString sql = R"sql( - SELECT pg.* FROM ch_data_source.ch_table_inner_join_test AS ch - INNER JOIN pg_data_source.pg_table_inner_join_test AS pg - ON ch.key = pg.key - WHERE ch.key > 998 - )sql"; - - auto result = queryClient.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - // results - auto resultSet = result.GetResultSetParser(0); - UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 1); - UNIT_ASSERT(resultSet.TryNextRow()); - - const TMaybe key = resultSet.ColumnParser("key").GetOptionalInt32(); - UNIT_ASSERT(key); - UNIT_ASSERT_VALUES_EQUAL(*key, 1000); - - const TMaybe name = resultSet.ColumnParser("name").GetOptionalUtf8(); - UNIT_ASSERT(name); - UNIT_ASSERT_VALUES_EQUAL(name, "C"); - } -} diff --git a/ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json 
b/ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json deleted file mode 100644 index bdae23d53676..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "clickhouse_clusters": { - "ch-managed": { - "hosts": [ - { - "name": "ch-managed-1", - "cluster_id": "ch-managed", - "health": 1 - } - ] - } - } -} diff --git a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp b/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp deleted file mode 100644 index 12b7ee6103e3..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "pg_recipe_ut_helpers.h" - -#include -#include - -#include - -using namespace fmt::literals; - -namespace NTestUtils { - - TString GetPgHost() { - return "localhost"; - } - - ui32 GetPgPort() { - return 15432; - } - - TString GetPgUser() { - return "user"; - } - - TString GetPgDatabase() { - return "db"; - } - - TString GetPgPassword() { - return "password"; - } - - pqxx::connection CreatePostgresqlConnection() { - const TString connectionString = fmt::format( - "host={host} port={port} dbname={database} user={user} password={password}", - "host"_a = GetPgHost(), - "port"_a = GetPgPort(), - "database"_a = GetPgDatabase(), - "user"_a = GetPgUser(), - "password"_a = GetPgPassword()); - - TInstant start = TInstant::Now(); - ui32 attempt = 0; - while ((TInstant::Now() - start).Seconds() < 60) { - attempt += 1; - try { - return pqxx::connection{connectionString}; - } catch (const pqxx::broken_connection& e) { - Cerr << "Attempt " << attempt << ": " << e.what() << Endl; - Sleep(TDuration::MilliSeconds(100)); - } - } - - throw yexception() << "Failed to connect PostgreSQL in " << attempt << " attempt(s)"; - } - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h b/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h deleted file mode 
100644 index 0e46e1ac3dc0..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include - -#include - -namespace NTestUtils { - - TString GetPgHost(); - ui32 GetPgPort(); - TString GetPgUser(); - TString GetPgDatabase(); - TString GetPgPassword(); - - pqxx::connection CreatePostgresqlConnection(); - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp b/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp index 8ec1995b5e53..a57c404fa5a8 100644 --- a/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp @@ -74,6 +74,22 @@ namespace NKikimr::NKqp { return settings; } + std::shared_ptr MakeDatabaseAsyncResolver(EProviderType providerType) { + std::shared_ptr databaseAsyncResolverMock; + + switch (providerType) { + case EProviderType::ClickHouse: + // We test access to managed databases only on the example of ClickHouse + databaseAsyncResolverMock = std::make_shared(); + databaseAsyncResolverMock->AddClickHouseCluster(); + break; + default: + break; + } + + return databaseAsyncResolverMock; + } + Y_UNIT_TEST_SUITE(GenericFederatedQuery) { void TestSelectAllFields(EProviderType providerType) { // prepare mock @@ -108,7 +124,6 @@ namespace NKikimr::NKqp { // step 3: ReadSplits std::vector colData = {10, 20, 30, 40, 50}; clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select() @@ -125,11 +140,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run 
test auto appConfig = CreateDefaultAppConfig(); @@ -162,15 +173,15 @@ namespace NKikimr::NKqp { MATCH_RESULT_WITH_INPUT(colData, resultSet, GetUint16); } - Y_UNIT_TEST(PostgreSQLLocal) { + Y_UNIT_TEST(PostgreSQLOnPremSelectAll) { TestSelectAllFields(EProviderType::PostgreSQL); } - Y_UNIT_TEST(ClickHouseManaged) { + Y_UNIT_TEST(ClickHouseManagedSelectAll) { TestSelectAllFields(EProviderType::ClickHouse); } - Y_UNIT_TEST(YdbManaged) { + Y_UNIT_TEST(YdbManagedSelectAll) { TestSelectAllFields(EProviderType::Ydb); } @@ -208,7 +219,6 @@ namespace NKikimr::NKqp { // step 3: ReadSplits clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select() @@ -222,11 +232,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run test auto appConfig = CreateDefaultAppConfig(); @@ -258,7 +264,7 @@ namespace NKikimr::NKqp { } } - Y_UNIT_TEST(PostgreSQLSelectConstant) { + Y_UNIT_TEST(PostgreSQLOnPremSelectConstant) { TestSelectConstant(EProviderType::PostgreSQL); } @@ -304,7 +310,6 @@ namespace NKikimr::NKqp { // step 3: ReadSplits clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select() @@ -318,11 +323,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run test auto appConfig = CreateDefaultAppConfig(); @@ -413,7 +414,6 @@ 
namespace NKikimr::NKqp { std::vector filterColumnData = {42, 24}; // clang-format off clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select(select) @@ -426,11 +426,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run test auto appConfig = CreateDefaultAppConfig(); diff --git a/ydb/core/kqp/ut/federated_query/ya.make b/ydb/core/kqp/ut/federated_query/ya.make index d09e0e44937f..a0defaea0fea 100644 --- a/ydb/core/kqp/ut/federated_query/ya.make +++ b/ydb/core/kqp/ut/federated_query/ya.make @@ -1,6 +1,5 @@ RECURSE_FOR_TESTS( common - generic generic_ut s3 style diff --git a/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp b/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp index 2000c73ad025..6aa539e02e5f 100644 --- a/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp @@ -89,6 +89,107 @@ Y_UNIT_TEST_SUITE(KqpAgg) { [["Value3"];[1]] ])", FormatResultSetYson(result.GetResultSet(0))); } + + Y_UNIT_TEST(AggWithHop) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + SELECT + Text, + CAST(COUNT(*) as Int32) as Count, + SUM(Data) + FROM EightShard + GROUP BY HOP(CAST(Key AS Timestamp?), "PT1M", "PT1M", "PT1M"), Text + ORDER BY Text; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [["Value1"];[8];[15]]; + [["Value2"];[8];[16]]; + [["Value3"];[8];[17]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + + 
Y_UNIT_TEST(GroupByLimit) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + CREATE TABLE `TestTable` ( + a Uint64, + b Uint64, + c Uint64, + d Uint64, + e Uint64, + PRIMARY KEY (a, b, c) + ); + )").GetValueSync()); + + AssertSuccessResult(session.ExecuteDataQuery(R"( + REPLACE INTO `TestTable` (a, b, c, d, e) VALUES + (1, 11, 21, 31, 41), + (2, 12, 22, 32, 42), + (3, 13, 23, 33, 43); + )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync()); + + + { // query with 36 groups and limit 32 + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + PRAGMA GroupByLimit = '32'; + + SELECT a, b, c, d, SUM(e) Data FROM TestTable + GROUP BY ROLLUP(a, b, c, d, a * b AS ab, b * c AS bc, c * d AS cd, a + b AS sum) + ORDER BY a, b, c, d; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + } + + { // query with 36 groups (without explicit limit) + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + SELECT a, b, c, d, SUM(e) Data FROM TestTable + GROUP BY ROLLUP(a, b, c, d, a * b AS ab, b * c AS bc, c * d AS cd, a + b AS sum) + ORDER BY a, b, c, d; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [#;#;#;#;[126u]]; + [[1u];#;#;#;[41u]]; + [[1u];[11u];#;#;[41u]]; + [[1u];[11u];[21u];#;[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[2u];#;#;#;[42u]]; + [[2u];[12u];#;#;[42u]]; + [[2u];[12u];[22u];#;[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + 
[[2u];[12u];[22u];[32u];[42u]]; + [[3u];#;#;#;[43u]]; + [[3u];[13u];#;#;[43u]]; + [[3u];[13u];[23u];#;[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 3b7692a6ad30..a2fc437604ff 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -5025,6 +5025,54 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/*"); } + Y_UNIT_TEST(CreateExternalTableWithUpperCaseSettings) { + TKikimrRunner kikimr; + kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableExternalDataSources(true); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + TString externalDataSourceName = "/Root/ExternalDataSource"; + TString externalTableName = "/Root/ExternalTable"; + auto query = TStringBuilder() << R"( + CREATE EXTERNAL DATA SOURCE `)" << externalDataSourceName << R"(` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + CREATE EXTERNAL TABLE `)" << externalTableName << R"(` ( + Key Uint64, + Value String, + Year Int64 NOT NULL, + Month Int64 NOT NULL + ) WITH ( + DATA_SOURCE=")" << externalDataSourceName << R"(", + LOCATION="/folder1/*", + FORMAT="json_as_string", + `projection.enabled`="true", + `projection.Year.type`="integer", + `projection.Year.min`="2010", + `projection.Year.max`="2022", + `projection.Year.interval`="1", + `projection.Month.type`="integer", + `projection.Month.min`="1", + `projection.Month.max`="12", + `projection.Month.interval`="1", + `projection.Month.digits`="2", + `storage.location.template`="${Year}/${Month}", + 
PARTITIONED_BY = "[Year, Month]" + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + auto externalTableDesc = Navigate(runtime, runtime.AllocateEdgeActor(), externalTableName, NKikimr::NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + const auto& externalTable = externalTableDesc->ResultSet.at(0); + UNIT_ASSERT_EQUAL(externalTable.Kind, NKikimr::NSchemeCache::TSchemeCacheNavigate::EKind::KindExternalTable); + UNIT_ASSERT(externalTable.ExternalTableInfo); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.ColumnsSize(), 4); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetDataSourcePath(), externalDataSourceName); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/*"); + } + Y_UNIT_TEST(DoubleCreateExternalTable) { TKikimrRunner kikimr; kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableExternalDataSources(true); diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index aa8009bcacab..84afe3c52cb7 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -628,6 +628,29 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL(totalTasks, 2); } + Y_UNIT_TEST(ExecStatsAst) { + auto kikimr = DefaultKikimrRunner(); + auto db = kikimr.GetQueryClient(); + + auto settings = TExecuteQuerySettings() + .StatsMode(EStatsMode::Full); + + std::vector> cases = { + { "SELECT 42 AS test_ast_column", EStatus::SUCCESS }, + { "SELECT test_ast_column FROM TwoShard", EStatus::GENERIC_ERROR }, + { "SELECT UNWRAP(42 / 0) AS test_ast_column", EStatus::PRECONDITION_FAILED }, + }; + + for (const auto& [sql, status] : cases) { + auto result = db.ExecuteQuery(sql, 
TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), status, result.GetIssues().ToString()); + + UNIT_ASSERT(result.GetStats().Defined()); + UNIT_ASSERT(result.GetStats()->GetAst().Defined()); + UNIT_ASSERT_STRING_CONTAINS(*result.GetStats()->GetAst(), "test_ast_column"); + } + } + Y_UNIT_TEST(Ddl) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); diff --git a/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp index 5fdb3ed11809..9b17d3cfede0 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp @@ -202,6 +202,32 @@ Y_UNIT_TEST_SUITE(KqpQueryServiceScripts) { ])", FormatResultSetYson(results.GetResultSet())); } + Y_UNIT_TEST(ExecuteScriptWithParameters) { + auto kikimr = DefaultKikimrRunner(); + auto db = kikimr.GetQueryClient(); + + auto params = TParamsBuilder() + .AddParam("$value").Int64(17).Build() + .Build(); + + auto scriptExecutionOperation = db.ExecuteScript(R"( + DECLARE $value As Int64; + SELECT $value; + )", params).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr.GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + + TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync(); + UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString()); + + CompareYson(R"([ + [17] + ])", FormatResultSetYson(results.GetResultSet())); + } void ExecuteScriptWithStatsMode(Ydb::Query::StatsMode statsMode) { auto 
kikimr = DefaultKikimrRunner(); @@ -394,14 +420,13 @@ Y_UNIT_TEST_SUITE(KqpQueryServiceScripts) { i32 successCount = 0; for (auto& f : forgetFutures) { auto forgetStatus = f.ExtractValueSync(); - UNIT_ASSERT_C(forgetStatus.GetStatus() == NYdb::EStatus::SUCCESS || forgetStatus.GetStatus() == NYdb::EStatus::NOT_FOUND || - forgetStatus.GetStatus() == NYdb::EStatus::ABORTED, forgetStatus.GetIssues().ToString()); + UNIT_ASSERT_C(forgetStatus.GetStatus() == NYdb::EStatus::SUCCESS || forgetStatus.GetStatus() == NYdb::EStatus::NOT_FOUND, forgetStatus.GetIssues().ToString()); if (forgetStatus.GetStatus() == NYdb::EStatus::SUCCESS) { ++successCount; } } - UNIT_ASSERT(successCount == 1); + UNIT_ASSERT(successCount >= 1); auto op = opClient.Get(scriptExecutionOperation.Id()).ExtractValueSync(); auto forgetStatus = opClient.Forget(scriptExecutionOperation.Id()).ExtractValueSync(); diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 978d3e3b1f97..16624c6101c0 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -860,11 +860,11 @@ namespace Tests { ); std::shared_ptr databaseAsyncResolver; - if (queryServiceConfig.GetGeneric().HasMdbGateway() && queryServiceConfig.HasMdbTransformHost()) { + if (queryServiceConfig.GetGeneric().HasMdbGateway() || queryServiceConfig.GetGeneric().HasYdbMvpEndpoint()) { databaseAsyncResolver = std::make_shared( Runtime->GetActorSystem(nodeIdx), databaseResolverActorId, - "", + queryServiceConfig.GetGeneric().GetYdbMvpEndpoint(), queryServiceConfig.GetGeneric().GetMdbGateway(), NFq::MakeMdbEndpointGeneratorGeneric(queryServiceConfig.GetMdbTransformHost()) ); diff --git a/ydb/docs/ru/core/concepts/datamodel/external_data_source.md b/ydb/docs/ru/core/concepts/datamodel/external_data_source.md index 69c036ac4e06..62d8fd1526a5 100644 --- a/ydb/docs/ru/core/concepts/datamodel/external_data_source.md +++ b/ydb/docs/ru/core/concepts/datamodel/external_data_source.md @@ -2,7 +2,7 @@ {% note 
warning %} -Данная функциональность находится в режиме "Preview". +Данная функциональность находится в режиме "Experimental". {% endnote %} diff --git a/ydb/docs/ru/core/concepts/federated_query/_assets/architecture.png b/ydb/docs/ru/core/concepts/federated_query/_assets/architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..6a835c2680338038a34e2328bed11304b52c25ea GIT binary patch literal 30414 zcmeFZ1z1(xn>UPrl+q>AoriA8Lzf`kedz8E0qIg{kPxIxx_A#P+I!!iUIvDsQZ)xZ10sQvh5v zqz68UIyl%sG$6)umM+lw96Zb%+|XO(q*WE-#KhVjdW)Ealf5l4 zBFV2MlT%ft~E#ogq%ZI6DV38{6HuijkR-ljR>vL(k?0adNh_x4SzU8#6aE zFbcf@aWOLgHEJU74>wDpj3GIFp`us4O+01KOX{92ZcgBKtdbnB*`zeadjd43I;JNeli<3->+TT8vUMpcM@j{BU5|# zU-$nU`jaca==htOPWJY|dVetO&&$6j*?NdUY@o#cO~^7i%)z#vpbtspKg zo_8{00R2qMNv2$_qgx>Vm zmp|@;Ui zHv6AskmJ`m|K~CuP!A_hC5V$HKzqQN{}P=41Zntf?;MQ&3&r8zX74Xi`z>&yPEJNp zS^F(k4)&IIF3wPP|HoJPy_T(q67+6hwL3q-&U$BUp!x;XAkP1=_4#v`|Kau7S-E}* z>c6iJf3D8~_A5+TxKI?Z(|P?a%y%CQzHnknF%n(*~Q7; z8uG^#ZWCUJu^DjX*Ykkr#RYQ5*MG&W{C~A^cPsM#lb}G^13jhtAI|x1$l?#b{9B%W zOYeWFQM{A2KN#^p%p3lv7{xn(=<6{vqdfcoEXk#_;{e}8HCU-fGL z3Ip$A;s03-ygTO~wpQ#{@&TC2|4fT(W(MIl`ByEjDG#49;4%MeEH3+R+j-}ke_P=H z&KCF1aQwj{C8>YG8z3S3nJSxj+E@aahZ6@t(0|7#8vn{shG_AaP{}l zpMI3<&bR&_@uPRv%XHqN9K2D%Tgg zoge)xM)?1fy8g^t{$CKQ-x<)~*x;X;&Cjd9(a2vH?rwl$!oTuLS3K8K7#K1b8F5in z5B=>7M3>hRXFZw63lHL+(gy2`;euI|aa4tbFva1;JD68U8K)|8C@AQ1j73FKKY8zw zQQrg0!G3SqvpW>#@F9E^7pL%k`f`YG&y%PBwYUEfjlj^{S&W3Eqa&N9bO8L_PrEb{ z4&70F>Z`EhIn&%#8gdXV@^?@AK9z4G*-XJ;>M~fm2$a>yoT*vf_MQPWZuQ!Vn%bb! 
z1i|#3`K9k0aPCs}9=u+idVZ&$816qn0hJz_np26u{`%3TiF^4fAo+_$SDv1P%i(>~ zZ|`0ch=$Rcd!tWj&UW;CLQp?^{~{p)wFG3Vq?`LZM>;Tvg8H=tN`WfMg{!_1o)ZfD z;4#D&5+oAagLG&UVOVZM$U?2QW!aTs({|( z_SwQf9Cc%ff&O9C+KJPe_vBB6iTUmm^f5~(Q83dp;;ndZQgo>gA`p+!kTuVkJhK?L zvYJZ%O%@Ryc=n^u4%}-P!(BvRE4!TS+BZ66KDw|=J6K38>z6Z+iw67-QL5t8aYJ)U zc)QGXL(jEw$m?eiPsAxR6V*=18I&w6gToX4qV#4&z*OrjZVg(#30h8lJ<{;$F46&- zM1}jKZCj!(E6wOD+~sZL6dnh`3s}26y##JA{tJqnZi`^<4Tljwx@yjzp@_c-J3~eG zd2I*L-GZg7g;KbFQqw;bf7dtJj9rGCYrv?kNx`u!9chv8_f9rOsUvu7Z=ECMAY zQT?EbVp&q25H(+p18$a#Sdn1(rbzwMaeRt4KGi zue+bH$$=5{7_~)=?cLW`klH8o)yJG{M2UBF9*PbKbDhC#KnTsL`|xp$P%t$*ASk*@ z#xM+c^igH#(%Mp>qn_D^EC0YN9Bmr57MJIDcRtK)UoM`l-1)!&@5L>VF9~O4X#_c! z)ad+>JM@Ss=9bWH;?#rR_|4F=M7A1gK#A;~NKz?rz*6kr9PO01_bhAfvYKU6+JYAdyw~VZ$Ev2K;VRzQ zDJT`7RNj6w4!&^B87?M!s2 zCrT-ssXftI7;GA~H*domlE|h=V@rFdkS640!HG##q{_~AZ|yh4_8p4Mncis(4&NkA zkaN+JfE}J$s68NN{EG@-UI$=kgRc3%N*+qksiC-7^Aia%-*W8?FU{cM7CTURRS6tb zh)Kgg)nAm!hB#D-2mQ8P;>bwZxSNZ(qHjJrE**M(!!ga$QzGR&w6#4ZS*L#w;XaR= z!wJ3!l@o2aoC2eO-ezj?|F%t%U=dKY_N8lxw7&zV7e|*Pmp-$f1^ySpUCCsSe#wwt zjBxlMa=wzPZj_zEivb+G93=~a{va6+nP0ZE9S7+(p0@ineNXf^y7bs5Va*?GI34VR z)$h$OZ!THx??%{u*a)r)YxE_$KOg|_#U-}Y{>Gugk-0JhJh+q}kMQ!B0tI}>sMoQW zKTuH175}#LwVucywkU^!pbmEh8v)Skiucv*bTBENh4qO(By9ch%7)<_gtH-lrmEaD zZRd`%7Zko_`z^PiI=SDh3k;pQ4_GsW7B_tv*5D&Pj z$vu)Dl! 
z6t_aEoLT+)7zVx9%Qf*>6{)x@>Jb&Bn}UxKP3yw)6DOh$ExcE)h*%~o?`)F>Um%XQ z8in!wzl@G8=BTPg0*PXmU3h>exyQ>YJMuc#;%~jorz8&VotvDen>xA|h`H5ID{8A> zB;?iLn5!ppS5&?aDU!Yqd6qr*P8FB_n_jjuSsg3(^aa5oYF|3+0Uv6}!1YVE;EE%+ zgP>SzsGTkfc@8etuGdWYtr%I{@KzMn1F^Kh8pdW!?1wL9k#ir?r(3CXStYzGeUiSd zyS%bQUdPaqVV(3f5?sv9@uBuFdpk~%g_^B49dHq$+dz@0v?@22r&_2{hQq!v_~{dt zdSb0ph+K(>mX22E%qN7?+NTbcAi6z^0_6IC06&b*m{U!Q?5^vm7{|tb5NpJ3AljHth}6u10D*+`oT+ zd8cl%N1;3j>0~b=(NFK=bG9Rkly1(?GQUouLK~Nw8bQcow*(yc@J({ECdHdC6h+s9 zEG&w}nqP8?eDVP&=2RmGKO}szg9HRD%I1FuEHyN7adAo%Il*xZprS5w*C^HRfx3En zF#)U_9!El)G}L#}q4%0G=Wk&==&9}X}kvzZ{^*<=C^XDdID z_?LGC{{Ipdpc&ujunaq#DQvLz=G7~ePnk_kPp)rnAQ+b|yIky92+_c1Lj}E0Me>yU zCpHsGKfw+zEa=}xv1xO`4AJ~8hLmIr6SGb3_Ib|-Iy>P?wQIndnwtH3prZ8j^dcTj z;t3&_4U9(jJ$*?-PjBx*s&}9B6+^WR3l*|Pi#q!aCx07?VffR%#aP?f+I~4CoM*un z44`+x^;k)orR*=TFfjx5n%rbm_NXi9s%6_=Sw7^DBm8aj+L6U2C5_K^=bdyE=DvRY zn%AB2#Jwj=G>Fw^iZwVil&~H@A#P?qX|Qq z^%_;{B{a2kY#)@G>w6+Bv@SUDCMbtY5wd9>4QgLFwZAlI74<0h57}Y@y-KSbJdG!A+W|<1_$#uyouTMFQdDY}B|EsK5y6S9EB%ftjt*y{>EV2+jMj|J z99P8Gjn_O(21kPY?t8k~8NKMFy&tXF91tGA6z=3v^P39nbHd zZx)xE3KiepTzw5hB#DESqgpjBOb;jvo*0s;s`$D`%>6M`;uF7nO{gH3is=&B@2#ds_#fH^pk7sW#Q9D|pa-}rN2 z^r1x-S_d(k;W>;??Xk+ohvUj16c8xTl;@Y|zEcpP!or2MK3M5i)@(UniG1|Lt;F() z9^!(fjz#I*v9nBb0+Ly53qhp4&<%oj?Oykj%+?^po@hx)FNDUrjlgH1;frG?7W^mM|(`mH`?TtZ4nM!s7gK@ecFRz|bMEEbv@WX?^cpIXRS5WG}8dmjQG)HI85`xTgd;R_ru z)(}G~KANn~s zAS6N1Mvw4AwmwTeClMU&k9(<{Cd43YK-5`So!8_6^UKaHH)td0E|sko>M+d(EN6$5 zo9;>4PrC@NWFS4Y%RYQxdnU=|nrKIp)%X?%H;Z+h#z{FQTRI*Tl4zfVZn!?)+9+7* z2SRGz|M1}abk;sTB}FSzf2x`RIHDTn$_Q}8hp8fSr-<>czNor{4};CWB3HP7btFd+ zG=fAiN?t~fOE*@-7k%J7KhZ;;?1_C8dm(A}Jq(l5VQT_JB7* zZJZ$IEs8EV3Jxfy+`)DVq_XdWr6SVu&nIDB2gR$|~7vyi7^vQ4+1 z!JV~~7NIIM)kZ`$8qb+YJZk&e6)pH^+Ne<-J80rVF2A+c!33Lb*ppz5P#>9WHS^cg zY{(k{0Z2Y=?>~_;x3(-#F6$9T^^v=XlCkVC*%A2=>F=zy=YRImduUl}66S(7-{jt# z%4H4p3RSCF4|-MJWAC55;Thx4dXhS^mla8V!u|rR6;QhF2Ub~Be>8sUhyrREf<>gh z2f~!TS;wX#@w`Va=x;2an3()nmZ$FNu;|kn8O|H~m)XgtFWr1Dzvp}0UaUK=4&ZXy 
z%}r7Ha#H2H>FNe`YSd*bZfJV&j&o3bt~MLImD`GYlNM%vIUE{PiXDE<`=P za#i~M@!f_#jc?@5{jBHp=UY}A{G|B>njw^9X^ud`-#K%QPyBor{)XdG>+qN}3|7Gt z*d+hM_}LdHIwOTmqEl7+NMY^gjI*Ix`e!W5V@@yhZCFD^XKQUEp@HgPB4}-?O!Pb% zz@-wT@F@g&2zH)dX2siE2UQevOMat&@3{2I3xJcd?2j-vrd=##+;OyP$Omhwvdm%Q zUXw{DHTH`>fynw?{gr*q*)Ol?joHf zk`l{B+t!`X<-G57Al8(%;NX5do_&9YK(=J`E*0#W!R>KNjpz|Fg7D{$enPW81;cug zV>En$)mO+2R^$NKCNDya{Qyv`jk`5Yr;N4(i^b109Au)|QaG~-* zeJMsJ>$x{)U_xHP?ZaZjZ$r(hx^hCX$=~R3=$xV0keGICP@8wB><%=Qq|areQW6A= z!1z~)O$~kZrd`kF*zx)vQn}ueAAOzH+G|?&RXyk&3*I`k@A)nb09YGfqUH7=bE}nc> z{o~A}lQ15<6U9}z`Cv_IAorwaMC;e8>>H4K)e0n?+0p*I01g+N|7z{P6i1uO4`(V4 z{#Y>pEce4%DG3k2Xa_+_xKPY(2b zv3mjTshgcUvo}xuR1<)qkxV2`0Qq?0%Fw_k1ON$D!Y#~VVhF}?ETRZ$W>iJ_hG$CN zMD`P91og#L8dI#v>OF5sCGV?31~==FbrpL?`@9`3!sO?dh^ zD23yo8{fbXh-;GhU5nhFnA`IMFv?`jH)#N*Bjch)rsqQjb`|JxRnFZ#J&a27RA}Gj zI55fC7r?(HuPN*dOZ0?W-R(z{jghtO10BXE4a6Hex^)Kz zINCi&L}SWW^^rei4+l`T%teJX?&Xg>i{e&R4B|HGK)7qYNbgM+XoUgTUj!y~>U(08 zz!@z#a`e3Es;!1qO7wkiP9`;5ytum9p0KnWV`j|cZ#Fu-HbmM#RAn7bOK!~h*V@qX3%7IVE z5pBbWfLanAjJ=hrH0{S?(y4uDJ5wDGy9Fr6WHa0ZRUkhFu&R{dGzfX)Iq)22x-I*M zD5c(2u|PVJ>3-Se+@q^O*NSbW_yG+)}qb*-svN2u>T{g9fIti=O3>)?o#N;Q2o)_& z6KX@$MUBXNo~A^j&Yn_~cv40^oVX63Jc>ykw&@x#!L<4htAIB_4Rz*G9a{ekpZZlRLzHntSU-I zK>^$3w)3*4%dKy&!5QLU4|y(^24*Cu#}&+ljMT9|;lVM5%qI{Es=kqP=p4N{|*ipVUtDmAAPOwO2@)8};rPe4P(j zG}()P#an;-#Kme>nQ^3%9=UXT+eTbMBB23Da;e&eKYmPT=6Bns!;V;RU-VVhy*NL& z2!UoZzF1kKpJ;t!6ku18hy*+^1ig;q7HcXh;)*G6Z*P?y!ZpSR5ihM&EN-eVSp+VJ zwlpsfbmd8Sl;1YctI~}^`b_Gb1QGU{1@w9x_3B-4ofm$foHPqMdR?(NRMd5r!S!OY zxjp8uwj(!bCh@l5Jv@BC>Nt`1-gY~@ZFzGg*HN(2>5Kc2YPt|qLMi1W( zV@?#bIPkL)2KodBmnMHKW`ibrjgrR*7bYj~h~14xh5R3&qeZsA5u?r1^xEzJVfbNc z*U_p;y@tXirU&l;m_|zQ?HgMlh3Bv)8{U$aO6{8lm0k=fNj(8E&BJ$vyZ(ljn zm-WtpJIg3540lUjmUeDlj^}$zZC`#RRESe6jv+?^Rbo+heQqh6t+OA@j9gdMYi@4d z#Zs|ygxK2~fY3_hVSD5Qz=O%=v9)Pb_b|^HSDp>v*DRqRBX?|-x5Y@2`r>FL&bAkS zf3&~w&SDz%o^;^278V;`?s>SQ&iQl{(s z<2xZW>}IjML5B)U`IvO#lyf$iP!l050WNOz^F6+-@-Uk5=Q38$kOfnDfgBTW@KV&9yHa^(11rkpdcd?(Z}`0bA{G9&nL(;?_m|F 
zz#2bTJzO2EcUncUTWAhXPbcyW_uv1L$=Dd_zuz@s^c@$rCVLaqO{0)%*vY6-O;f$* zf143i!~gy}qKhTzJIo}}uWx$dRo3H38m`JN^+A-Q4SJ8b%dd09MM|{~@AW*~(@@rE z`E)WkxwjG@564wTTYD+)qY8n1%evZMKO-Fkb-?uwt6F3>+WtJ<&GopzwfV7Obh29h zOAKv;GQ`#yr404daoi7j&GQV(5n+S<{m8?^!w!y)ffzEoySw}Q`*BIy(uI3C&r;dU zKv)>?^*P<^0q|BbS=GtPkqm%$SO?h6-ctO74g2gcvwM;vttPDV5`a%l^-B*WT8gM^ zAzj=nWclVbG}Yot7h)>rgI`c7Lc|~vHZ91Nv;#{&1cC%ZpOWp(bTk!tzmUwb4pl$D zS`T^rfUai0NHz{NbDybpPcUM>m~o-Z+1*(*l7`$JcKnQ3?DnK-rzvT)sMMgwCs#K0 zL3Yi74HEYaHbtF)#Iwb2bMmh4$vl>mGug%NqSqd8x&{(GXhKHui;Ytg2h>Yt{S7|1 zYR1A3MFVcR;Un)&DOVr>U77$uBYTYO+~?1q9afg!G;3$SA(%7`MrW>4QLA+V_hIg-VYjI7dhWkH?o$rSypKb&tvwbHQ zIQZ~H^K_7(Dsz#1ky{o2*+`zHJwYb*^DQo1rPPTup({Zq{mRQS$H609ohUg!?Vw!e z*+KY%d~-9=sSSh0x(|;^XD;*l1+10xnN6w>kB$ayDAjB8l3%`b((p+<*S}EKkKzK{ zHkG8Dj~V$rXKEk}D&tUW$?$pWc5i^5ld}FVjC7^F#%y$pZ`lT4cwbiuV*5GKHVw~N*4m=-X$4i46`RA86W@gQ`4&4;o^pd5=VH$cyb;QWJme<| z0rvW$$*#Gp(0bJP!wC^Deo;@-{w|fRG{?cKe;pH6hsMFq{>{TUq4rwm#b&v$uWyYp zHsFNBpfo7p5NiUm{Vo*{5SkUMmwxDu#Loi(D0GO&eZ6nz1iTJeQF>!;n8jKYT7``s z-rgp|>3qpTzFq`@Y8qgqR(}ul9qi7LJOH^CxU|)CMl>jVUfor94Q!ITk}0yWw({YPnuEJ5edS(eq{jXx2R3AZIo`o_b>^ey?Q|NurUCqD6Zckhj4k-! zP$f27VUWh+(FV(B@Ru(vwu1Ps4LNxr!oBfW5A0_djKwaEh?S*f%oKR?Va)Lp;@?KG z+!YlS^^$wpq(x|fc-7^3O3A zHR?%o3#-$g{p00o|Kun{G|7l{IC z$m0sl@oJlcQ>}Le$4ql2TIWHR^+x03axHwBQ+K16a^`ZK#N_3IPDB0DFo? 
z=8V)y=5Vs9R&6t_Y}TjW;6&XMT`X|bV5F&Ag9|`@9PDe4fuui8(DCU8aG+>XzjM)d z#+c8yvO{k_W~#R=Xr{qeivhv)6Qb$JKx_=pO7TRuXI<8-6!x+7I4B^MCUczFWL>Jf(z|n2-J}c8Qw~>IY0Z5`I7hq#Y1m88r)!*OGt{6*CEe6mtX1)BK zy0XRi>$wa!om!jt=hpL$uI#oE(nv#K*hU(W>^$ogBE@z17pR0ZCcNU;FOXhy<$<#y zvQ?gSEC3*zvIwvVXGHjiz&rVr=hSkE7WpawF7p@~^kU!?6VL|oEc#&Ex11ynRWKhy zQna&2*=q)VIcANs^RHrE$@sf+6-(ur8P<}TFmba{fDw26<(eR zNTA^ifaz3M7y@z%l>9&uEzxi`CEJE3kOw-}Po}h|JEs>=-^2(3fYFQ4w>)#J<}9U` zV%d3W>DxnbNMUMBfG+D_;OQW~)1<4GcU1pz5)PJYI`UrWLY99cK)}q*>=dFyTvs^% zY)VHDL%Lr^MBa*XS_+ zB6JZ?3)r`c1xljDg+?Y~)MU{7HRzj;)Vpgho{+?Bp}?8SNT^&5o>2^QqoSiHIqKNA z-MGy^{Ge|wPR1dOYHori6PS>iMt4=Pj)z2=1mn>J|6x$e9WiPh*NhVAI*`$F3>&FO zJE)-pJJg_WhD!>8xDMmNa+LBVrZ({J6w+uk-k0e%z{<(VnQ$1Gc+*#NNbV)cusaU8J8pkJi2qKFo-c z%g)0d@*)&R0_~yMe`x9?^s4GmMSLnR|GHRvXQrkTz=!3>6@t#>;A=?h-xElc*(Ft~ z+>$7a*6Zs4=qh2!4$wnFU(Y-*6bOb0J+8lRVmPHq@LBB{Ak9;Sxkg;nWDCscK>7LU z8-eiCo!QB1!nYPu*$|x1swR~5)mBkY-@e%7P)q{pyyzwM*L}-fk}C*{oKmUjX-fY7 z0sGSKM&v2E@6z|&rqYfXuN+m}73FJW-5BBfka(=+XoM{Pji8E(3cAl+fE>wGwRB(t z_AcR}18%QlvRC-!!o4@-=$-l99Pf6ErY|&fRRapzyxa4Sy!Wl1%x+&#CUl-`P4)r^ z4rBlkZyu8u-~FMrA}E0#59m0P5TPy>ZLBI-qr|0iay;x|o|BD~6x&mZ`hISjvhx(B zG_?vM2S-)meP(yOGmF3CU$o{>uw63_GP2eny(Pwfv}7}yJA=_T64rHc^CQGy(O_xj zJbuQdyY`HXOg}X|Hnul~!@N%h5njCHb4(u>FHQ@tUChG9m<>FUjXY((mg+TfdaNKk z4MC6&f`uM!`!Bsv5hZl7!>fHUT zZ70|H=z%w`qGYQBXVMXuC<;CE0W~Ud%r7_x*`OsupW`v9zJUQ_EEQsbd!glay|oXD zF*c@gG9IG_=Y=GoNsiBB$fU6G65a4@m+Tz4bP1$%0@~ny&G!-wDXcC`-|qqvCZgfk znq8QbUT+(A;Z1f|D0#PArgh=&dIneFXdlx~uJdugUhfGbdru?|D}b4a0CBUcV6#>< zE$pK3;w?)0w%{m_OG*DF^FQ^u(LDKVwmIcIB1nrK|8Zo5hY%KQu}YgE(zzhqIWv6R zoyR9{bGS+^-C7NQFNpgCpF=WVeF+b#iHQm3qepZ&*J6;mt||7hy+*{j21 z3Ug!S9Eamgyk^#Y6BN*^Oj{1;3iFXS6_u48WqK55{XVj0$K~}k)HhVQLA*}xFMXv~R+C$YQ|ZIe2#*_R)zUH( zGGj`Al(Z+ur~pkpJ>8Fc#Iv5gmE0J6i|BWKAZ5a?!&qz9Yx5i_Mq{n3qozZo{`@2~ zNXlAs){{d?pE0;*F+MeNZ~*a~~};wzr$GFic@c@@XqQD*^Zsp3!Z*j*B)-SbJa@A-G0lH&O-obL*rk`i9imvChIul%)veR(K)#FN z#Z92CZULHiKr=6+daG4|&R_j7pgi>A#-y~(vT(j{C3DO<$^Ps9G+`f$>QW^+Xsoiq 
zvTgj?n<)TUgaVvE&UO#zp(rO-s-?My#YzafQqO!)_cdOMqyt^}+K}6CYvvhwwRmeA zcc9>Cb?f&vK2^F06{~Zx7zAIvc?fUIN0lQ>S~JqJTt6c;u8ka2i<8mT!RVmrfo;J2 z@1m`uD_-bpZ%*$wT>x?8YMNNSVx}Ze@a^_qSjf09DS=FXD1awPSyn*H_#&>zVQ)Qv z%zsKKoz)2JnpWk93pAFZ0F7K^BWt`4o`OKT_b77#`s0gQZZ9{_2%8jCAxfV6Mz_n# zN@YT|`!S<4j@M=fGc98trqbR$rUW;tNv?)Rfzorb^1a@lhbZfhfc_K_RaG3I)5_kM zL&hDze_TYfDk_x9tioSWcQq6~6dh%Dtr@lYK8Z_l(=oR35uK}9o6nWDN?*SC=uq!Sp5STo7|?DU;G>=@6hO3NP-LtX`f zspn1K=ifAPl2KB^K?@G%2{(s>EUtVwS5P4`f(4-KICD}nb91@M?j!UXp6R@hi46Tn zM@l}F^{;3Y<*bCLbA;JyBJTD4kNi?8Dit1OIN&|2i+wAS5-L(|WRa}#z z+RVSDjkjy@0j6H6RV&Z{dgSUnYiYTZl>`8l9&#{$oar^%$78-RwGGEJE z(LVI-qLPvgq#m}h!T*H#=IO~E7}&*|3l|N?q=}A=83t+mbiNm8$YHP14NB%hv**FA zR086mlv{oU`Ym3f-^Ln!F5d4Vu7?34MwZw>n$6=;-L zKtifKS^RN}pZ_%DLnKQp9o(v~??*=AZ0=VB!5A3G+H1K)r~M`<4d{rqY4!_0Y@zvwT0F`?4!A*~kU_bx z37jWlXG8evFcEK}~~Xw7#=~NIEHUUJtK22tg^eT4rAuc*k7rwlfnF z7KRi>z<~fjG3*QBIjaDZ5EPcCDovDaO7&Z`dje59#>)+agKDQoql&{v8TNuO^utps zfiffmbsT%HLy-bzvD(wxU7&F{Br+&;uISjUOwWb`h!M@Y!e&z6k1EsQD4X6KjR;S* z`aa=z-82>F5K{q~OQPc20VpBcJ; z9Oi8(7ky#WKq1(JSXv@8%IP$GpjK}#DJ=m=0q3nlTR%1VQ6~myHS_N}=E$OdP^w|z zi@VqMLujH*FB0(3-w|1*fEPtrpTN@7(`(k+5Oh@`U;( z?tIfKDd<@jV;d8+y~t1+{_yay!fu{_d%6m7WiKIY)ydU0;kle?%HCpIn~Z51exVB< zNIRI%dChR(HT`2`W)z@{PV7=|JQehgs@iNQncTlpr^bpJZb=K;_0>L#oG##r1-PPD zIdZxyL=xo90A?%{CO){b2l(1#d151ZYDUHffq{V-PaYZ%y4*Dq?>0C|WdmKu@+0*< zwW-fXWo2bw%6(ahO$hT{(t7b&QJ4|vrFw*MuX2^^`aEer<(ZiCs8_G93~d zy?SHtQYx>bF#*si4YX57t}CybJSraLcU*x_WYVH22g1y+Ut@)PfQAa!;aE6mA6S48 z(B*Fk)Ec{^33c{gUB_pzS%A|E&*wk!Av+C91uPnY6$`yczDx!#XV zF%qbefY{>EQMUrE$C4^Me|VmsPqan)D;owx%`DKqR@QIg=`Yq(e2%%jCBq=bhBc1Bz6MoU;E;AuX6`ji^B z7|<)p?zEnt^l3##8hZt_kF8`R){(QD(+@C~he4U%W-{bwD*aqAC=<#JJkCewqtLE($o|u%Hab`Ah zX&R5cEbs@}n(TqHKEl&l%g>T)Jzm?Dy`!zZ-fxZM#Gvhk?Q_UsK{XP?td(@+vJyZq z`0em6mPvIKnIt0+=8JjP1(s$Aa_@@#+%)Yxs|c zoq>YD>rqNnu+JYSmRP7!4U4BO?1(LW--#BkBj!n61o=JyRp$Dv4H4MPH>#l&197jM zoA2duM;y7>x9m}Yhcgx=w2yzjHbo7OM$E7J(nk$c(WUc}BD`=aL+JsND;x@l5<}bg z{pt2Op-RtU?7CT7^!JhBjyeNS`*U~w?oSD6qfb^AvA>MZ1|)>jj7<)g60+HTplisX zRvr 
z?%lSQ6xDW%5goq7k5|U}6-{=pM`VL109hWaB9_T7J&sxSaX<+_jU0 z)9TX)3%@;}7q_eMgbnzMd6Ph6h-|PLfEgp>R}#~y7j=yTm76kcf{)l@KN*%~1&fGI zA|6X3nA@Hz=v^Dt!O-?xtrg$4++seWC1lm$M*v(CI+@Tew~dYMC|NuDP3UQFeRyp9 z=h>>R;E+9*E}l$KN6!u6XoXB?6H&A04$TV}!iauF#h#-dXS(AsoCQXz_QZ*2wDil| z2xnFy&CR;J{QUhqnBB9%d66_=MoK~444rf1p?VpQv(NT&!Riv%&CR|C!%~rB{`*L% z`;VXEl2;?Rp^1oJAspJiJWRrPx!OjHwVHsZk|&%rvPdCdyU^?z4wMm#H;Fi{c&jat zf4=h(Km!lWzJE8;d)uOv1~s7@Pkb}aR;6RJ=n@I8MG5h+K0H3)m}>H6@!7msN;ANF zYZ>|O6QFboW;MDQd67aNLhVO-itT-aWmGa-iV+opCu%n69&Td3C>&{ij<26)KvCW3 z)~0ADS0v}9jq)^;_ql-vQ%4-8$-(XVAep`v*9(f^i80Wiqst)rIQ!)MJaKDl>)|!X z5kB~wX&2X*J5S|%@%`|GpUU$PIV>qTxS-JLezCw~TjbJN$2zv65er_vC&tr75cg6g z;sy6d93&ZmS&|(<%0gQ4{(bIfQNMHJ_mRc@x#|PW`eUAlB)5jq!4pN9Ny|0(?~FT= zgblwXAE+U8KFB_~98^@}H&eJS*y09TP)qJe8h;^485F;MUf`S?W`N@up3b2DS29Fz{-TAZtBinSRWNZ0EYA?G^33(MFHmhmxmzVTBy#Uqkqg ztPE>*)4l)&$FR~!d=?6J_V+`2K8n6W&-=cV;pqh-qQCIyPWt_D21?-JT)aL=I3Xz8 zybXFLQ_wdWF6)eKgA*~dj3HLz1ADoKSRZ6w7u6Cbfi54BHs7Eu`23K&HeacJtXXbj?kBtSb7$BvE1}hct)#TNxvbZzw z)e?)Mz6Y!51{T}J+7=+48k|7#K=+y^TyH{bDMeFOA4~RBl+sTr%MzpRx(5QFX$)@QjSb?BHS-!UMSYndQ0ZSLfcHq(~2~!fJG4KlD#ad_|rxmjK@Ki15K= zGyvKxJAgJ)^7rtO6ylQs_uz6jusRO!ob8%|2qBp92_vCi+6$BOpw5F1jXmp!s-Ws3 ztM)oHIm}N$EdGK4tH|?eLTXTbXTL9)c8|ZVBBu&X!n?9`10N$;w3DPgn0}=F<3|s5 zYZvlrSq{AKB*7aqt-ejo;sToG`YpLjSjgMrtbz}I>Cgq{=!;RTFEjURr98?VnDj4d zdN9VpTR#@OQCm-Z_pFlAlb_cac&0IGKgNc1W5G7!Y3GeCHo86loJHdEtt10KQA5Kl z9G%ba6uwSSWIlr$LRJpg87yr|uUK&_Cx+AXc~r)&Cr^}DBg1H!Gl&D&w9QT{zJJp5 z1?cG)@%C}+H6E*@Qs_MxO#b`5cdxiq%Zkhbug&(1i<(RN7Pt`avraPu zK~p#~H0yZdv4ULR&>&OE<+7plDg&Qi5=#0tNub{7TG8;aLF?RF9xNvx=JDhi75~Tn z{tloICm3Jfql0NJ{AmY!;>7P>fS)l}AvQFmWbZehn+i@$4jvkkKi*p;?TsPs1YX;# zbos9QDxHx3Cv(@t1Hh5sL8?r56CCZ$aEy+F!6cSmz{BqYWxkYe*Q4}5MF}71g92W! 
zz9=EA0=($qcGbuVkhCS)(+Hh>QK8|Ja*6mEFaf=pcp-^0LhC)u)m^#O_KPF{fR(5d zp26?R-tU>C@{_ik@IrsL{TDW&gcMzJ-ob9zv+(s+7SeH5S>Y+j2+)sJBkg$lV^YTCNqtTcOZE1i}|c-@B|RO znZ}f|ne-R}#n*R2w#Tv1UCPI_|J}szl>(QkF#HS#-=4TLq+ZO!wJ!-r`yFpbo-Y>I z+M*5vzzf*CGys4mOsmSw2-;Y*UFERd6+U_G>!^v(yPko4{^Ec`!>U3FBI-?o(wkrwHO zL$`Dsx>LGCkPb;f;sA$EX{ketlpqKyD2;$3AyOhCM=22mBt&@o0RG;+?~XghWB4D8 z{e9osYp=D}T60d5Vmnh3-MkOiMfdRNt`b!R_d5jLRh(DmqPGeq**tkKJbPS^To>b*SRzY^UFduUabh#n@aVrsarV5_2B!xHb2nXUra= zibJq*{jr@%trHI>=|t1c#zK{VkF%zq`<5GzlZi{!8~TNgkih$(OK}&Tc#$$XgK*2g zvf3lZ;A-!+RbC=womb{&gOVD0$EB4;2`$Z20nK{PLl#{o|1>1JppSp9G$B=)-F|)A z^NB#P$r{uJec=5j8WKre5=&5kIOaZO78?tAQ;X{Iq^xZA?l!6YJOPM)KZAp&0{pwt6gY^AlFeYGM zV<==s(OrER86SkugBf3Jy(iqDBGbo$N!D)rz!{xelRHWwEiWg*ZSWgO1}z-jkq)Jx6P#r6UUEf$#_%edzV6Sq+L_a& zo4ZoW>$B4k>C?k6hXDJ$13F$)aXeMksPhR$N8;W7T4lFTp)DS`!GWKJ_2gNT{1X!n zuz5Yh%HdL;_I(mAO`rn`Ws4y%-LuDX#4F2V>OBn zhevJ3Gr89ZQX_)dxNA!wpvmnJ1Zp?tEDbp5Fb&r30CwW2{rk=H2%-kH!8BCXQ0g!4 zUdxRFjCK-*O58CdauZ_m^1da;;*x2@$b%w$yR2N2C@@53z0=3UEvH@T{VY(C=FGd9 zXdO=%^6tx^Ms)V-%1I0PNz)%vp{A>&JkvPZ7#;cxb66=U0R)*-Q)DK6Z2;Bg>cr>y zS)&hL6R-AuNcbqLc3e$Khj-`e5(?`Lq6hUV&lnFX05ZZJ62BSJ9YFSmdkZ6lJS(kL zRIFXq#r}V!bf)OITmO*KZ}rC_$>TKd>nN58y}q3U1WSN!NhI+P3fvu6YqA@9D$eor z_~cyfGQngxXV2lA(#pw_jx2wO!O#XK`yE3|l)Z-eF&-WsAg4*t-`|!8D0VHmMG3Bp zY^Z~ayX4OcVC8v&YVXt)G<48~ei|@-KCRLAdFC4xn?WQ^7rKbFWPlYVldQ}%i&|wj zcW8pv;=YC@wkO*i7KkoW&X(z(W@q>E7A7ro(a6Lkee%gCEk1ZuWpgYcU4q+G{nBz@ z;zLn8nRCOSkuj%PrE%`wMRMO|irn~NiS?6jxkE{>BROmSjL!nh{<}X8>3HG$)dTnvOKJrafMqr#x0$On?hYvVGvZ+<^y3C1jwD=AgqX z%W>hG*Z?;_yjU5HZIlBI3zL9mwK@6h?39W0xX5kD~rLdXbWJ8;=W&#k|yl`vfUL?%n#hB zP?}~1+w~9ChoIw>_TWKj*6H*`#RUlC*#P|Gf!Ea0MVu>qhY$?#Waw`=3J5DGUOXG1 z{t0pkV5xGalwc}aYB@%Vr*)YR)SwF}E`$sQt&JOI?_5jV<(@01nT{ zBGTPU;2(P8EC1;hH^+*b5K-dwY9T@0jed2*Q=vSriZHe7t6d&0sy8w?Y8Qdb0vy6n1KEd zpgto0CTk2A2Z98UO@t!VOXDAs1)b~+G&&3rf*Y>lu1c-?rOWt(D^N{JXOqPr;=de9 z&gR}Bii@lH6};69@C4a4U-vq)=~gd>;DasRRzg4!tKL7IVY+k}L%^po_IGZq?;?9M zGNJT15V$T9|P0OjKd$CGEXy!gAV2ckv`J)SI35j6L0@D`d`%D`h!kIBAMvONc0onVBY@zlgLrp 
z#7o}=-XGh!f`{99oq-|sI=3++D{Jq{pNez28QXPC{LCW|T$F-6o7xS0Ggf4D(=p&p z#Q0fl_y`WJOq)b|uP;|$p5RTh8seN2bNDA+xQ9b8$ti7>3IP)1p^kcx2 z#%5Snd##hZ190rI-YIu(6=v-L}VTC5S@Pf#%S;QQO_upRFJS63$dXudnoxRD=94kK3Jv;&;EiEzcgak$h z#WGJz5&RNOITqdwXSs3X1`E?Jc0I(Vv#z;jTtydRqtr~9Jc>=X<-!2Z{#=!cy&Bay zm;tGnHU}K6qL3|#_PudU=TQVqR%w+xFq1c0;9wTq zCB*q(MWwoQ*se`r!2oQ_7@zOz)vM26fT}@v&E7Oc9(4wtbQzh1DY#R4lmg_jKhcGb z$(9pI7y7l{G@ZlKdyMbHd5Xz5P#oE$7Qd~6F)FtwfAm9rO#wuyad)phOwe;HpEhp~ z62vB;WbFI+NWU!UGyjE%PmIQ*fnHIxJQ)hsTUlO7wrb0w zNRIhjLOHQBC^T0HC~N|W&H4~bQIFZ^EN+t_&0dfAvXEX!N5_hc9SL02QO}h`BiU#M ztV}{>FL-{LSg;5U8V=2!3~wo}Q_PV8`pA4BCE%Zj_u~v}3&=k8?v3MlkRF1V^mE&< z_dY1did-bC{L#O?FCG9p$=+bm>Z#tfzZgYi|0a#eoFS3!y!7NGF%5T;sApHUx7|?H z9<0L;jaQHF9;>m~?(R(GY}N{Uwq{#M0ytxFr2Rt{bmxoV2j8nI@10ooy|m~o5PH|`{|xrw?rtcR7!B0UbWo8J+iQl#XVn97 z79v1)g1#@387t}ONOcuZwDbddK%H8+xFty2jmQ!YNM8HEe`sU{vv~t-=H^^0QOLno z-$4L8U zpDuv_dMGdf`NBvHf;5nZu?3vypQK)b_$6%5C|~+-#DJk-84zmeUm~bOsR9AXAn;Yl zO#lsAB>+b8TPzr*>;qsPXnj<15ipr^-Pw*bW zjWu7_6R#1asCdPQ*Z2Re7IB~UC1yPz-huc?ERiQ9B#zY$>g9DS0HAVD7`+P}p0g4o zA0MH8%Lec?VzUi?%eDx>qaJ?d+GTWFuRl<$gxX?rc0F_F`L!36Vh_r^Ww#iC*x%Sc zMD3+m)wus6?^U8wQh~&Yn@^7@A$~JBm>#Lg+s@HR;OOLxgO@7?1Mn$2`u~c%$dZ>I zeyiEnva%beo|1{KSuiurhl-Z#3yu(Yw!Ai7G860t1QCA)bU3M~#XK#)x_)@1KTk72 zg0gfG)KNk6dt03E3(7!p!D@XW)6f4#&kp{=@~?mj!oPqDPyK2ua`WJPAwn(m7-cAL zXK87v_u6=cP|jf<9NuTOtQjZB)Y&^fHa7Nzw)7@24z@aitE58I+T z$l+b|Drq%SuWL@K+ZAcrjY+fdTTHd2!uDyI*ArK2#cw04)F)`&3jqKU{aR=@z}8x` zIpEiEku;)%Xc>ogE3pCGv2#f5mQ#y4u2GA$X~qE(4KW%T+D}r2sP^y)WG9$!Y)#BU{o(4%Ei9y~28wu;f@mJNbwi)to2(37=Y(_FB=A*eblj~-+p*5h&IaW3EN)Bt zzTw#+>W=^l-s&Hj7&_mMR4Z}pCF0hGpgBMghFmq|4)$|Rek3?|r)o@o6y-EDG@Lwt z^XNS&qJSdCI?xfwt_rLL$l+eI5^eAmY?w*}xxq$SwGr`JN5}>sX>!!@_U&7t6`*`o zL+;R?0PKG{V@*8P}`$4Dd+%O|G1?L%?R zF@3AhhCTaCl^`((7gskxdkBIKmf^PpaDSzA$`M5+6DThpIR!;_TvK^DYk6hm5H}lJ z#fq=;0=Hc4)2F^vcE)*XWD9}5zEP%*WPv_A+HcKunDaLZI{~2QiSrmUpv>dks?-`& z1A2mMp|j3lMPNTZ*bt_fKi0!-y#n283M-_tsf&C-L-7niS@7|gJN?9LYHA{;q>M{S zB8bZ|`#MsX2q_9eKM3Xn$?LQcaefCzgrF}oFq0@6k6BF=$BRh}-)KadnP=kdu)nHG 
z)?_WY{%3k_ZiAjuRfHPd-bfRd$A4#cH^W93p!r2bMFWrIOB_XRT~2b_K~y#}&op8k zMd_vq9o76;_C0Etlp~v~dPv^bcaMLGJv!Gs&1H9_-Sy$yl+%s4Z|ZT}jW5^|)Jhna zTYEKfh%Lz=Iy@|Gpqi4q(|l2XLUAz3@!|cYwL)F&)=*-Tu&^-r!E75-^v90q{OO|c z71A26haYM@3uh0VTs=Te2QqBO<#FRE6}H2(xtBK(kLsle78!ULf2IvIsVJE|ri6|$ zCNNW@&#ph3e5Nnr9O*&rmP9ronxTK^v5%inWqd}Qi#tuy_{WEjuUpu39j;t^##DFR zkhqqa*-PInV8&{*z%3`ebrH)m`^U&8ZcfPiKgA0(RIi^OY?`@$Y9ncEY$SFVLO6Pq zy+759Kqp5)i}9LobV|;7;dyJcd7J$Vw9r1Hp;p1Xyms%E2NPd5h?4POsR zh}I~um4VrmJrK7&`uX0n*YZv-lCAsGm)ET)&mm`@jpssJ-jhByPszu1DPs(Pou#!_%I4#NxyX+4D;IwZ zC4>Q-IkSWWmD5u**ZR5$$D=*2Z_a~EBchj!k+0rBN!;NQ5qVLdFd*g1S|)YoldkuO zvFpNip4mb;QLhb5lcs13REkio(GP~IXv24Tv3Vsv{O?N{^QQqhKbMJ0bu<)LW}LQm zgre}mP3v4riby1P4htg(V#c(*NL%^-Cq8mZ&NO=8 z6vFMEA)DIX4{_B^sZ?_Q98#P#m*28S1k25(1l*M8?^L6|Wwz zhI-Muqydbs0hzT?0|+*52u<0A)%J+c8l&N+r=?G&OnGmv5SkbF^|5%m(Jmhhu$;iZ z29|qTzm7j5xk@a-P3lwZm7|d-?9YcSd9AlA0|(J`OZ~U&V>bbAKhem@s3+e>JAh=8i1-^Hn`dp(=b@$xjOx!o`*D7g8BpP36)JGL9bV5a-Fh~2_g==Sb` zV$ez%-buE{&L-RF38zM_URu#?Y zXU>5hh0bJL;pH~5aqUrkZFe0_>-O*O#Y{i!=}Acs)O_1lK2bKWe7J4#cP&2-#)C4& z;l2mDRg2R$sm$CruTC1>9QX8M@Z$NsCd3miV|?tl^kpvob!dE`b-3y*TlZvkYO9EL zZjLW5)xRyZr#{+bX?VY?^bpg|NrQs$yHxZ^`+Xj>SClm29gSvI$iSiW-#;wlDwuTW z=#vW9Nx0;@`j(avmHS?ElBB)eu!%&Mp4hjeb3DW87)JOgJ-GX5M!LTw9yP ztj&@)o88N;HP*J<>Qnflj7|E2r2n3-d5Sb)=lFZZkW|*L`3VgE`(mZ<9?H)CB#}My z6HBRJ=t0QWi~FC5L_%byTuCOZiZ{Clet4)Z=P5RgOxTaE|FVEX3{as27<$wQO(H5g z7grAAPDl`geS#e37r4Ee=7fwxQrmy!Q+843H1NGu-lxeD$t)n_DjBdfKeIV~o&OJk z8K>E}&ldfb)pz5U1H#UODuXfqe3}+?1i@CH6~;&B7S@D~K}#oipz15QK9l~WwN+K! 
z2TzAq7WQCyZv0b?@-T%)g~wd`wU7XIwGC(GcL|xl3gxnRT6V`%b8LHl(@1V8(2G9AHi0|K394fg%FCcB3yF~_B*2nKUbfKo`(}e zL>8Wav)N2Ulhj{7;!ur{*OYMR()jzOPlV90dAVuy0%9-B2}MKV3OIA#;W5SgyIaQW zoe0p78QMCJ-r#u%S{TuTkG5@7l+t1+_mJe1idmHJu8BX`m3m7}b}c`4*@E)9{^j;S zVf5r`YG=ff8HDL*iNV`kL(VU1pHaFT#ga(vdtmcA65!Qbo)^>B&cRO@hsP>HVZ=@N zAfFn~&kVPo!P*~a)fVs4VDr|{y9A_Mye5Je_87Qr8f3` ze;0lgBEoJWNc#|iO-|6_ZEl1oQeM8e+fKDlk1Z)K z>}-bQczWy7Qds!8Z&mIGhhgmX2S*>hi|U_Lg&74jl=sIv-M+!T6rj!YH94G$X;wAk zRD$YNNRf2nrTt)@xQ0_%4hwL)wwfBHC!(Y*IH?;d+(VKTpAWCB9jWTo)F@=0p=&Y7 zv{pmG_a|SkDY@7~(~gL)a#edB>G~|2$XS1m$QBasKe%A|Y~DU1$kj^|nKne2eiHs;PAZi6Dss^M!B>wX*Z%H- z9wIJ_tWwFahF8>6+`jsj_pW1S5PtS#5x8SXaR1Zm3C}_T^8{)I3dIZOUM)sH*t@Cr zPA)~Wl^@Y{JECxip5+6Ka%+vKr6#brBu>iMKJxc|g5`$~S?c}-{s+pByvX~Bu5L8* zVb|%z2VMGe1bPRXt*6M+P@&g$!!m(oLp#9^mMF7dwN;Ep;7gIHn693+JbH^gL(H0= zv-XCcUc-lev$Zueq`%l&UX9t2waY~m66$_~6zldQCM|x@TL1NL7$4rJsRd>Eqj2x1 zkFSW;T-CF%nfH5DQ-)K3(h#R4yrUZgE}T^-vuQ*qAokGC)ja~& z=c`KDUc;Dsbp2A1h+ORmqxJGPYLVPM?J3HVFbZ>Bxx-+rPLuBo6|=lJ!p{DQ$&kM% zVRR0iImoJ^J+djJ?@@gX)X0j;b)$w+_RoPx{_qvCAaOUq!h_8^;>LueM1J($n-H1t z5h&w0<;XP>oKJnLC&GePpQV#tU$5ieZ0fGi;{ta#zVMerX?3jAeo zCS_8U8~g=ht0*RTp`a6Q?!pDOa61uoJ98&P6ElMgWXwV*pU9XPj4f^L$e4x6n3(i! 
zbR11A>6~>e^bMS7?F@9x>1^!5d2o@rrM`)wiGlvfg-i@gEVK+vv`lQuj4Wi#0(Ur$ z|Da=LVY+*Ay^fKA#p$RrmM$h{W;$d{qIc*Rz)-i<85zl#1;Hn26AODM@V65?t2QeO zCHN#@Wo2fdW}qu&Vh3H%%tpt|3f&?lt}G)>#v}wjo0*szfWO2H^h_Py5oPrNevxA8%sx90~>IjiG_|4`U0F*(lOMrF*!Ff^lA8Wevp5Oqq2=7?nkS zJ=)or=1yWZI#$Lqmih)}U|@ZxlVKSdIM_}G)ptHQ!+HnI0-C3h&DnEK9}AsOGI2Sb z5aZcp_9prUwx^?=+-YZNX=Z0)^~;lbmKGKUdZ!C~I--sZB=TR}YiMb9`gW(+Sb-P+ z#TC#AB|ROp-=0-7(YG@`xt5*j^tGU?#SKi1jL)9T$jWgtOLLvGi%;Lg)>ud1((&Z} z<5TBXS z?4V<2e=0|D$flVXm`}d2wR1ibu#KfXFm7m3ARBZvHnB5MveE&1%Pi~&R0^Ckwlg<7 z{aVLH?^H$Dpf4~zk)28Ls20@hBz%)|&XA3ICve&Aosge=W0ZNM2aCT7Ds z(7(`maO=7A%*X$L^R{+2mZk>hzF|H72Yhq-R^TZHX9|9D%1Y;FxgCr0?=RlZODJXl zT!o#DGZ4+$h)gV}l0UN;hSOumpY@NC<5bIx&+8sEEF^ZFQ=K>ZX>jQ5AJGeN&8b@b z{TG0o^{=$=bY;&i`EQom2&}c$-<0901?%XZX~yZ>{8!4B@l2MdvSm3t{%gyY94(_cYd>?8p6Q6u$lqCFQ{t+J{duugq=Z%HqistYXKtb zZwv~Al;dEp=WJ#IoTv>mxaA*sP~8)EE3JFxGj(&|*=9_O7T z+v#2Z5g&O9(a!lup%d5wV$-qb{g1iYxwV4p)(M7xX=G`kV1!dcoFnCaM=IQ;VQ{}K7KpUC2ziJY7`w;=y{7{Pp|w*S3g#3^PxC)-n0 z4&Wh#5YNf@XO;dRg36Ee>Q7k4IbZb`1l?y2_S};G@52#)c4`0n;s`)j{W`z?YXgaY zc8SjP{J#;9VEnC9{c9kJ(+F#O4oRGe_dJw1(Xl__G5};Wz{o|vYjb-j658Vw$JM-#{ zET>NL)Mv1sWq$vd2Rj+`Y-n)($xQ#qte8PT08nT@(RzRXp3|$3nY4d&cS!dCb=l~7ck_D!{=KaAU+!-pBmKwaj$@nrlUDXy&gOp~ z!v3?%c;<_l&X}JwU;N*JtYiEuQ9=L6_!v)f`+sY&|JNJe8CLxn!2L{2{7LWjX9CzC z0AObea6aTf;Thsp{)a8fZ)v2zcJlepdN1Z*d#|(ObKKAwA^4Z!u_4?16{q)aM29~k z!ee}R1_=K!Is{D!HqH>tJRJx+{AncU;AfzDeDo92gpU5bQ1evI=k5L!ll@!G{_m#` zC%MeYpUgu4+Qf_H3<#bE`=89`A0xH%(>$B(KhEs_oU~jqnXO8@Tn9iS)V*iz_i0soZ^)(m$*!QxmTXDJH9>G3&C$at3JpHJzTPqV$V zlGVSp!wt+t*WSd;j@HBie6+QCNCtZ`OrWgs zw8{Q=`c3~r4>n4~)Og7k*bE&T@)>w?Z7#7Jp-d&{_5Q=dD=hX`$n9&o+OZd~tdM z@bbW4L;H=0-n8;wxIlP8LRdiANoz6bik(W+=O#AN$VZR9W;nu2qGw3Z_g>0G?2Rk1 z6QG1u6c+LJ5*NEJz$qC+SHnix%jJIy-Wv`r+A_(Sj9?XIs>V^f(xB_EhNO+rx5Uxm z#P=S|w&V)Z($ZJe#l7Hu^u)e%5GiZnN%>)a@$-NJoaXXgvqmF? 
z`NdC4Nq;C%_{#+WY(Z+(tRuKY{c zq$9@HLIKy*tDP8SlQ{jZ;xG}a z6MCgpGjW*2Zmp~TJ^__XGPb_n`=Y`N=_hdBW}~(Edd;D4JCnFjZ{7_4tmTRfMiZ#> zIFNt+;BCd;TyLgO*K;05p2-e(X__lViKHqy#!p2+N*GCwSuwPo^hHrNYy|J;8kdzc|sLZ@!W3DfXMC_W`z-xgZ ze9A(>hfTql@bK{bc6KH6MvDW5?4F0*;rLwsi-W~={UO;(FVm#r>27P5(R?X2effpQ zb&G~v=IJBPqXR7PWZ_K7IAwPur0mI-h;;D83a7QSnA^oTXRGL~4vV4ah6VIPT3~Fd zE#|gj2E1q}cB-mNMxZ#Ab}ctCG4X55QQp>vig}R_hsyHNB)V>B6jG@bB;GZ@Tjj81 zD=69c82H?2_`3uiSJu1b7WniH>d(OY`c&G^5G$k!TX*p|tzN*SRuy(~VwX*1 zPc^eGi+sv#bgg#(yOfL|6%!MhL^LU56PF;xsixLHfxDfNC=p{GbpgRCmjCGRV69J% zZydy@#d`TEg*FMsyndn?A;2vg{6ic*drZbjS2l zR+)%f_${8BD!HmEUBROwAvc=lKj!w7TWas@?C`5@{%nQMDfo$+*nEI~u)y8ob<67H z47$Cu)?CvqcRddu;51erF;MI)$vVCXAk6|I$!7kx{n3X|h>djIB~p z=}LF1(CX%9SiDZ5Zd1Q=&cMg~NBl$78Tzhid1XQ3XsP6yXnY47O7^PKsuQ(46G$|g zWzq;lgO6`hOBH=k5dIuXsT7-B)FfX1eSLip{t8*>J-%$)4e(9zSR;Oq25j5#qye*_!kD zgBaWnHa~ylphZ=)@u1eOb=z4i?pj?cAN``snoM-+Ql!wvf@8@7l4N7NDiQkgG|(45 zQ~ct&n%j0Ld*tBl&gYIS;R@EIcoSm!P^fBE9cR*HOxVO zwRBPB%|zNJEuxRL?ym2X`Ly%#g7|afQ{UMJv1(x`PV}vgH+-z5v9w%ak9iaY&oKB2 zi#nr`o@1y<@XSMA=Y!;f1N_F94VLUVuX|3}w}ZWfR|8d!^wAV@*Er(u=(k=civ?4# z`8M}srA}?;`Ey$0TLvGOo5OIe1s?*zQWX;+Aw*s{`SHUTg(MSly_-0LRKPcHSlkDG z@acAuKDv)$S*?T*lEjzQ#Sx{#gz4v$&Z3=)ZS(yhBE2yo52A|IZ9L{Qh{$}z;ChPN z$U74`E)FVUKQo&c+|rMeF4_okiWbHs&H^(j0^YO#wtc}u)5P?2X4ytlfw+RW##FqY zMfCA3*rmOw6|bf1iGYdJpjj!4ODJBJkf77p(HD_y&wZp^Z1|!7#=9!V72fzS+>b-@ zlqF!VNbD0%%nbg>pAsB<_%SD2AC=e)dJ?{T(F%{pKD!0(bF=ao-b~uTW(r67Hs{wy zeKAtcY+ilL(!K9 z8y_su2r>dPIP8|Jzd?lQO7v=jEKRg*RHEoo+Xh80V|44d#_?&lm9I6flCk1^MJ}6W zMFCjm@feFXsITnP4^i6`m#$RIjY4pZ) z^ld>i)VCBqa|hd_ViC7^`pbAxeQYcq?(eM4*NwWZhrJP#+>gR!bk-``YT6?^di)5l zh?tyLd;+mR+{FEddh&5ZuBQU-FOR8rBLgXI-K<$=hO_dpzqo=d%Gd1UP@&$H<>5+S8jYeX*A!CblzmS#=QTmC!5i2Z z7;u#h^ZDgp6(^*Y%c5hyt<-Ekf1|HnhO#_brya!9_K6}b*Ky@*PFqJjy%3sZ>w!|V zpPX>qFbSDaCaH$M8E$OVOX0DgQN`OCG#ep;Y7)oTf(11tviDI$+{w{TU?Wf5ORzmS zT{ihw=l}2W|C0>dIVA{{B?Jfns9h5X0jsOyLb0@(s+BAT?HG^z{U^SCc;>XXLFutG 
zer59j#8sWWjX8J(gtyNuYqOunFnR1dxvhU-bNW7o0K9--pkeMj);X^mD(9*q1BeH36w5U{_LN%B{T*xNY{g5?>))Duub^0M%qnai0QfXk+{G&d za0)Q?V1tgw*()m}RS=YF%#?~>-5Rn?S9&Qs|6`^zNvSN*6F^YmXi|w#UgeS#%k_e@ z6Y&z1RBI5>19p%oFt#??0uWm@n)q~koRUoEGd2`lHj73OgCcp|U1-!_^SgUChvHx} zJh-5kA)fl^0{~%@wVw0+l)@q+M8L3TKjy~o9Ri3OdmD8e6B|1)noKGO;6{n3EZJy$ zI86HdIm+4n+am9soSliRCmQERgV4x=Z5heQMG^2R71i2nT(|qTLBM|TrS%E(%e})| z_aE0$QN6({6N`o8IROvQzs+X;q!%X)gR&Vz^3VKJ~jTZx2_@PcN!h019B(Jc-eVT8uZ$T!Er*uHi@gk6X^ ziG^O2@bcnMbg}2)5EA0+lRXcEVkqQtRIoJi2Vd*>+Ni?yzt(B!e<-#4eHub%n+E_X zs|DS@(SiVX6byEnsy4v zlQ;zcmr41F4C)2a+ejaX!ZK0D$3@sLe4>1dQAJEV8nfhBMBH2LYOoC6tktwQm@l@?DaEP#>AYqmT#gf;wLMivq+Zmckx4cteXuQ;S&TTr z15Y75z_GmX6i!Qmzf7e~U!{}VpfwVv?qCa-Z+C_(T@%(OTvAmE7i72&OJ7nAKRkU? z8s`tPC-6QA0T=vYotvYdM|zRNelEE0N_T1V5dyk=m4o@zhi7U^Re92|%Y;^2iy8n| ziPpUO{#DX>Ey*DOf6@2k>73BhnLlmB!cCB^f;nb~Sdlq5`dmViuk&z^CWcYE9=Y)& z8bSW6CAk1&Q)Dj!q!`_pKmhlVuY?i7Y@=hdTct>HjHHPm+0UROAtdf{;Fz`@X9 zloNQJkW>#6+;HIVZnNp>>PFsHATF+wK048NJm7?;3E5RiV0ci0@e}C7vYQPsxUIgu z;=&YG7U`vf5KE(R8Fq0 z3m=8Qh`{=jT?a{6Z>Hzr?u`GYxBrQAuJ8x9m{t_2pjlDEaA436?jQo+(Cab4ju2VD z4mag!*%*l0c$QiLa!HCG_A;^6bUUVfc0nG$OfoM%2zU&Un=El~p)z9t8O-)(A_)o# zGPrJ;>32N-FmSEMcy~M5nK#t;5?|dx+z%9ibwp$#1?TDMNuymm zOy>St;bhkCfI91)spzIclZTsn@I#L8K=>xmQ%;KRH}<9_mZP`gsMS7IX0n`m1r6|$ zi6YTUkzO(I!lI(YfJBjf81~^TD}-#n!Uu?)I;%(q%_`E%4E|95vcwP@M1_OX^_C0ja9fb&Xv)uw54CY^0^tc2UPO;Kva=ugCT^;$QMy{)AX5Tl6P_cm@7dMBOc=1>H% zWiP@+dmY3NXv?*!HgxHyENw4lcCQ1$?!x!yYhpn_cxP?$!weqJao&ssyjD^7gFclm zbO?Y0t|}0NQqk!v_FXWYF;v*p)YJq%PhL*@`KhSGLCWI=^s)v1m?MBh^NguE(QCBV z*MS`&A+z4!#Ph~sHY8+Vkk8~Zgf0RMq|{>?fppO@_(vzv#tUfmRd!>RL3Oyaw6sWO zBiVhxjbtfjM;C^^ECsTlBRLL2i%CYEiQeVqQ;FF?92}|rDxhSo)+R)Ts~k}n85s)}WtE1Ah=?Ld(E$%**pgq<*lnn`?BBZ`}EmOTvcOG9K zy#MP6urlt|J0$@K`|DkN>Tm2Q-Q3+F05;W?OaKD|bE_8 zfUf8C`IHfsC310J=cOV_IZ% z?Zi;84;qR5W^PfVFa;i(`vxgLSkZxA;9wGLkizU{I88JL&3>)nDl+K!CZS}u?E1CZ zS-rxF2t;iQTYO$|SOnAxn`uNJAD`7WiexJTDk62(LvGgEjoF@y8pVdCg^z6s7Cz=; zNybt60e{X_u2Uuo>u~*nTj9kU1Y|H506UF*Rp7x?YgCqEW^gJm7>R9~ 
zEADb_86CL8JHc%weJOAtalSa{(=dVUO(FL@Yv((lsJK3PAw6tMazYcD z|CGPr^J$P2VL;Yvt;s>vt@jOt`nb0I*Q%k6H-<4i7NE-PMr@a1n~xyD2A<~vU<3mj zoB?oj+nal-UThd96bMQx!61iA1JG&h)97oRiy*tbxI0y6Gu?h|^R46>dZe@^$g+rT zA;WFl&=$t-(D5V*&#eMh(n?3HPRAnqjFt9{!%{G~-oy!4eNzKAl-`X%oKOd7G6M4i z`YrO5@*r73_9Fc@A3(OxF*T%6lXFH-@Gvncj;w6zYP@~`14ua^F307pl1^|>4bRp= zB0ByLr>5L0)`x1(xt;rQ*fbE*BW;#n zkx+1cah_+aZ4E99giSxFtzfR@cHdJr_l~WY1YE1uBT%`}1FIK_QmkHr{=luLGmX0rZaY>Pa>-hm&n-vZ+yOBmw`?_t|1R(@ zz|#d-Pc~;8O=w01Pm_8^Qf4F`eX>eRPokU*aqussb)u>jKeEaGo+9A;+PH_9R5o$I zSyrQhtQiXlh4+5>6Y$s%Svx5&;jPjpxxX{@M5!=qNC4ust-!4fCmWSSl+f~P^$sY^ z^+y0_g(QK(WBX0ImEa?&SEZZuz36hEkRqC{UMnmoleJqU+p6ngt~EB=$&36h?1pgd znkvEf)^<< zuHLV(5^#^E7L+VoYwI$o3XKIxzwTJQw|bF20>Fp;2B{#e^{uPpd*iqgUjEf3gj(?y zo|&v&NyBGTx82DIKII8GiL?i?6G1yRpg4qCN2(KkkFaBc8)#prUfbjMQXo=S>zUr{ z+TiSQMqTxSx!8{wuAkj!Dq*}3mM7l!gk0h(kpXZb39RN%IUQHcqEDfDs6hh#}8j@&u(%5uo?| zj|z;so?9Ee(5II9v9+Y)An2?bEmKW3!qp{SY51C>r>9fp)tbn1IU+oKeQOj+Dc9VK z8^ie8`YQK2wc4pTL)H-(YT1)1HCYHi#9j}}p5yLa5S|g~hN$-KC(0m=F4v3f7M^L! 
zbB|URB&P{@ytS4QN%J00J(OV`zUpI%Yynpa0W zH?>9^P~tIRF)pE+W-N`tuZ4O6?O%;zCpveC3BI)^xpzI^#*jtxC8aAgrl{XZOCr)_ zfNCu35ryMC{R_e^s}JV9#u0$Vy2Im~SSV@zj&7%QrdlUde7*PF?87CvP==nMz`!xG zNIGm+*4>A&iL=I@LZR2SOI+J-h}V07W!+-dLtcO&&esIoPCXK7JJLBH{SwV;r^4}_ z-UpLtV1_>i<~VA_iQ|VWZPS5|_=0bM1Qo2Cv7lPG6jbJ(dX3v*!Xsy=D^dGnxoRG- z=bax?*roVSH=kYrHXR_>wPvt7_ICCAcSQ+pb@BQU;8TVHy@mtjPN-rZbXgai)6k33 zVJ6Rd0k|-t3kv`jW(;brO`-wIF453Ha|>V|qVGw>!XLE9QvTg}`1||kEZRj+hx@?| zi+G{qFJ)}vX_uJ|`TzyV&b{)tCZSiDno`9{QXdq)1*3D~U}7#zhH$jRX>r7Z$=W&> z-NCv(CfssKelUX)*nYY@_TQHN25(QL^E!F^W7-hgna;|3S5^sFdJ1Upa-o$qgMrsN zP~yg<-t60*c=+2rjRd}B{78g|o0yGK(XZ;a%2wwdJ5%zQxz zOFS^l2QaGy+k10ywg4$`#8YV6 z!obMd-MbJc^VH{9Rif5JxP;0c$O6s$hNtE-eGk}w=XBerd;I(>=t+P@29<2w`9utW zAY-5#Ocr=*AsPapX<<-jD>?dVLMt?NC3F{upBT22CtZ?fTL=ASXN~jJ3*Y@x z4B!>u078Z$Q&q8I&t?A;6FQ7CKd zOn4FL?@)I)5Yp1puj8XXu|$El6O4GrrcYN=-!QOTdn; z#NZuDC=u3EU@Gps5S`bb``85(SOEx@U2^$oVUDNP>}k0+j^J=0sy>R zQ+Mkx0&}9_+Gw^ORzXjChEhPpzJXQ{pw*loa6~*cPU}RrC2BL;zq&1xDjXiM_uV(5 zoFLk1w|03rei|y5yoCOB)lh5dRH*=_t%sWL@DUhp%vqtc@Fj7Aq zSzO^XLt5~a>&hr;TO|3|oeb&6x3Z%%#SrUC#3$nVFWsuHZVPM;fJd$i`4M3;>9J$} zh`TzqQSDQ#b0mqmhrU#tT>d>kkGes2a;U{n%TR0V8S;J*_BU4C&D4Y7>PGO=0a zQ_O0n$6j9|?#f)Wk-*-R#`rVkISIuya^Qc)obW6+DTRSuxD5cIy< zBv0pv&-_L_*^CtUfv^;y!*lyw{p&NF?~~LDWT@-C&s#`~YVUhn+oc zxb|qs_e(c9Gqrix&^B4nhGc0)vhhByHk*=d0v5uS2$Ergf!nUOKTzLMYUUDPf&K~^ zC9mfz3FYLRVM! zwcQciMmFas(px>qu0UD!=nIeBpRbX598ZZhy&Ozw12yAdnGj6rTKl39Iy6r2+XR^! 
z`OF9C9d5dAY&6)<4mBRtVkkcSN>8Hp4Ft|v#@fcNNU(lY1{+}gLV0~kM)8434huYS zHzs}n5XBFfnn+&f8_HMBGP3#7mA|^!AXxBJG-FbxUrj(-oPyq{IerI+2`F)S=vSb` zJz|ktRlXn;aosLPk%3!$GCS2Kj{!7nBLCh;nP;Ud0>bcH+5=wwZA@dsA4s6d(CHmd zW{0ts`*<>329F_SVv62qhy#$qNj?Y12|E*aEukH=HGZTiugKvs2D#fV2 z&!H#oJJNTK?BiPMyW{RM-Drq*7r;=Ib?yiqgazqj+z%VMS`#Yec=)!oY9^A}orvuE zMK`7FWy*}!T$^jDsE(18 zDi*-|AkW1Z`B^G@VyWlWv;|V2STDR)8HTU%8FQV9ddEh~^s>`8&*b|%dXsuvd~6Z% zq&IE^uCia>`HjhQ2M6|EYtyGCUEdTfkfxO9WZ*&6moq7LOq?AEozFyJI`JqQBVaLV zTLM1eJOhhS*wTge$@IxAH`1&?c)R71rmq zKB2r5y-QZw=xQ1f6Y(y2OxFtjT5@&p@DteXEM3`WIZEP#kLKb~82{$fxJ=X5iSxko z#+MrwBpZiO>U^(^JC4T<;KJ}`kF7kz^N@h;M~sw<)?Z!Q5<;UVaB?N{D_28vBKz*I zModpCnYdGFsF#+$#`2Bwd9yIlya{RAb6$a|z7{7u%+ zb{Nq=G6f{-N+jQBu6ZJ7@AnOH7r0<45~m3^q$^A=`LO=b-@Nj9Y5xl4KH3oJi`y9` zb+n=M^hS_hI_QP`k_#^C7d<;*wxR?ANCPW5L9%YMhv*#JYqJu!Z}A`qHBK@J3;RvB z^#RV8M>S0xE;=wXVfJNUc#!|S%QR%DXhha&jpDHbQb9DR_lRqwoqiBz$J4vj9E>fX zd+AF|Q(xM5SE4F>kvZ#}Ml!T}IADK?c0RUyB8_aJQ`7yzW{07a)iVOW*sagLYfTA) z!D~%xj>&6H)()WNNe>Xx1Xrpn>~iAl?5$>`%kvuIA2b|qJYjm#uu#c-6FIkKiO2fz zvXAAk=)z5BZ4)FgPO7dOLC+QTUq=_t{aidSy zTP7x&B$R)<;9o*1OEi6f&ou`MV*~583F8{gSj?fs?%hZ*1>$oCq{b~$kh6LpBzr{Z z+EXKPsdn%h8vN68(WbN2{3g34ehNdUZRv7i;XafMh z{$ef{s~O8Z>`Fc(9T;;mJjsB@aa(w%u?O9FbcZBwxD0lVDI;W(+hL0R9=9XRq*1t7 z$6mhGMz=y-CVior?iAAd09E|EX%-D{=6COFI;IL0hl@v(B7nLd+|krwmU0c+<*-|D z+S&v!!;+rI$XJ;sm+1h%ip!^I%3KS|Lcjl7R@=g2p*Pod;kjKl;M09XDRE!jtM++u z=b6sj3$oT&vKTbQ4|mxbs=^k=1&pz`%!b0EE{dh9w5S@7mL{PoHtf0k@j1Ry*t8s} z@RR%E7VJ2$+R~EjVHPu5#w|=_fJCOp+W_R%9Jb~s*#7(Sl9RT(HSFp2W_p3hSoL~v zjS9xdT~8l2sT(2a0$qsN1KtDWwghiV7K7`;%~W$$x)3Ry4TrPVDzI!&gz9i7N(uoX za#M2T4zw=*kN)m+;FgO);vB2_dn7QfJZ^`v=E^po?!HN!g*yu&Ak3yDN-V< z0|-Gv*;W4RJ=R^33_u$yXJq=+=tJrEZlc`PgYt%Ee69}l<+h;C<~z8I28$Rp_sT)X zd8us&-NxL&m7#F+rrP^I4rfAR6RW6{QfSJO2mJd@6uAuc-`~zCGrQ*-G>X#FHtE^$ znI1>7=057dAE#T zH)}aJ9r|;mS@Nu~h^WqX*$8&ODZ`u1rC{>>r~7iBY5@m17H?v^wy>dHxHfh<4s8Ud zDMpYAR8l}OQH-CTzq)`u`p|8CPi1x_lkBqFCePRGkqZh63JG_d@gCD9gz-7ziM7Xe zqNdy(NM?7z9*wx^g|n_UH2fZv`NyZH4c6B|;PR8~YXFkLPz(hEdg@WA@dR!ds>aBq 
z42Dg@0Wv43N{q^ycZ~2T2uBL1fNj6{b2C})&ynb z5VK26`UK$wL6hLe&mlhTfPbpHYhC@hg6xaq+(F{YkZ&J^;~eLS@9%}G7wa;Thwmar zDJY01^OE>S>>^aIU)J3!Axz}7#)u#g8~fg58yT05z>GyC^$E`@m!ES3pB>RujkYOH z3ah)qS<`NL&2N746>=`4;nW3T-XP&Jyvo(h0Tc0LM)axB;T_9>iS4F3;=MVglDSpN zr={~=oR}eFhX>a&XtfZFvGQHBd5Xy+nzB8^#~TZUvhLN;xp*=T7g*0wG&_vj?C1aw zhDg~A99e*8wnh}(Fx0IV_IfZ*500Co$NIhQ_Q&rTkx$vJiT$tP=OkkGfZAuo3|w4| z;Jm=Bdu(g+jc41*N)r^d-&aaIvb0OtAMaOcnr~{ZcCZAt-mNyo`qsIRDlqEqr=l57 z=zPI#!{E-GIeAI%|I^XyRFIN%h@7q4l5u5BpY7{M3X znD&t?l2**3N*fW8vMP#0*?UbS8R#YO?y1_ws>CpCpmbrwY~^6Y2axv$M-OT)s9X30 zx|-_Z!T$bpbmaz59`9vZssZjXVo9p_*0A*WcVm;2xRyMqG7D<-E&Y|AS;-yqO5=Di z>{ff%JhZv7xXR4x<%;98lRrG+g=snD4Yc$%q$Jv0e^=^MW5u{x9DS)*Z*Z?>pw5*` zmF@_+Zzf-H)N_O3Dj8x%L!lH3<7-DT45u1~FZT1<9^`2q^iTLcz0$Zvk%3kuRz~%u z_|mGzOUev04|^7#R)7c5qzF?-jheA!>K=lojTcbwQM=Bac%?FFJB#58?}*EH-Zl~# zKI_*ES#QFt28y46{w&U|0S#QauEn}ipdGs>Mc|@7&Co|WBFw{CqTp|` zL7P>wwJe2=u_-<5j$5&4VuhK;1sFSeeFA1f)i(lA7T+ci%w!R?A*w9aiHNtg%Yz2k z0Z&WybuOP~H$N7W-lo#+vA6DdgAP|Oy4dsQJL0DHTrp3cT$^bpvlgTxa=o@R8n!}l zpo`Tt5I~uM_s-CfOmiZHW$)mHma%eC)yNWVCF~?p{SH%8>njFx3Yd6ku{d-cQsTU5 z0ZM4_*XamSN#Gc9&TcCGvJ}4eeE18+A&0GqbUut@$ra$;hFBi>ol6|Efz#%f~HIDWCvKz)OI@yRkGOX)_M{FO<`_HAIjtPv~s*jVBzm2PL?h0x>~U;;<0ieqx|$CGYh zv1PONBrdiu9-2r=UtK+5EgqDcGmXT6#G-)^R<%=%V=^?Bokr8>O^Gvs`;{=kk?p6H zm!nJNdc$4~yTKRTRW4Ulpt)ZqmQUBlk#Wb9rn!3b$i?E46V-~>r)6zMc3v{5~2D1O(bN<^kh!)mH6 z?8Lr*5q5RDl`!`9eOYrgJgz;d{aRx>-RtQ^TkgZ9^cEx4;pC;GsuK@~Ql-G#tc)p& z4@$i+oIww!FN0R3Wv4Fh2E;w3#rR$&hvR=Fe!8A*Q=U_6wk0si;#O_fwymjGbX;Vh~^((UGv|q*oa# z+;s7{dV>54EoXS`Oa?Kj%MQI=rWbTZYVF0J@#gQXl?&AgN!zh78Fl#dQOJ(3O;S|Y ztOdVGHXeR&-I{oZdD(Bc0W0}x{ciCLDmuRJr=+{-(=7x!dj^QHA`x9xdJXeo6>j59 zyaZnfG(TCseG+3dp45n|^i4*T=}15DepLJF-OJMZ4#cW7TG`7+(I1P-?mw5aTb&?u zQ4}ZJ5FC7V$ep^Cw`B{#lZk@sAVL-pa5ZC@hXP)AP0t)H5RiyXjsn|oT6cOV@4sQG zJ=`-@&cFMSh={CH?Y1(i9=T@#hmxnjV6egDBl!=Cch;8VMkjWnCU@4VBzD|8Y%)3` zuR)knIa?{M1&b$kz{QFWqoL(mODXl|Rg2xd9ow#(fw!sDUUPr(?A5y8Uo{&qvg6Wi zOmk$4Q#8{a?hy7RT_Tq}Yh7!JooI90yN=Pem*)LlPNWDD;#H5hZAOPZbqrdCjZ8gZ 
zqRbzkKkHls+K8QWAM-vX(D22Zh*h^3`^KH`4AlNLr`3feuOwBx$d;CXU6*G*!U z!O4wUQ4t{D=GopVBv704sqFiv`!z?~`ul){TkkieywSRA>qZ}1I%%sItheAkb`8(f zO*D?SFj5&~JQw8=(O1R*W&8fF`Nz2dQIn!6>TSA`;vcjQnZMs?4H+p4v#P(HH?TE& zASSeTRp9x)b3dZ$Vz6@eEW+(o7mrOIN*$DUWAQXN)zw^2HU+PE>A+W;r73}JEOyJoIH0i9Pnw}G^6*_A zA>0i_;ih+Suk9w$XtZ~@ZUu;?bj7@-L)>6pBeNdIQjuc?!k=exH7C_ zl&Ft1R&uFNzS_>3EaOIjrWeg%5r5fr54H_103hW{iE{4_+8T;g6_`&x>hG>?ke?iJ zTs^$9GQ`b_p~#xfH0%qdNUis1f$;A!hhr#ysQ~QFWXl&2rJ5Y;8Q>z7^ORqWW~tZqe7gr7Xu*>Y&O!+|Ap$@ZlU)C^jFOC5H1soS>v?$`?n|2vOQY zblcMQ{^qPT5_~kpBH}nMPm5Aa`S!3e+||u>k#=QDZBA7TMSth8CDvGUd?@X)&|J}QXT5VEHVFa)P}u?s;Cg9@UUtmMa#D* z-%`z9OQQWTvL<8okri3a;c$(p`-%#+V$nQ{{aaY^N^*9jnBhZ*K#d1R!2*f;%0kLIJ!=48l@WKNIpk^?0~fy(0)6G zte_;YhR<--8;gPy>{L^Modp^1*Ia%Wz~wf_3QFQi0q?Z0{tQreJJ+!N>xr}>ulHECRy)(cG~3N zL8M%2h>eW=Qs>DBszCu`SWPT!lIS*Uj1d~la)7OF1sAVCp$6d(LCSV)dg4N{2U zl}F16t-=(pTZ=njgCB$ae1H6xTneI1lcD7-g;$rqn&0cnblc?aL2ZdQs|oC0HSTw+ zN|EA7$X>JOIubyvYcy77H1e@f5GM<~{q_pj4hGtm8bHT&q1~L~#`1_}&}~agONLs~ zz}_VkK=4F>+|$D_PXP86XcB1J7TSi>Pb&tkFJLzxK~qTG{(HdG?%_#3Sv*H6c1bre zOWPQZ|I8+up()&pvg!PBi#h|^Dp9ujA%RgSf1^zeRPBa4v zuK;W~FcF(ryoJgzeo*d?Vlt2fdpXA9+&N9tlwR;_Y}^_^T2RYxiIY<6dF?D@x7Y*I z)6Mb64n6`^nOYL{uK2;Jd57+v#1y_5Ep%A13TIiw3{6u94 z_7}(77NgZ+`iE@{52h|)P;0!W+~24r#$3oSYZ$cseuEA!U_wDSR>_S`iKNY~CW=}K zG4`4?L3xrT=$L{>LQ(N&p#8Wr>^xs`V>N}n;Vw>;UI-sXV(@{vXIWT`YxkE@j>#64)(>I=Q{=CR&O zW;RwW#;M&KrzXp|S=R?gP-5cWOp7}?Fs>JA?zym)F9NpJutdZuJc>0GqaCORZ4x0+ z5frpt2}0ZI;-zqGu)wyU@YLZS2s#1j`mJM{MQefy+Vpr(=Z z+O=VIdo5t+9SzRIJJdLiuyS+*; zn$qc7x(4DlwWhv(wrkb$fSoaVupzboM=muYsWO^*qx-yz!GH)6Saz%Iu9mgcf4^}z z@nAgS-P&}yuL=`lzMr5ZwZLS5oz=Jxnlh;GtN2Kk?L&L~#Y5G;Gpl{b7 z#0{djkxLKWk&G0*cRf>r?yOOJo5n4N!XtTSLkK;wbZ(-c6^xYi|~& zI4zkbka>B)-NjMx8`p*{B4xm9oHxCoY479XI<6qEqWgbeT6PWE{uXW7bCu1~8PWgPPEu-#=d z_8r#?$YoZP`D&6=cCj7t;|LcK0K1>DSq-x!xl>EQHU=`|;k}n~$#;~`y|kJ5;-0+Hj`ygrUuO-Dz3n3$gqHf9D{fV*c>A#9%l06{y`$|noD?5lDB-f0 z_VrT^gZ43>p4VZm!eUGx+>Z{n)hlfb5>sqMiP0MbqkeQercp3T0s98JUupA!4O}ys 
z%M7*EZW%b*q5GN-o-KVZ&jVf6JJa#n@^9?sW~daz2b&}1G@!~-X^VFG*K*??LG!s7 zev1xaF}x>*t{B17S)nB`PwW=rnUSk-Z-ffPAHEYaxXfOxQD#60+a+svydl>rU%O5V zAqTXfQwdmR5^Kf^^ZNw^P{WW>e?h=>o#=q7smPO3g5tZjzNnG!Q@}nuZ&F8>GI?0|&LaQ7QOFwjK_r)f6;kCPp+yvaW<0vo9qQ=HoHG>%=K{%gjuQJ3{H#(m`X} zE+$ixFWAqh(q=XV{zuEHkxWh9y$Dzk>B0Db)soMf?*5xBByq6qDTiu^vnR@k?YWju z?lxVDQ$ddS@(=A`u>dA5pC41a*RK%s}(OPNQ5* zsN!Yi`OZG4@ zuH>9U<(6uihx!Y7X1g(?)ah}c*)^M4{(E*M{O!T!mz#Ll5V_FffpX8_)xB&6(CV3Y zzv{7IyGrC>!5q+&Cl=yi3-apf*h~iPaJwZOa?iP~!`Yd7g7WdUc6en+*hBlu!^b+) z7g`gkP~Ad`R&YKv2>HXFVL3n*NFw+Wh5iehLjxIszvsO_?+=_gbIy*n*IIjzgGC9c z2Wkt?JHLU}?RYJJiiW3PyiDV#?rqErE$6R1&D%Pe^J9g7!$44ulOI^HR{ir0qlb{n znT{bqVx;!XTPd8{U2O83N@On5E#V8J>MabuG&dQ~V_=^9o~Q1$n$F^OHsYD;*cFjOg2p74oGT)e#F1%4kfnuaFXUZIR!9L?-QN$apnLh3q96Ar@8mlPFK zF#&LWf4MahF-U49iyMWJ7P9hakusWU=dfWqUm*#n2{PnzY|7k^7I01hE?+Mhd#xs^ zAU=!=yYW+72~z!dm(R07Aab5iT%EPvb7%kk55E@1XV@^sXTD&hbYmo9Wn_}vV}|)S zj~uE}MSDyern~Z>To(H@Ta#5Wv&8pg(Pas~;k~U_`mYrOPf)uG^(d@Op3a+WNv9?9 zc=I<@Hq!O~$pFD$O?EK)b4)-rNc(Ur&}iZ(;cCC#J-ni#qoc{Rpm)-))B@$llb1B% zkpfYH6`yF#P5-jg?%qP|y*3=?0qi&kYWgdKYknYR9*hW*fr@>Y>g-On3Zi=T;DW%9nTL z-0fI9ijJb<=e#J$$U*=W_Q_!*sHMa*zE^mrQQCK>ZCw9^fQon=LW)Zjj{u6PEzz1LGx@vi)`lzLc+7&rP#d{ z^m_i8iAKTq=NADAcwDMUB!FVR55e~P-#>`=(l806E84*WgM;eMCw`!$&Orbfep*eo zzuj%`)cj?V{coj-Hgr-{mA_UpH^pnL+bg}+tYTzny+uvw6GgUM3s8VW0+E}19zTK9 zu*os?%_Y~1^NV4cUIU;|?sCdLb$N*RO{`NFNX2Vd(0ryk<7QOod-TnV-b%o0GBQvt zktK{2^jVYc<@EM?FkuIIgS8lf&!jojbe%6#DjB6}C3dtfj zL&ln&P2l92>+@`JTkfm$%BQ_*VJWu>Q$*_JN<48|60om9UCVc(NDyh`u&zk>qr%K4rHZBaM40xH|xzo5sy%2oj8M5RbwD1GSnCiPPH z8O$;C#Mav}vz4X?S8Knelc2rs@vrfjcf2)DP13UgnQhP-=4Qn0P-A6krE=ZBD&@QH z{gNPoQAS_p0$Nr{J9~M0>?>E8p{+N6{_lx=!rmHcK^Fn)u5i?&9h;Mrwfxiqpr~d` zD6knFmGoh=)rV>FPg7!H%wROLXdcg#n)@DZz0{xDH(uUyfKpuunC$IYDs6YwWTlxB zD^(@De#d%lj9Y?gk^LaWB{A>^blr6JAG5CGfT|TA45Qvd(wt;IG)M(27&>7~d>ow7 z(c+&K12h68I*@3dJm7H3M4B-%y^LD9d{y;nQG{^7Ur#ob@fVsz(Bp$zVkZxU&WM%W}RO1X}*i#)?3R&`7sG#00z4OJI1nU4kPPvQ(FKU4w0pgcXeb{&D_j0s&w31dv 
zK7Pc;*cF(ZFY9PZQMG?eZ?GS!@UXptM?NZ|JYy$}5cohfy~;r~{L|;)i;aS;7rs11 zZl=q&>>@R$t-*jclwxKac8$$_>weQC7fZBXnzO!(`+*mdQ} z)lGmF^mw%DgIVN3ov7+KIFE65Xg*5?1y%9Wjw+8RfqH^E@VI)@n4p{($bqBcpGc%& zbJ|XR;+HGs_cTX3;Cie3%3i4IRx;8PU{SU7ZY!7Kzen3OzaP}E%_W#iiLks&A-#X+LI`|SzJ9%tpgQzp6}FSSizt_h0+)d7e% z1ypHGD{j&Xh%$DjUEBY!Hd^p_HtM|ny`Ya##W||a!>ly%wwFux zY1%6f@NiNDCB$UmQ>j!C|11KHpF8e0@OZNVtJb4$CB_J^%mud%!oB=VDSbjpt0>#Q z?J2L++!0++VOUp0stc|aD&^6$&#KpsSu!H*wgv>-mEWdZ}9JjL*-1e{ZL) zd3XNi3GZ#Aa)x4eFvrrMcm^4y0&8DT2See|;y z9ms>*$I)9Kc8`C+^GIL3-<4xYRljiAOH03S>)R4QR>eIjPA3?Pf|P>s!R>mpAL<~c zo#f8zxkpLr$`%BqblGZA2M`y}rFBr9CJzO;yJ*%wsR!|?s}exe*|-=ZD^N=7tg+K^ z?e*`1#nj@@pOwSmpm3Z2F@1(I$)u|_2k{Uqe;pz2+oV97kt7dX_cRU`76d@ro~LRl zk&Gl-^};L&wVyA>+_#OZ3ZR0lhG&awEBPQVPJqP>^*-c9LztN3XB>J7Pg&4}NXA9{ z%SLy0M-WX?T)}{<>D%n&<~B!RRcih7yPyCW3{_kw#htnXrbUOlOB2^|&W?X1IL;p| z^~Q)c?wRXN8#Cn(o4@rYssF|#Az|#19WUnkYeuwbfZZewILMhuFc8bRowWaTZ!x5e zOucWrs!Cq<9ZNw@JH4iUtIZQN#nihCHemx~vlQlZA|$Uc_5~YxwmiigKVonm_RpjP zWMtoA>=c80!YEX@C zu`#8;iw;bpFc-hfHjVYbX!-u`6a{x{xd}3L{}ZUE3p5e+ue=}*zj^mvHhGxf~l6X-r0vh%O+~H&ciXuk(04RvmZWdTE;S%($;NsJd< z2B~&01+4-RSKLj(AUq200ytlEY&IOVDEvu%TDO~jPmkuGS5}$7fBznR>WVS6n^Ru^ zRTuop7av&Qbn-!q>lf#0`FL$4AIdOyeb%RgjN2ye%&uY@h+Sf|$0#_fl)mwc3qw|6 z8aYSS-^$O~X4B>Zr9sJj^Kdh>y6j$Vg)mirZ#bl;hlu(s_mWilqT?bVIz!YvYBb{i zX+BIGRgiV_vDsXj4Cv?657w%~SGslqlPqs=;zbqMBleEA!4*;#;O+w_c943%z_ip7 zI%1qi=%_;xSMbUw%K?hWW>6a|Qrd-b1Y`41c9crEnI4FeB*9{<)Ph*7kKk4a$Nwuj z)t}a{hpbkbsfYm4VB;IO7 zi%m62(Y`pXR-C2aDj{7Y^v3LGg858XL}cip05B&s2#$v1!=zho)?dmpU{3TxhwK8R z!)eo->>J-!3Vy*r;ivNW*1#rmxf_o}vmTEv{lTD^v=Oe|NkS(S>z~u!K_}DuN(=Hz zpP;wPe8;Qb#{@lCz4<*1v!Ty-RCy)ji&pn@t^)>`&OSiP^1*Y zg#GXKlT918*Bh0zxI?+O4RFihiB@X*K_1%t@cq+dDh}859tV$Jy*x(C`apt{Dqt|; zK4_#SO9O61BbQir#_lk_6v=XT$p}LZVLTnSShwsh5SK4Azw87Hr1jN2so7kcFG;tZ zG4W{OE+Q%D9x}xypA1XywaWv0vBw+D&5!4+k(1y^l~S-d*V<6+9ch@xLjf#-mpp9_ z__%MQ|1%vZQp9cI?v1HGsm{QJ{OLPWap$UkAD<+JeADLv$2fw+-`^_7`5Vxn@~tv! 
zH_i>hBO)?DVEUV3hcxFN1=-D31yx_hEw>_R>%asd&PKwS4XX{V&~#V~(Yx6H+*{!OS%FG2R1qu$E@6qkW;Hnm+sq1NikZF=oonxo zo{ipv=CcRyo;`S_)#UPRW=DDqi`4ZdmXYdnaQJ%twvATu)Po1&_Uk3F$M#93F96`& z{LgsdDI$W1Q6Hhk{~$FATcm$_cz9A1)AYX53JVS8lw&jbg$Y?stV}!=P?dh1e|`}M z;{R#=D9yyc_wXTUoI3X%s&fR;PEUZE__^4}rA~FY4teED;q!}gmyLv6uo9x{qr__R zKXHd#Krx__-h2Hy(&ZW3y!ISE(@ox8P(6tmVe8rz3)pHoEP)+^E3TBau+|CnPyF)+Eg^v&$prAcAjap zv6V$6<=+1w1V&`klvzb731Vgu;{HTr9YDokk@pC3}K`nVeFkMU}bzVY|wUFo0$5Mz=lVfx1l4ndc>TL*sHuZ%^T;%dO-rpFo!;m+LZbTWu$4lypQ zFls34HXEEKRTX4a6`)|8)lk$I1jcI3cUCyl z%RwNKwggWJ^rI_KW%9!32DysM;g8Ci4xJ)7_ITb4l8l>=8BiMaXX(AhNV}1|SIxUC zLz(xT7k`M0KZ?5;e48s|9ZsqX-fh|@=U~+9Btq?T2k4q3yv&YpG}RmsVIFh%M{lXIm6f&pEcDBX+nrzB{i z9Gk&>99E-3FiMpVK7e2t2?g5y6b_YWBI+o(A=%OaZ`2|n-01Cjgg7I+qe zb28%QQPuYE;e44I{q>HMRoVbrwVyZ0FlZ5}R+5%HGKFW{{E=7^!FqgP6@OF#Cby94 zf`{Lsy0ib;ah>tONbc>|zt0NfPz7O_%Sd5t@V&dNSNaPZmLDe4D8!6#BiuEUoQ&#L z?(4CzVu_HT^$ej<_LA?PPKAJ$-7ocbiT%IyWpWO6?<@8cN%b}Moh@Z5=yDZq(y-Bh+|v6zv@#NNM7l;j<)B` zoeS-lu|!BAF_PYg!KkD1(Ry+iCT%;^HobWRv8stqAXzQ#7}p=>_}n4}kRl@cI*}L` zX5?qMf0HzvMtp!n8IYp?JY^}f3B$wzl{`st_-<zdO`XB?0WNkp%u*d6hCfl2H9#uvLuaK| zj)CW*I9pkvRNa?nx^d%%%b+<}XQdP*U3+Gv(ut46B<1Ra7S9dRx9#np7DxkXso}Q7 z5VWxVW{~tI-tIW9elrmY|G4Y|RIY9awPwwzz@-2wJ8C13W7j{c8a93CMR9xZCRjqI z=DbhgdoJ$2EEv(f-!+c8EvEYada29}vo7b}WQV_N9iMITDvi7pObe~i4T*1@g6O1t z8zp#$i)0Y>dTRQGP$#{DvtV?kBn5>`|5Un=wX&rDv7lKpro1E7>GR{QQ%34RRz9$j z{>6mV2CYyBIMT^CH|%YfeLFUM-wlT%DS63v`yABn7#r|qeB8@9&c%9QW`a+53!UCH z6F%sy34pOGE@vxJ52?GAE9D{*NS_!;fLzY&fNyCH&L%bf##Zi5Gy5=ps^oh&lfYTtaN-0xs7YUouN8A{bCij58g8 zPdn;GZ`WXB2S1qAfXdCq=F3ahwPE@L+!x3e@bEMG=oXiEzCWRf0}bfh-0+e(Vz0jg zryQU>nEY(N{S&_yYdL~iutEr9QOL1X8o{#|%s1`rgszqut|@9U)aijOGgdK>KZOTW zd^(5WQv=Ls_|FpFVlgo~rdUaq?OqOyw!m$Jw%2n;6_JID0rPMnqu9YmF5-rKpEYZH z8MWYufGk_{cT`{c?I-Z(tZcJBMJUA?YhjDf8xEL)nmOUBz#s!pF5px21 zwH9*rH%AUtIyc~o<-~ucBY-eW1&E?Dz;;86=h51bwa4pvtm4P?VMf#PSWO9Kiu9Wq#m$Da=sDFvSNKgh7&PXOH(H+ z%0U9!L6+`I1h?9Kbj7%$?1oZ4-gqS^ zQA!#4hLS9C%JIhRi32n$U#_Ct#+3r4?qpnUvi?Y@Q~;rn*8vwn7<5U-i{?saML=2- 
zWO^PV3I$67aor&fauVczu&k3))j;YDxD~)erGlzSkw}h70~xB)AF?ZeoHCE#kcVHF z%(Fi75jI`$r20F2SM`YlyCCdHd`sQ zs=NmOQvhew&v$D|sFpr3lBvKR{`7{Ams0_)ru@l@@A@b+?GXQcJj8`^V1T+b zqiu(lWPQ5f1)e$?3DPMF1HS1IlB<$y4yqQFXczN&DXuo(KnxvK>XhQm&CT&jO}_ge z_K9mshiMiT;#}e#2f?1APz=7V$(r27gn63ufY$j_; zk2xrQWc;uYWJUIX6hjx0I4`md3%*oPQL?uX#{AS54IiCL{s|;JrE=<*Oe{Etv{;ny ztORaQQjx+h94VU?sS7UsorItH`T4<50o!4})`sWewn%HhsTja?#rRL{sSZ^wMrg$> z?#5lS{^CPA;@?-1w-hSunO~c9o#JMci2hf zIx~qEyg1u+?$&=z-um8dvf+8Ksi#zqp%`%-C{l6A2Bw}WCosHn(lCjcde{6tsA}XL zSo_$TWV1AOoBRC|#aQfqo4w1fv$(j3=_9}mJOSQ+*^T#a-B+R=7qX0;jsO8b1MKpk z$DD(m-L9gBD=^!aPu*WxNr?-Qbv+%XbBnM&L*IG*FW9vf!PxT{BCN)PaMOH3M3q<% zUL^ik5>?tDOtFqQnDZFnLO>Jg9Hd4}jP#7WnTYLHNgC!eZ2&~ZvBrpY@ z;;Vc3-WAGU?r2`45(L-!gFgV$rPL`Z%K#6I;!oC>CnJu7ZttM}M0+2foCH;yn@&BA3Mllr1yi;9HK?p#I0e^q{8jsP<<~O_ zT0_Eo66gJU9sdqz2S~==J-MY^K5D5uM z^unmtJFjg9fLA8k`JZ^mnyI_uiuDs$zDAb|Vx_&9GNlVzoL8-G!9ub7PwblE;Z9x? zcw6w+0hwD8zT1rH=b=D-(OFs@d*uao(4`~`H@YM-X&jVt6;rqP#CU{FKhvBkV&wS= zQ(ZbS+4}VB5M=zDL?*Z7zNg~$j;0VcVoY1zU$$5*=@-aiHBp`qlL|YGGJ&I!1Wo8+E_?o)a#erh{tvF};$! zt@`D!X}9|3zQ|$B;&iZg?kd!#M*hog{WsWNm^;*00nUk>ONzI1$2d?jjn7Z_H;S9l z@kBGLRm5P_^>D?|B16X6ex@B&9UJ)L`NVE#j)y*#^JGP;yYA?QoY@ZabUbU=AB(y} zhVk}D+=+hEM5VrW8wb=!^ttijzZijz^1XT(>EXCvAD(`BpM&?o8(ory{hsAf={y{? 
zH+rm;@}9hW=cpiQS!gKI+t-&{mKvcV+-0aBtenK4=KHmjAziTq>iB`awP2M|PLzjf z)%(Hj`NRxYGN@b0yT8E;8EC5d+hw>NPC#3MMt<-~7IX_)k6N_T#-~X3))~&|!;Avw zCmtz1RZq%DlO93bm1@-nJI35D3()1<@;j`u>NPVni!R^ATJT4OUo~BqD&|K$*x&2z z?d3}9uET%ybx$F5Iq6X;VUN6`39_XI^^mO$A+YtWZv}X>dM|=YW<&!&Pi-9u_5&Gnk}t9o{~QAdG-^ zmnTpDqZ$-FlL3t@8)HRe98#xwf6+F((gOgp>24C_^3s$#WZW$Q24^6U)VXw$jnz^( z3r!tu03+N=yH})$=tQ?-xJHDNXY~u%0#SOtU!_j37FNa~dq#r-z#@rS!m}KYk7@P> zPLLJdGR6U(Bl52n)M_uf2>Ok2h2PtFGh26T6iNV`bN~LH4Vq)+WjoG3=NK^xS6w~d z{t3TsIhvbs5G8s&(PjXZWYdPvhjL>+DS6E+%D=BW7GlekO&9WJ$5$OkD$-w&rkb-z z3gMq$8;G6%z@EH=G4paSkqRkio@^8jBDiw`XXjGVd}^$3twG`IOa^Y{lu?D1t%y5^ zZxVdJWH-|kKhF@ex<+#P_10SAYcw^-8nZJ)EqkdCk{< zc``U@ALk@4hlPc4)#&?L2`3hLMJZc?;j+#L6p2Om-8Vby`o(KumJg9pu}Q7c_?Fg$ zWbaXRi&S^uD!-^==8gX?7Tl*H)Wb)5_YFxa*A25)pM3`H<9Yns4l|7}@do_>VE8Nb zagmBDP6|)XrjO)if^V{eE$Zb#;Z6P~!L`UoHaIAruv#hvBd>4&dM*nCrj*{RD*X_q z>O90KbfbVZ-k&^L-i?T;a(GS@&@Bf`fWb1C%ju9y*gM=Wqni|LLiQj|9 z8cKoU$g}e$uhjNIsBWP;@#4ZwW#E1_eY>pa;(~bLPtRpX1@8>eI+D|)=MMHZ7uTzc zE(bT?#N|AS&3_zwskoN%(uHzcwwORJOQRB(CsS>(04@#v&Bpr=?`S(@({>e{h+A{Y zb}hMW-BfMMVn9jgQWL5z9-PIl9G^Y#J(M~v7e>Uknv!oSnn*RIk+YL<@n^UQ`}#>r zO6q~hRnH%2qQDq0xg2m9!tc#zpbXqzg}KSJ@@9M*5+ol<%RFG)txDf~MY=>DbB=uOmq z=S+w7k8Kf+DsA^!OZfhN;h=ES=oO8ea%&jjYJsdNjOK@OD`j;Qt#SY)O7T8_0%W31 zEJHgk8N0)|k6Z%%k2r!YlRvmNvJ?X!{whl7RzCc+o3=%nP-vU^Qf2Hb38K>Lk3o!6u8tx zyBk798g~=Oewj+p8L9*q2E0vQ^gBB%kQ#QCRDL>EwtA~kXnn|%lfju#Ij~OfO8Zjd zfjEOt(EE719*S!NyW9RQ;|<%H{EE+NA5O5ZY*=|x9}MwY4Wd|-w((@}RV6s_lUaR= zb{{+B^d~#850T<0VUr4k_l&hEu&azK6`Y*zSF3*uP0pjva~z{yd290mb$gC!l_rsM zOsS7S1({3R;5;j~631d$_-r4Uh%-jkgKqs?fG*gvk00@)o={zEn&TbrHq<;fg(hr; zMh&9E{AlctQj;B+)NX2i4hiiW)s0t7Xca+e=X?yjCZ&bXxV+8fzjG`SRrIAxiYY27 zX~LNGr~i*#g@|nrUqhO%JEW%rOnlsjM*T>+H<&kaMuO5R%Q!-? 
z5B>A1*sLzD+h5e<-B36t&@0F!>Xqc2dFSak`o61+6s3l`?J0(?y5aa)g}y`3Hewh% z{PK9^ry|$8gN3a&acfy68Mr3LXWpGCSh!yQxYCkWs zg*FZ5y4e`>QY4p!*CjnML$RdlpmRVn?M|SQ;QI7nxmd?alWQIgb@8CiSv>oh7Aj~d zA}>hRsE9MVc#-`l2@)S?M=(KkB0f*1kIbHwn?YqyaGa!zdWEZ*0KxmA7l&oFb99Qb z?lBTy^jaR@dgs5=BhDvnY*N*_ktqyby?7Erib-aIA6`WpQeyd9`RK?~H5cP(x~LSa zUlAkbR8L*=A|JA$50*8?L2C)l<#H4bGfDRRSi5>nH&JUBWvoanNP$&mBv|o*SjlGd zpL@GBvBdyk`{Y?VL&4`R;@iGe_!KP%g}d*Xh*?U%=GGOM@z%>3ykxE$3BKPydI_vz z87V$II1o;jJ?Pudu1fPXinhGtB`mZjVJThvR)=7pu8qns#@EV5?K{@i3cnV4jG2I_rHiNCq(_& z*Ahy(17GnfmYBP(5E&&dF?5At&}x}5KQ8v!V6jak)R!W!V}S0U`>NX^=iT8F?C2}^ zRSI-oOV@rph#U?MWGawB!pg2f9E#Isw3g7KsUmS74%X_Zpkj(y%;x}^E246pSvWX} z_3See8I>^F;8vX&wN}UGj>40GMx1;-%u1TRa%ZfQganDilIu*u*;xo@evW@&-8bJL zb)8TAb*%GqoE;j=6+7vhEWv?%#I-nyOB;ODixc{AhOr>mor#4U^2o>n1nU;SDXx8I|&Tamjqu&JP*Em)S4gj(1pa#Lg-N6DccN3QJk*n|a>?v`ag`W-+JcE0(h z3#K@=?u@vpToFfyvBXj2Uw`7DqE9>jS^H71zNKL`l~EGF)}mzAx$))Uje5;77;x() zNlEdar9TK$rQ-y5R(({I%tQ-{mhHsq@*FgUAsWuAX~*v~^~1!|?n=5Z<4ke>N;iL| zZ9QJx`=Q-wKs`jTX>0bi=8CtVIU76sRhTE~JxCWBk74`{TxbKc9Ds_rg6&fgZP_C0E+TMxQ^jGK{LnY?VdKuf_hmkCIhZVrkcR!qW|YqiB5h(X5gDtNQ}2Bo z;UMLHCJ=*zPz;>3*+?z=w8E*FJtFDc1XSC}l(ccsJ?OFI5I8?PL$j)SRbuoz;y z_|wp3pH-1Edyn}yPt++yM5dN_k$Yf{&hzi#(X+jN4o8h;T-cAH@4K^*)?Nb6%f(xt z_xFWv9IQ&k*6tl0eeI={^46(D2nuEo@&J$^>DUviyJ`n_w|BQ z)~86OWv9j`Vu*Ng85rqEQBO6b{f}!a$h$A>xZH-jBeG8Jz)~7LqU)-3>aj2qs1)6^ zw25T-`Z2y=QhZTC-Ri><5h^c@{P&e7R)6l0%rbQn2}=)#hahJicHbEpd53P_&vQzY z-TlXf323 z_~4io%3q=A>bvYsN|Z#p+Lr8EH$M*Cn$!SQt9}Q*kR5c&5zvtC(XURNpPip>r*;QD z6>_%h$X}mYSR~B7%D})-@ooaPq@;Yh7jfm6V;SeDo#x=Goq%MLh?!>lt$ToBPPR0P zV5)u~-tgGt=2D-f_r&D{J-7E*+27yY-`8chby!#&_ljdIM}=n3)lef$F~})1n}nVo z<7djlnp6_jMeyGQ&FV$sNQRpW>7#+6JUdj0$ZHm_8>O2%>6onYlf_J%Sk#=FlV|GY1)mE*dZsTloWQ+3~4AnXN> zlfDG_L$3rZb=$7f$UCi%SxHBihDyLY(tH(i5q;|>0>f5g_Q&ve7-RjnBcTC+>o0&4 z8WvU>dbTT@Hs330Na1FNr2ZvW^BF*H`Z7V%Yav2vF%btK@75X9k6mUFFZ`!Yn{LKA zQ;fPUnNj#UiZJf8d;+N|`7c{FQ_~!Y8*C*bfpx2j(F}~f z_LRVpUWF-oiyCFAkY>GNmt3%1XTSW$)e^7BlBg&TUOKrW-k7_i>b=kVKWyxYIPq(i 
zsEqav_1`;L5d1PHt9&f7#%8U_=gHwt7q67sJ;1vfl>+DebgHNQ@1C^V^%WMqhqwx)l`NUsSc`NV!Cy;21LM}|BMDuO zx>Dh7*y)Sa=;OnwNt;FBkQ@@JxXq_$0HBprqd+scd0WZW`k410k>d7Cn39znOe0}EeYzT5TvkW-d= zQ92pYm5jTo`XjR?atdm#kd_Ncz52A#Hg!Z3l-5cG-o8ATn9!^c7!h(dywG1Cd4jWJ zF^N`Cq7BV=yCDmXf`BR)%Yh@kLYuiH{k6v1G;cmq(PK$QzW6nG`50EM=!S_1Z)e5h zJ}W~vN~()dQz!-ZoN?jhUY2yVaA2axMOl+eT*~;{6~gdc;W25ASkW>kmCdIdsmfEOJ<-#8ABWKUj?l z2Acpfjb0ypr80tVE$Blpv+wWkPmUgXENrS-)J@da7-|`OLwO@_S;2ydpDuxjHcG*9 z@#>zO^)VyQGePNOBK+=4a6O5*l4qbdA;R9aJ!bgPka?uo)weAE!P$fEdZoW0b$wq% z=%ZC!Tx(O+v|D*?ZGHm^gYmzCy`%H|b&8h3=8}qAcUsqg@ekqsWJa|AZ(gqb{k_cP zpi3SFDn;I$%>O%7?CWQyX^sIi+gQbt_zc{u3=_?0*QdYSzWj;A2tdCRbc}30Sf?lo z7~GMVaobmP23&T!qmhed?~X}E9vx+jMLA;>VfVStwO?_V{b+vKiRM)m8nl_%Z9}h? z3>!5vIh6JkO>)He*UE~6y7C{IOgy_cvRangPgTbu&Qe!&52IAEP7X{Q(xy^Y6H1M^ zMnlrl6H3ax;wAyHH$3g(|C`GE|3E3)f;)|M`oQQtGo8`lT=keOC{@W5D*U zU}Hn6{v&pLw)zo=DnmDxDtYTY%H)E=fhXe%y%CvRJJQk)s8~ty0vF!9a^0CWRWJ<{!^Q-p}p4b$CS+hucAP>`#l6Qn;D$#iQRF6I-t% z&RmwGYM6DuHMXrdS`#k>@!?XuD^@LY7=FeIiN+OS{V_9hvy{KwE--cuKPowqkcGQ{7DOw|MB$8eB z<%JE<974u;-6khk{Y32HuSYMly{I#0;kCv^%rJEdK7J|of*_41t}Dbup;R}CE2-l1 z32oX}S-aXRi2dkMEvlLtzEQ6Z@i3LrhP5hA`Nz~3rC+(5Mx9kPuMgh&X+&+MLRH)T zH0Fy)v+sCZo@Nq95@(v=3#tJv3LQ<~Xnr_UCY&h*R$g$j)>w-eF}H%=1Cw=D4`T)_ z{SZ5eE3?Cp)zTDYBXuR#>w8!gYBSc4szx1FRMYb+7hVBxEG(Wn3t_UyoK)u=aVhRH zDJP`{bC8wKGt8K6lmmgUWFLfj810h%PiTM7+B=DPSyMnm$~h$R$MC>F0RDaJuD{DObG&{AzfI1Dz88mg{A)u-1fqg8`%p{B?*`Q@Sa$m zvsKayt(pLwFNV1BJ0m|EhAuI8KdChhMU>ph%E#SnMT-qp8GlgVWVm%5hvJ%9)GbOx z)!fJWW8QW?m_6ZPG3%tse+mB~Tody>>a)g}Q+fQ|TtWnNM|w7!-@N8Uud^O{G6h+E zXYeS%|MV)3p|hWV8TUpU8)Z$Qc7Iu}Q$G65&$eVamcb?x1h4uKL&%7J%A^iGrIpoV zHuk7L3ru2S`aYd`pZ-j`-}7UL3Gh)V&$#1+b7C!ON%B!PP%qV}M9zu%8ZJv`3EO!p9& z6PBF}x2`v!^fTC1m+g5*ve3^Nqav6efs9@@?D0lUd4*UeEJX=3h_sHokrdcR`(%FE z51$sv+P6#Aq6gKhPdTuA9YzM-e7|iZT6K^sJc6(MiUj(6(IPRr1l-1^M_=_dBBpN` z_DB=M1_(u`eXjrG0>tfbR@JK@dhgqia)vKQW`tkzP{8m}-fBaa!a!`%aDw6QhbK3a z`j$G}Rl~yj;T1?RuTrj}s9HYdq}y{&Y*9iu|GcW1mpS|J^>g2^hLI`Xz50;t|1Zn_iifTb~dKxBC%NWs`>GL#>br 
zo{6tuPKM<@mNI79#|*fWFw3egl3M;VedBE3Rg~ulSR`@>X)q0Kgcp~R<}l;R>stuR z-wNJp1D#y#vtpUp6|6%p>uwwN!sJWPB~7KwuFTvZUZ7%Azi1GQ`0pRXj4&(KAQAMJ zRfL9y23`@6zBu#9-n1M^-iFSQq~g!o2#-A575pvW8{~+GfcyU<)YRzjmWK_G4+~xr zbNNlNv3j1taF3XQAs*^}TDK-st4Bz~ybL82K1CB|my7){ok`^i@t;4984S|6i>{Hs z8c&sgN<$2tixLnPoZe8TDZkjgz-%PfCAyR057M4!t-5kDg>#s<{>B+}KNdwFufp=C zsthPe958^#D|F-Hts7l`RtB*@V1a|&hw!Yv&za1wKfmxP6*vqDVnXsuZ|u)+ zVg&)qo)PxIi!2hh-jG7BPg|dxfn$OWf2;Ry(dP4Jct^ZgUf>UaHU;+RhWU}m2>9x} z-28ltlT&4-6)2D#>VNt)V4lzLSiE-ZcfZAmGAXxmFuLUF#<)`3dt1~e5{nh-XTJpv zxIwI(l^}3gf&(F6LT52=loMDt3z9~+z{y*yqz<>-^Hqu*hS0KOTI6F4aOr0(9dTKj zz+9#0et*GkOIL!SpyO4N>Gj?_5 zOL!Y7Bz2hzt&UN+lMN%ywsxu74Z)Xq1ncE)zl(v}&qoZm|4OtNLox92G0TKdUuV4; z$FuUj%U=%KoN20L(VDAi3|-8OU3>-;oF^@$r`oEKDNp@UX%ekkro83@UR0oTK+sny zp9@5k_oU-6KGrSi|Mra@)7pkQfJ|?54M??I<2J;rByM!8_}4>2qp=nDTV}IspA-B! zpyn!|B4M$e&Yc1cGm9<^CN~2xqtwBBoWNGd#}CTxMtLQl}0%KdNIi|_j-)xbab12o^5U7K6NV4rvj zdW)ajI`D>`ZpXzcFC7Wt>On8XqKm3dE6cfT&6yW>_7sY9IXb|j)qp~YAc-bX!gHN?J=fOcQhfe2R_N2p zNtY5Jxm2#cmRufB259cL3kc@*17zA-TFMo|zPvIqF~G{?(?C)?YSxg?#!dZLi*c&# z!2I?#{{&TO_5F*fx{39z$3MB(t>S=-k9qF{nWH(6&E?!ri5e8)4g(vdFUzF@+KIXT z?5GFajQXrD&|Y%A_4+b&iqz8nEi5{c&TI2DWp^y=aFUp=qYDcQ4c~eT0ip7ejEoFs zWd^7yZVs?26+Ok+QL*+e;Xoli>7sp4GKrXL)=BygLbW_2TR~pwacQJ8=BZHTS~%20 z1R{iAdq{5N!x8AG81LrT1UEVl-;!4IC{QC4H@ZbWCBJY3)~IAQgN*fLlA^qD{#VRk}DW z65T7h&n{oGU&jjCOg(r7wV{Ix|y1i1>=!WZ&yxgk(3EWxt+@Rd^amYw{~V<`D_ z-yXoA-riax=do}2jYL@`C0K~S`ARAnBij{cyW5yPcXJ|NfLW)^I_*0>JQn-PLm~TQ zGZaXkDE{6_N=2g!2x&pog;~JDSwq%+ff?NcJSHtFDrGQK_r_z55M${Iv)V_#vW4`* z4-&=f6qlhEHtr~&+J`iY5Mwt`3hj!mQ4ucr2g|noe9jEZl^XqzGEtAnzwrO{uA=(MYwgmrmY> zn!(||)Ye0HW}U-EllVd&598yxCtFk6U$2p}l>YpsY9Jo&ldM(Of7_#cDnSFd1ct2% zd9sJ?z*V~|2LYAKVfxsZUGR>w^3uut~0MI7y^64S~E49xVTXwSI3k=976?Y1g7LEB65?NQ=Dc;K~Em> zpqTfrADsBZgqKRns35CugK!^=B!I=^)@%V}nuiTeGT^Zu3Td!4f7Sz^u=IC}6* zMcOs_`{;r|K3o3X%V`X%R_YiiM646*cb`C@_yB_VD~GwnLPSA8nIcE@GHBudAai$Q zDD!<|5}A_sRKatM41T)(M%vX4Qd-(<`EuA8TPlAJ!j8w^3*28<%zcGA!}I4CjZtD` 
ztUMf_w&3(R%c^qSaZrzP_G_wG(HB%0iBhy|T~N7HKY*bmbaYOd+$IkUn053c=D0X) zzO%^47}|81DMqHWZ>8+|)(gFTuh5IW;ad*0eCM-E?ko3koaQ^|{qxVDs0fZ_vLMIY zrr`c&s=BF5gYHQxQ8olY;7r4b=*@5s-`Qw?qi+L0+=A~)^d*4EaN%lFzRg;s#Y(O>P+_#2ih9Z(zu5i~0ygNERh zrqaku9Q%dgQRoKYK68u72=4?bjxM2YB8w7vkne6Ul?JV>y@3c(2%GDM5t%A2Jd8aX z!|Pbj${4WU^AHYVIqna5wGn zH>%-|Hj}^{W4stEpPpfo2c(LrcE8udqLb;qAZOJ^M)%UF6qo^5FMumU$r+<$gb{Xm z(mLrXmm+seLgJhqJ7xj4EXWkKs|#esrC)~_B67&DwJo#Bc)OmB>|NdImrb2rh?M-e31(4csbb+FAP?+Ih zDT?f2wG}P5Wx1^YA2ZiE1lpMSVIz{7P1QE_Tf(wLSXPG`CV`Ru85P9{@aCK6#>}4ieyY({Ga` z)6X_Nihxqk;G^-3zUZN$6-g9kFoZY&G48ZJGtZ~t&48-W8=mW05x6Fw@g$gajG4K9 zlqd)0i(s^Npha_heYKpJ%cpF^lDTlH;jXZfPU(xQ`@S=U7!QPVS5P@9fyc)|N!EH1 zD<*`WEW*d!mi6j4?I}$mm-_p*4@^JDT&!KXcQQR~;)|ew~i@t67)m3JX zQl53Y`_m*n)_3uRHTrUu_;lZs&cSck3@&D_UVn5^{hxa2UHDyG-JWPnGmuu4lqjOu zIGCA*?ve1H)20)LOdV7Auq(baxNC}JvFajbpvAjsaC3Q|KpFiU3yDps_&uyzlwW`hFaK&RRP2?Af#TeP8z#`mW}5(0_8XlA9|sT<8-3>4%~v|IjPX zqv{$xW=IDTfy6x1>##9v>v7+$UOENyfA{y(1zy zsFMA;GOU9C!*w45<%gE+gRvGBk_aYt=*RCaG~TY&VR_+CJa2IJcvDN>pK0~<^;rN< zKGCru(Vd>&-eL$ogN9@4`DciR3&I=93!urs2HB}009zFiKo<&AlJAl1yKXLYT7Iwz zmYbIe++&Ov%Yi#VA`GrCl}leGuMHGm6n1a#w>@v<5zc`LOEFK|W|sHw8piLx#0_6x;Z@6yGcbrWIp?cfi9Ah)sa0Hzj0wzs{xF=M8#D1Y= zn(Q-|ohmBWoPvJOQy;tgJQfxfR0y)%0P0c7?HYO`lIsJE4%vd^CaRQjQln^dIT1J} zW{NcF*MP32XZA~vKIG#`AN-x?rrtRFX+d{)VmxUa;^ySnx3H8W;{|f;_JuNz_+M+{ zL%Eu3Mat)(IXijtQ-X}HBf}V-04&@o?l6LZ4G$VA8Dl?o-o*a_mFFC)T$w=6Adqw_x@d3-kR@j|j_pFMF*8)lM zdX0eS`)UzHZa~xh-^K0>s7N5%;-pMp!QI`wpFFCBSVz3<1z9 z0rbvU1s4{63IUB)pZA|g?!7S$VCZsp)Q!WYIIs|61vQWhLV_U;7$j(%w>U=cf{#** zq2_dk@uCmE`0;=`S{xpEB*&gY!ZGu0)4o(a@nyGTq~gva-u_r2&J$y3Wa;$$W2~Q% zZ2og^v0u^(kSga@1^J@XQ#ILMCs*$jK)_)?B*cjrOlB{Pd=)Ot4Ssw>)3u<|`NJ z=Kz@x0IJ5a6PRqz`Y0TW#2N&D$Rm1>+Ecv=;GmL=!}k(ia_V5F!$1 z0vOJt+}izEIEDnK5RTLp%G#%TGu)Q@Ps{5mMtF>NDze557J1aPf+y6u^whLWD}o6L zD@*Blly@$AL5{yqBCWSI_0a*pj@yqnQ5v8F+nPFmSrL(dM-jXr?K<`My&ri>y5UK$ z_y0QMH5?y7970TG^;OYMthi&BI%IzmXy7hn`85TzFmd67U(=Gq)W)pLe2v?vLW@$7OjaCTe)DOa*W@sxwM`!W#24~|tQ^$s 
zd0!JS%Y!=xlGy>3n~nor2edrp&t3l&;Cb`eHHe&;V86`Aw~)ni0|&z)rHuWR+SMi( z*5e-X5`{kL!5>l*uj!?$R42I*lNW2N>1a42INA2SxVXkZGqu|>+o_u5!43KpfEr-C(3va`o`I z@tp3SSL;>kmQs*28u6w}637c~dPSIC{0}9q2Jh(tX51*H5k&b*G1n)@zS7QB<`-M= zfm2iZM5Z#z_lsU5ob003^7&brx0-9d;?XUi6g4l8g+-B;|Kcof>|py>?j4zhHq7 zB;Ncz#tV-|SCreMXJBFksl?3Ruh(SV7R7&Vr;tqFpO9iDE<_M2YwLf&Pa9`_cb1lM z-?bn1D4P>ks`*AGJn4rN=Z%>ZW=FgR5sQYjXOXGjwN*MR=SBW-PJL+vRiu(c%XhL% z?lQU9mWlXgC$-YBT?5QXznroB99pMWO$bxLjALfly(ZV>Y zrPZJ=_*2hPR}e@Vni{%#C)8jZ_*@CxIPTmYO`hGe^Qa4^4@c)~?6`gl(}V{6if5|% zq`xo6V7ng81_G|1w~{U^rhF(XC%KYrQnPVc3w^}*T>d`kdm!2Xh0+Qx(WKv zbtek6XlKXkR}4UANfiQ}D3@xMI2mv)s%n-uN>rMr5cXn%zN|BoYg?LQF%zF!Ft2TF zd-vEFHS123^D;4kYzGuB8%B&6so@JERU5CDGA z2efmV#ZboeuiwX)D}GET>OJa*Wb|@as_#4&k1D(fqlKniU`anfD?6B_vvm$?rb`}U z@UOQp4oq&5dw2Z4PLBXQMvKm8D48yB)O($Bn3xOEDiTf|k!;2QSR1KUPIM!KsX(HV zFW;u#GmqJ7m1CpRdmi6jcvZjl0$f(RATCz}sz6`}$bv%@MGw=kxo>5-$e*YmKVbd< z2T6`@ulpd^=tVPo|CY2hVC~J_$E}x1c1;6YMx)J6pty;YmRC?ep+EYQ;3+ZT zFVKyeKCqVPRmK7ozNAFO3y^*RaCqlqs3P=4M9}SOO|$9?XdhTK4=7P}gWb}$>!5f)#4mel;GUs~E;DR;KI%aV>9H_qLDm;1lK_r955PQL@-B)%}{ z0pjrFyi2)S{_h7r;`!OOuozT7g@c?&&)ovGD-1m@lfFsJHig1TvDM4O_EI+A3Wo|7 zojO9%%e~+YM@bt=&o_^FXL}=>OwoKx682E&uG%N4TZ6){eMhladvEpZ~C& zL2f|&Zq4CVo|cYI24KifIIir&lljRvQ&_3mr_+ZTpF&i z$E)KNW1EQPSj`fn;Y4+CqsCfvy(9SBsQ2!};b{c~YAHz_RjycHbbGki8s}$nyPCYg zou~LnohHLBr^3(jQ79glb^d(>%Nr}CDb&BxMGGFQSn@^sp28B!rH;8%iMk8VQB!}v z4kvWYr(O^Zz172}RctHyX>x@ewoRX$L>h%U84EePKwo1uq!BsQ>o-Gf_lQ&i-h@QJ z8bbG*jQidc?8Mr%!AS;>g*cJ+X(Dm{r633Uh;k6$J{S(2h+X{qWEcc@vN8|2*9eYo z{Hm#=g)YZ+|7|hFR5cT((kanXSme4t@`+(%WndA2@dcdDE-#KWYtigUv?_X*LJ3$+ zPbYdRk6F&}6#PY0_{Bq%Y*4-(JT;ijoOmJNO>kw&DXDj+sx_T)7)4wAz{ zO9X(ba{-$D1i222j?M-tIfwb(Jt&qLUrF5fkK!aM`UTQlmT7(UN~#Gb^fzmlMO3}M zc*(%XC?TJJu+SETv+w|i7=|s^&4eZ34F|>+|N9Z6pIn_F+CzC@N|bn_LFPr@b}?f& z)$ZReYW;)>Mi!s|NbVv(rhfRKz+D((H5C{<1<1o0FFbxOhR688JsLAEXXl~$AYyjU zkC~-VB;%3<7FF4WqqRnc$44EV48{Z)f5S~J<(O;D`X?d)Bj z-mH>i6EJnwqzm`)!GgtAKz5D=8a%H$g?QQr1Wqn+bc4Z7+Z^m*c@eqn#o)n<=dR0gyhzBO)}%ni?C6M+%-gPf+*u6+tG^ 
z5Qyc?fsdhnojn=wkB9^&T)EPL2*jCxbmbzWW-d+zeZ<~l`^~i;6brmvgQZnFiGp@L z`$Va?r^jM@j&Z8mhFRBWE zW~j@04HFp=u5gBt41WDVsq^4SlH&FfW`5`tnE>=IWmnSRco2`_CpIPSGb8 z(yNk&nZk6*%*0?m7CPb4Iy{>eow7g+*fAKD&km%~M(EE0WbP6x`K0iHF zu%3AplB{;!ljZY_m~N~qYdn{`7*PVk#MukFN<%cpo=|$O4VV|tc3(^!rlPD5VFG?G zEEV@tPp~ipb1xk$=o|0MppbyIN16d-t_laIKuqI#D?}kUau`Odwq#ZjJ*`4Fgk5z0m6& z7j<=*CJR35tWSkSHab2)j|^q!;6Ea!fYe9!qBWFV99j2djJ@T=qn(`9nNX zn@Q>*&^dxoFYs4Fu*{^Ec4xnNH4`tdraVb1L^fYl=4Hi41tj1=81xh{2Yh18AGB?c zFE8~Pwa+mQw0oI`@XGg;sgIhQ4EnPOA<5@dB#`8fd-h(;5#v*g73QxnVhEn57f@4< zU4z0`RA9h4T;MgZ79_2Jb_a&BY-Oduz=y@Sp{l6{5I2CAN7zluFcBgmq8fr`r5xM0 zxTI3d$j=<1mp5*TKmHAm&pD}S;~vslcE#U;s3c?Wb^>_%_+o2EwPc7R=mJ}}+@ zTKaJrZ0{xwKpLw&5iroU{25QTBj3$f`zA!G^#Q6=V>^pJ?~UVWTPDCxob#7`7$&C9 zQ+IcV`1Tc9)KxIj+Ao4=XRS7w;{>Z}kA)8zQnz9jmX;t`igCpCxn(CgA&WlJ#Nb+= zVzydOo!YL`{f`d=mgHI|Rcfy3prT=x&npPRk7}7A+X2#5Q+C9k4fMwU$Rn2n1n*@Xk*pp6r;S~ zXo8VYC7FlH8R1;07eY1cI-5umuumS>^dF?uJrs|xjHvVJ< zecN@T6J#RiB$tQ-vDB--_bK#$+=;Ej29&bDEe`jT6%$mwJ*6XLgU+of0vg+|_+MB- z0iZ};T5k6-nVSYLiIXq^Ol*Lm;moxfm-T)x#M;6WNk8>J9ltXX`3?|nseMVZvcDGd zG6$~8r7F{}fR#Df%~`ZKAWX4}#(55W+fa$@GAsDP# zHISlBG_1cYR2Ul zGoZZ*4=5Ia#=;d&C2s8JK9R(JOwXSz%|7$`qOXWH*Xpgej{e=9oPuL?q);*P^_x8k z*{&W74gk3881i$0l#p~i_YC7d+u_>nohH@q)}4pls{HP6%!RXAyIi^%Tu@%?pHMiq z1TM|^szRWDHDN`@;uLO_A}J29BSgle`oBeO5{d9Bty=_M*<7MiPW`|tjC~AZ{qea- z>hBAyD(PR#Oa*SZHV-JMXBadDasA`CFb%=08kJSN+Hj zpJPCq;syBbp2w+ueiJS$1-#=y4S;*)H4fb6khYf1v*W>+S$ZdPQ56{;uJKs#@TJ|g z(z$;-JW2PfuYs>y&6$#Bn2=fdHH%EBzBMfpHLn;jCaq@1DGRO8w?kHntBAqe_bWpSDL$SKfROCqSZzec(AjZ&fB) zV4(^TxZ8KxsS5Zb`nc|zb+ZX3Jf&S+cP2ZF6n=ttv95*kHKP!dFEG9r4-v(?uw7($ zqbG(EanbB$#;X_kp>gRk5n9Au6X(j)CZAgLE6LsN@6?)_J&v z+tXSR{4-jEulW_<+-Rf21_&XI9oV?$!elWLcIH>l*5S#byt`TOYCH)#Thu}YV2voG_kkD*uhHp^1abT2h!58=lX zhop5~M?_lk;Yr2ezs~>31tinfGDuKH_2Hq%F3XQv9{e#RN;f|&Oj7aw->s`)J=5M} zTB+ADIS7ZN4?0CoFAL+8fHKr`?3PGo(?=%_K_h_wYa}@>;ZHqNKEosL)pupW+{f z7AK#6ZKYy%oYUh1RgImYBCHgVxstp*t~kAv5ytoN;H-k#=GEY-6$koDwH4to-F(&3 z^IyWTVFA^0DtpTPv$tgxO5ud3OkMPvUv<6j8Z0nMbXdb)D5~da#o?t2oD+OF5J8}( 
zKJcg?+f9AmM8o9M2=}smG5@BiWxR^1;1%UFb#KiL1^&Uz z{w&=LH?wG#(faA%AkWPiAJ0bo*pjfum!%2moSntwFZ794z$$qWuB}UB*?0&9-0F&U zquuWoj1x}x?h7skc;6P*GC--CK1dJNapF=F2by71_6Gl9%5|;$y|u7uVyRvIQvioX zT<#)c`o{--%a;%VkpD5!y*~YY6D3}?eC8dV)4qp>1U@V0vg+W=&pHWjBbhn!oD_}| zY#O_7NC%A}j2HQm7k|?X(O@_JHU4*=I0dR^k@rTMdAfNOq(ZquX>Cx{d&wIF)C5`c=96|#hUFi-h+An@crF)o&EhU@AqYX((g_27?Fu5 z+q(DYB}()h`#TY|1k+}4inobyCQD$Yvb|A4?014y+Oi%e2PPnTo|_S z-JzBLB4ai|$?9Sc4L0!XYxLv67C>JjM)K&%YQ)%$pH;9VmK-;sEa zwn$XfH)lpF`AU9-4P+~EF8T=&&M(*$`%T>J$Hm9Tqxt*Olo|7A-g61Sb{5`o3?#7y zsiU~-_|Mkt=RnN71&1|zif>NW!A*7TI!{t!?zC7xQR~=tN>gKRXy#d$n`fC!617k~ z>rS2X)0GpK#09YWGF!Z#AwP@$waMGZ-g8_7kw6LR86Ex2_L%|NQ{xwcl6tbAqR;2f zYAaNfDI08Be$Cc)wSjr(d80mb<02@yXB0Ol)5yEATKzMNo&p7%s_3Wg)hbVPx*)bt z<)Yl6SEc1Hw~;hdyWUvh!{MZfz>dj}4_HiCkCeNY58++C7_0Mw^M5N`#h%+uJP%RI z6!AmBe9PU_^$`<~g5cA?7(>gMJDVcoa+f9}Zn**HMLD&+OD`!YgIBNB zo~W22GoMABb)iqnJ#tLkKf$@xxR3%o>sRjGj^uPm0E4^wD}eEKei% z1NwR=pGSUyza*m7K#FdAo?NhENB^WA)kr=w&|F-q(aR0^T?$HY8@xs0C~xOSU=lbU zC>ym<$RF~(SEy7I^Pm{wJw9TbJ{bQWXK13RUARojr>yACWHX>bDr@R3n160j?}gON(M8m zwm=3Jm1X#)STWqSKIR-E#2;V%8B8SmrObU6S5P zSBldZR)2F=qV((SIR8~H1Wav_`Us!ADATV(D2&VQTr5RFn|H3SLu$~I$)`^Ce6&g@qqDJiIuG-EVU?-$~#v;TBw z4ZGql9hPFK@W{n|4FuzM5?gJ#yp6-vzQ$p;A*ZdaFKpsGC%pLn*Z*7s{&X5F zOUxacgY|ww&#bsF|1-Xy#v6eePKnWO1oVQ0w4sXnloslj`-%LqcFGWW^|7#4$(}%| zi_(^F>NkFoP^*^~V@+zLkLO6@dc{7WK)hab;M<%xJnL9e?ztaQka~3<%z?-1l^>bH zRUr${>h8E0QXePu-O7K4N9q0<8q8(RJs6j@f8NNbY={u-oU9-3THla)cJd91@RLNT zr3-!q&1nf1q7{e+0Rgxg1677V`&7?7+&eXW;0}I%x-_f_U;KaDOUHzo91R}yvgp&I zrEo1u@24#2nC7)}87ojEFOI_vwo}A`N~>tV=h+~;$#*p3A?k>`>l0G7eA1oC^Lx!H z_ah{ijb}Yc$Deq-Mvap>7AnpMnFxx3G3m;WKa$;G7N|X@iBH*da^3ma@R_H?7a~@k zi$eyxFR;|1hX?zjbpFkA;aSu;#z{MLNHT7APS<`Y4s~D~&AH~aN|LYGrR5y+Dq-=A z*N3PZ69sBkugr~#v{E#ZD7k1FQR>G`e&loVN&ad2k2_PWPdL|S2mRcI%l)*P{xyR{ z&2LR>N(Lv_qq|dJAkoC1OVax03#@!7O^$+GmEuJg-ymzLX{%_3X={mK=p+g<@@wGM zZaq;#1lTdL*6IU^X#z+Yp-NU*)b|U$p)J||hcN(#3;g=_SybLp`*kNSCFPwqqwvOg zDzupMhus+a>oT(*IsSJt(k~&&qlyC)$nr0VwHC79mZyJ{{*_Tk!Y&}!{QJEQZ^Tge 
z_<;PP<-+7CE-quD7(B?5Mh`heS- zYtvhOwe)8`iw1lry2^)V!eqe_Nx+cOZ@@As8KqO#I98Pr%{nwdS8hN*#Bkg$qOvW$ zAX0A21Tb5D!@gH!;}(}$GP{f7JOx{^=vU>a1m^ItMX+X&8CWlTZjiNm3qjX%!+VOW9ldf|d{_ z;Z^;(rimrs-jH|JOKP zyI-mkyT7Ob-&|r3`tJk4e+1^%xHe^$0B5a8r^B7kZA>S@x%f zGEphB{eo1Asxh`cUFXMt-mdb$^WR{;5dMc){QviMoDu(dyY06npoOJ^B$Mk2j+E;) zN1mvLi2FoywJNO4g|I8BsPrxQIa+q0Qt^7uSVX4@qavTIcc!eOuJn3MjJEt9x>sp5 zd0PAd;n>gs3H;)Rd6XRGC?@Oax)AyL7k{W(xqS8?*wLb1$tH@fb3}fk$7;JjGDACa zmQP2K=QsI`TdYc#fSExF{}LCJA5^zr!6!t5-MoFk(ZB8%+*=gIp}|fMRgCLCo-czn zxa<=T=r~5bEEFm#Xd$w#?Z5j%pQOl^w_D?p4@Mw?xzWqvPqb_GxFi|7gkpaO#T=FC zAeJ=lur<|h)tN&PO)g@Dg}>*ek9{5#U@X|+ZpV{4exqgxF}Hat)IT?Wi>nS+B9qA% zw4Vus5w!JgceBIdk1yvT~s#-LW;o6w$lnXz7dDP)}WUJ$ovYIQ1y)U4eQvW?5Z z#@hxTv1gjR=xJs1FqGqSH^pk7z#<(%^@3%e;z>@q0azA-K@=l!oQ2_gkK#AKo#h1t z5Sje9e~dnzQ(Riyh8hU_?Qlm^K5`yNq4`2B?*6n_``&X9-K5OWC;$CuXy5o>$`B)#(8c|ei+ zDXRf8`I~%w;S7)y8P&sR<9QK2GeH46Yk`18ZU7CUQ~Ar=dj30}0&bU#Gr$c;et8Mq2*bA z-O+wmJi^r%Q&n#NP1fa%KfT)y$i_tue}Zx5baYw?sKF zfY_UuWq>p`Jb)8iL&NOQv=iWDk=>6)75BD0E%#&pKa96tDJU&saLwum3hS~uRo6#M zge=<9_vM#jz9VHH`3bpoo;eD5V|HGj6xgeM(0;Dq$16*e$D;@4L{T}ui=ZE9yHu~K z7f1LE0qbk%cW5v03AgxNc+%H6Z5J0l1jM_MGG!AVUI*HsPc9|w+n+O^f28}4e6!lh zRgQadgh$M|7~U;LV*dAg_VFUwAdlZ0nuf-v0&kP$phy0M?@h;mF&mjNx4DJ36{0l! 
zaA=>zrbm8wzF$ErRdowjpq>^eAf}u5)FWNq8z+@(=-!w6CSGjKm15fdD-RSh^+8nR zrmX!b2vlfjWp4aH?$JEWra7*FX080R%zwc$8v>OQigD>S_}Pc#N3U8xy;J7>J@-($ zB+j6}DT+F`y3f=uJt4OV<-OEF;Yn6x3R{Is{_hOoPh5^4dK+w3wPUrL^=3+)q`iu$ zLxU|wkCaLIt!miBUswD`m3!M}7M|pLm~HLoiBtoDexB5BChN(1jsr^IjpcVgn%6t) z$ZIML5?C+U8ES7hl9iVq%0?+T|A?R z<9<$3@_Os)>(TMKX7aV$H~(933>R!@B+xIN)Z>33|(efvkWXeqVYFyf8LQ2(c=pV74`)1fUaP@)c;B zwOL1T8}CjHjF}}=k9bxDUzyHFU7C^>pnjA?rFa7DP;v&7+@AFB%`;pkEMWdmGHc$1 z55_e8ss~QkNTAGOcBJgccK3VH%BLPFBiuyUazvknSrNF2`Op3A+rZ7}lP*u3JV?3^ zoVp3jJ=#ET^CdF%RRuT{6Ju}%K0HA{k&%Z@Rm4KnCJGZ4F?dUvFWl|^%6>gc)yQc1 zPJxUNwfv3meGMtJuSyI2>*?*jh#B1{1=B>1TWZA93%VHK@nQ&SIb)w_paYccZ8)gh z?`0#F23Qg+MaAdDIV|1K9ODB$~`3U28y-W1?kc?5Rj7ne>tu3k4C7Hu>#XaKm7RPZe^HkCw7UNgy;F!Cogj*jM_ z@fT57kqa8pK!8jvNg*~r?Oct<#cjNwp!f11BSktZ0O5a# zl|6$0msly&!wE9j!;RZKHAU5}lF%y<{44F7#to9bV}Y8&{!{aitrQvNSWFWAL@6-)pliz z?vSw<&93K|r`Yb?;CIRuNx~V>>)OW>HyIK&Ibzr0G1o|e;4fvS`gkH$q*XfF?zTJr z1+&Pwn$XlUk6UtUK_NWJ^UsetPP7h6P~f!v|Kv2CKQ}0X_u4#hw-N_;UI#pz#Ein| z%-gy}i)xCC4;KrMh?D)g$EeAmM@INt6JNyl(>g0I5=f;0+n)4k+dnN!XO^mxQcsta z)bjkR%I^7jHBLV7&QM`Lb21e|c5rhA;k1FT^C+A}T<-_f&Ui_5g=4*R>x>TW(qnt* z=f%e=Dl?x}Qq;O(J^NMwh(7gtiT&6=C8P4Rz3&&r~%=s=YZy>u1sW&uxcKla4 zb=7|H8>OJ+Gv{fyCggn`ki_NoAMF zrX+JCI{IP%>0K7d$=%^;+u>iIuaU!TBk0O9NQ`(E8Uduwq|t7cNm|j+Ot}(#f-t!j z(WbuPb;hGL3m~)BJs*nAj6z)*h2e`adj-ABQjiM>)|R^}awWtiRaau=%zpD))U|?Y zx4tDH>tZ$WAS*Xg`wga1t!(7$Z5pSg+8ZI~yUg}g*-b0)8zuOQ$2$?9b9RxboBI?` z#-g3;@{FZlm+E;pg_WzTT8^(+@)I`w3Hpt8s%p%6^&5p84OlbujIK$8XuD|8%zyF* z%`x_pZr>hlAaLm)Kex4GtIpkejSq#D8|Dab_q{>A(yBmQNqovg$Z)m@-aZ z3t?p_BD|jAID4B8!Tn`G@c4W}p5I^DrC0CrlakaQ3JY=UCii9Uxl60Pbe8kK($fjB z=Au&+87j^1h}_a&eOx{Kdo(MxG0TR{M6Lfix-j^ z^-D~2$_eIboGUtodf1d*D`DTOnon#VZV3*3gJ}-$u@=>DdO|Vmr_Y2{gvyrNjULM1 z9nD>+8G8WMY;zfe-IA+PiIa_^)5(!Nn;jQ-#J4deJHqhnJVuCJXE``LSX(%$2n#PY z0}j3T7nx?%7K5}bo9OB*(=c^&ak_6wJ_+mDalY=%Ql;%aHl|ar65}x_CK|AG*z?mC zulO$}1Xs%!siYTHDBJccwvT8uwPuGdqaU`pX_h9;sQRQ9>_VsC$SF@p>0hm*R$qQ| zvG;C6Vg0e^vCb^j(R;w|bGz{MCdH8es+wAD1f9B&b`$CC49ychK2W@i)_xGG%PFy7 
zC$Zd{jf$MFwf4GaT*5j}NVp4e#hd>LY-v{9@LVsLsr; z~=94+?H$O~T@IUUB z`z9cXf4{R6_Q=8bM~XS~$35@ti-hsn8kYwy_9jb~mkW`gUN#=o|GB7p{%bJ0rOd#A~_Nm9)Zf)rntY}-eML$Y2KuD@Tjo{3udSjrsD!Fl(S zSn^z#8@@!TSg3Bz;0+RZn!Y%%(#!ytQ)M?UHp}Bgo-ciE8V}a4Snbc==Z2m9`uVRG zBbd->yBj0lsxp*O*OYrEhU2;W!9Lag3$w|^_{No``?R1wMubR4X<3!(uk_uA{9sKk z$#}3TJrI)A*(8I<4g%PMlkz~=$C+=ynLI@y)sm{})TuJ&%BXHvR+}4BL7_-QKpA>` ze8NYMpABSS7ucdKkBvvtN?+$gx)|8$b?EX*|JqT7ExDRX8~!6F<#aQUY18XNYQa7; z5l46JLdf5c^sWs3<{TbSf(CMw^Xlr{M%~{P4cNGZqN&OTynk0&QX*?(}t-=SLTF-gts$Bb9|r`Fj%%p z2|o0hV}~nNY*wO^sV7PGFHF4b49rpq6Th|Ys-OE?okty>(bykoUmsXj6;=lFG6_Uy zjR&L2aR(L@Fs-evXXew)jy?jiCRX_?mzhyF?v-=Bq>c9UrL zhLfE9cr|x_6n#O*(twwkifQLUT^51UX`N1>=~T>*7p{u{NMJ8CHSs=k7~q$q{d9yl zB}y9^Q9S8bfinj#5kwjZ0ye`Nq$BMa0k+_A63b1B#%GnM5T9<7qqqy&rH5zamjdMn-p@jaZ*&`#+!I?N))JL6z3BH8kJ|RAm0f} z^qswW^#c!{3~%e;0HstxM^_gjCT67Y8OACneT`Hgkxkfwi#))*UgMGrRf-{j`zsV4 zuqle9HnwpL` ze8)fNzI^#oHM{>o;D(SCscZ943|tBYu6d_Kj*T5+7bM0kKPy7hB%1_p+s{j$9%#3p^&3K*n0 f91c$dGw>0Ce^C*0W-?e84*V(0zmltvH4pkf15r}X literal 0 HcmV?d00001 diff --git a/ydb/docs/ru/core/deploy/manual/connector.md b/ydb/docs/ru/core/deploy/manual/connector.md new file mode 100644 index 000000000000..21df10315284 --- /dev/null +++ b/ydb/docs/ru/core/deploy/manual/connector.md @@ -0,0 +1,185 @@ +# Развёртывание коннекторов ко внешним источникам данных + +{% note warning %} + +Данная функциональность находится в режиме "Experimental". + +{% endnote %} + +[Коннекторы](../../concepts/federated_query/architecture.md#connectors) - специальные микросервисы, предоставляющие {{ ydb-full-name }} универсальную абстракцию доступа ко внешним источникам данных. Коннекторы выступают в качестве точек расширения системы обработки [федеративных запросов](../../concepts/federated_query/index.md) {{ ydb-full-name }}. В данном руководстве мы рассмотрим особенности развёртывания коннекторов в режиме on-premise. 
+ +## fq-connector-go {#fq-connector-go} + +Коннектор `fq-connector-go` реализован на языке Go; его исходный код размещён на [GitHub](https://github.com/ydb-platform/fq-connector-go). Он обеспечивает доступ к следующим источникам данных: + +* [ClickHouse](https://clickhouse.com/) +* [PostgreSQL](https://www.postgresql.org/) + +Коннектор может быть установлен с помощью бинарного дистрибутива или с помощью Docker-образа. + +### Запуск из бинарного дистрибутива + +Для установки коннектора на физический или виртуальный Linux-сервер без средств контейнерной виртуализации используйте бинарные дистрибутивы. + +1. На [странице с релизами](https://github.com/ydb-platform/fq-connector-go/releases) коннектора выберите последний релиз, скачайте архив для подходящей вам платформы и архитектуры. Так выглядит команда для скачивания коннектора версии `v0.2.4` под платформу Linux и архитектуру процессора `amd64`: + ```bash + mkdir /tmp/connector && cd /tmp/connector + wget https://github.com/ydb-platform/fq-connector-go/releases/download/v0.2.4/fq-connector-go-v0.2.4-linux-amd64.tar.gz + tar -xzf fq-connector-go-v0.2.4-linux-amd64.tar.gz + ``` + +1. Если на сервере ещё не были развёрнуты узлы {{ ydb-short-name }}, создайте директории для хранения исполняемых и конфигурационных файлов: + + ```bash + sudo mkdir -p /opt/ydb/bin /opt/ydb/cfg + ``` + +1. Разместите разархивированные исполняемый и конфигурационный файлы коннектора в только что созданные директории: + ```bash + sudo cp fq-connector-go /opt/ydb/bin + sudo cp fq-connector-go.yaml /opt/ydb/cfg + ``` + +1. В [рекомендуемом режиме использования](../../deploy/manual/deploy-ydb-federated-query.md#general-scheme) коннектор развёртывается на тех же серверах, что и динамические узлы {{ ydb-short-name }}, следовательно, шифрование сетевых соединений между ними *не требуется*. 
Однако если вам всё же необходимо включить шифрование, [подготовьте пару TLS-ключей](../manual/deploy-ydb-on-premises.md#tls-certificates) и пропишите пути до публичного и приватного ключа в поля `connector_server.tls.cert` и `connector_server.tls.key` конфигурационного файла `fq-connector-go.yaml`: + ```yaml + connector_server: + # ... + tls: + cert: "/opt/ydb/certs/fq-connector-go.crt" + key: "/opt/ydb/certs/fq-connector-go.key" + ``` +1. В случае, если внешние источники данных используют TLS, для организации шифрованных соединений с ними коннектору потребуется корневой или промежуточный сертификат удостоверяющего центра (Certificate Authority, CA), которым были подписаны сертификаты источников. На Linux-серверах обычно предустанавливается некоторое количество корневых сертификатов CA. Для ОС Ubuntu список поддерживаемых CA можно вывести следующей командой: + ```bash + awk -v cmd='openssl x509 -noout -subject' '/BEGIN/{close(cmd)};{print | cmd}' < /etc/ssl/certs/ca-certificates.crt + ``` + Если на сервере отсутствует сертификат нужного CA, скопируйте его в специальную системную директорию и обновите список сертификатов: + ```bash + sudo cp root_ca.crt /usr/local/share/ca-certificates/ + sudo update-ca-certificates + ``` + +1. Вы можете запустить сервис вручную или с помощью systemd. + + {% list tabs %} + + - Вручную + + Запустите сервис из консоли следующей командой: + ```bash + /opt/ydb/bin/fq-connector-go server -c /opt/ydb/cfg/fq-connector-go.yaml + ``` + + - С использованием systemd + + Вместе с бинарным дистрибутивом fq-connector-go распространяется [пример](https://github.com/ydb-platform/fq-connector-go/blob/main/examples/systemd/fq-connector-go.service) конфигурационного файла (юнита) для системы инициализации `systemd`. 
Скопируйте юнит в директорию `/etc/systemd/system`, активизируйте и запустите сервис: + + ```bash + cd /tmp/connector + sudo cp fq-connector-go.service /etc/systemd/system/ + sudo systemctl enable fq-connector-go.service + sudo systemctl start fq-connector-go.service + ``` + + В случае успеха сервис должен перейти в состояние `active (running)`. Проверьте его следующей командой: + ```bash + sudo systemctl status fq-connector-go + ● fq-connector-go.service - YDB FQ Connector Go + Loaded: loaded (/etc/systemd/system/fq-connector-go.service; enabled; vendor preset: enabled) + Active: active (running) since Thu 2024-02-29 17:51:42 MSK; 2s ago + ``` + + Логи сервиса можно прочитать с помощью команды: + ```bash + sudo journalctl -u fq-connector-go.service + ``` + {% endlist %} + +### Запуск в Docker {#fq-connector-go-docker} + +1. Для запуска коннектора используйте официальный [Docker-образ](https://github.com/ydb-platform/fq-connector-go/pkgs/container/fq-connector-go). Он уже содержит [конфигурационный файл](https://github.com/ydb-platform/fq-connector-go/blob/main/app/server/config/config.prod.yaml) сервиса. Запустить сервис с настройками по умолчанию можно следующей командой: + + ```bash + docker run -d \ + --name=fq-connector-go \ + -p 2130:2130 \ + ghcr.io/ydb-platform/fq-connector-go:latest + ``` + + На порту 2130 публичного сетевого интерфейса вашего хоста запустится слушающий сокет GRPC-сервиса коннектора. В дальнейшем сервер {{ ydb-short-name }} должен будет установить соединение именно с этим сетевым адресом. + +1. При необходимости изменения конфигурации подготовьте конфигурационный файл [по образцу](#fq-connector-go-config) и примонтируйте его к контейнеру: + + ```bash + docker run -d \ + --name=fq-connector-go \ + -p 2130:2130 \ + -v /path/to/config.yaml:/opt/ydb/cfg/fq-connector-go.yaml \ + ghcr.io/ydb-platform/fq-connector-go:latest + ``` + +1. 
В [рекомендуемом режиме использования](../../deploy/manual/deploy-ydb-federated-query.md#general-scheme) коннектор развёртывается на тех же серверах, что и динамические узлы {{ ydb-short-name }}, следовательно, шифрование сетевых соединений между ними *не требуется*. Но если вам всё же необходимо включить шифрование между {{ ydb-short-name }} и коннектором, [подготовьте пару TLS-ключей](../manual/deploy-ydb-on-premises.md#tls-certificates) и пропишите пути до публичного и приватного ключа в секции конфигурационного файла `connector_server.tls.cert` и `connector_server.tls.key` соответственно: + + ```yaml + connector_server: + # ... + tls: + cert: "/opt/ydb/certs/fq-connector-go.crt" + key: "/opt/ydb/certs/fq-connector-go.key" + ``` + При запуске контейнера примонтируйте внутрь него директорию с парой TLS-ключей так, чтобы они оказались доступны для процесса `fq-connector-go` по путям, указанным в конфигурационном файле: + + ```bash + docker run -d \ + --name=fq-connector-go \ + -p 2130:2130 \ + -v /path/to/config.yaml:/opt/ydb/cfg/fq-connector-go.yaml \ + -v /path/to/keys/:/opt/ydb/certs/ \ + ghcr.io/ydb-platform/fq-connector-go:latest + ``` + +1. В случае, если внешние источники данных используют TLS, для организации шифрованных соединений с ними коннектору потребуется корневой или промежуточный сертификат удостоверяющего центра (Certificate Authority, CA), которым были подписаны сертификаты источников. Docker-образ для коннектора базируется на образе дистрибутива Alpine Linux, который уже содержит некоторое количество сертификатов от доверенных CA. 
Проверить наличие нужного CA в списке предустановленных можно следующей командой: + + ```bash + docker run -it --rm ghcr.io/ydb-platform/fq-connector-go sh + # далее в консоли внутри контейнера: + apk add openssl + awk -v cmd='openssl x509 -noout -subject' ' /BEGIN/{close(cmd)};{print | cmd}' < /etc/ssl/certs/ca-certificates.crt + ``` + + Если TLS-ключи для источников выпущены CA, не входящим в перечень доверенных, необходимо добавить сертификат этого CA в системные пути контейнера с коннектором. Сделать это можно, например, собрав собственный Docker-образ на основе имеющегося. Для этого подготовьте следующий `Dockerfile`: + + ```Dockerfile + FROM ghcr.io/ydb-platform/fq-connector-go:latest + + USER root + + RUN apk --no-cache add ca-certificates openssl + COPY root_ca.crt /usr/local/share/ca-certificates + RUN update-ca-certificates + ``` + + Поместите `Dockerfile` и корневой сертификат CA в одну папку, зайдите в неё и соберите образ следующей командой: + ```bash + docker build -t fq-connector-go_custom_ca . + ``` + + Новый образ `fq-connector-go_custom_ca` можно использовать для развёртывания сервиса с помощью команд, приведённых выше. + +### Конфигурация {#fq-connector-go-config} + +Актуальный пример конфигурационного файла сервиса `fq-connector-go` можно найти в [репозитории](https://github.com/ydb-platform/fq-connector-go/blob/main/app/server/config/config.prod.yaml). + +| Параметр | Назначение | +|----------|------------| +| `connector_server` | Обязательная секция. Содержит настройки основного GRPC-сервера, выполняющего доступ к данным. | +| `connector_server.endpoint.host` | Хостнейм или IP-адрес, на котором запускается слушающий сокет сервиса. | +| `connector_server.endpoint.port` | Номер порта, на котором запускается слушающий сокет сервиса. | +| `connector_server.tls` | Опциональная секция. Заполняется, если требуется включение TLS-соединений для основного GRPC-сервиса `fq-connector-go`. По умолчанию сервис запускается без TLS. 
| +| `connector_server.tls.key` | Полный путь до закрытого ключа шифрования. | +| `connector_server.tls.cert` | Полный путь до открытого ключа шифрования. | +| `logger` | Опциональная секция. Содержит настройки логирования. | +| `logger.log_level` | Уровень логирования. Допустимые значения: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, `FATAL`. Значение по умолчанию: `INFO`. | +| `logger.enable_sql_query_logging` | Для источников данных, поддерживающих SQL, включает логирование транслированных запросов. Допустимые значения: `true`, `false`. **ВАЖНО**: включение этой опции может привести к печати конфиденциальных пользовательских данных в логи. Значение по умолчанию: `false`. | +| `paging` | Опциональная секция. Содержит настройки алгоритма разбиения извлекаемого из источника потока данных на Arrow-блоки. На каждый запрос в коннекторе создаётся очередь из заранее подготовленных к отправке на сторону {{ ydb-short-name }} блоков данных. Аллокации Arrow-блоков формируют наиболее существенный вклад в потребление оперативной памяти процессом `fq-connector-go`. Минимальный объём памяти, необходимый коннектору для работы, можно приблизительно оценить по формуле $Mem = 2 \cdot Requests \cdot BPP \cdot PQC$, где $Requests$ — количество одновременно выполняемых запросов, $BPP$ — параметр `paging.bytes_per_page`, а $PQC$ — параметр `paging.prefetch_queue_capacity`. | +| `paging.bytes_per_page` | Максимальное количество байт в одном блоке. Рекомендуемые значения - от 4 до 8 МиБ, максимальное значение - 48 МиБ. Значение по умолчанию: 4 МиБ. | +| `paging.prefetch_queue_capacity` | Количество заранее вычитываемых блоков данных, которые хранятся в адресном пространстве коннектора до обращения {{ ydb-short-name }} за очередным блоком данных. В некоторых сценариях бóльшие значения данной настройки могут увеличить пропускную способность, но одновременно приведут и к большему потреблению оперативной памяти процессом. Рекомендуемые значения - не менее 2. Значение по умолчанию: 2. 
| diff --git a/ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md b/ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md new file mode 100644 index 000000000000..0ff89e632935 --- /dev/null +++ b/ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md @@ -0,0 +1,59 @@ +# Развёртывание YDB с функцией Federated Query + +{% note warning %} + +Данная функциональность находится в режиме "Experimental". + +{% endnote %} + +## Общая схема инсталляции{#general-scheme} + +{{ ydb-full-name }} может выполнять [федеративные запросы](../../concepts/federated_query/index.md) ко внешним источникам (например, объектным хранилищам или реляционным СУБД) без необходимости перемещения их данных непосредственно в {{ ydb-short-name }}. В данном разделе мы рассмотрим изменения, которые необходимо внести в конфигурацию {{ ydb-short-name }} и окружающую инфраструктуру для включения функциональности федеративных запросов. + +{% note info %} + +Для организации доступа к некоторым из источников данных требуется развёртывание специального микросервиса - [коннектора](../../concepts/federated_query/architecture.md#connectors). Ознакомьтесь c [перечнем поддерживаемых источников](../../concepts/federated_query/architecture.md#supported-datasources), чтобы понять, требуется ли вам установка коннектора. + +{% endnote %} + +Кластер {{ ydb-short-name }} и внешние источники данных в варианте production-инсталляции должны развёртываться на разных физических или виртуальных серверах, в том числе в облаках. Если для доступа к определённому источнику требуется развёртывание коннектора, это необходимо сделать на тех же серверах, на которых развёрнуты динамические узлы {{ ydb-short-name }}. Иными словами, на каждый процесс `ydbd`, работающий в режиме динамического узла, должен приходиться один локальный процесс коннектора. 
+ +При этом должны выполняться следующие требования: +* внешний источник данных должен быть доступен по сети для запросов со стороны {{ ydb-short-name }} или со стороны коннектора (при его наличии); +* коннектор должен быть доступен по сети для запросов со стороны {{ ydb-short-name }} (что достигается тривиальным образом благодаря работе этих процессов на одном и том же хосте). + +![Инсталляция {{ ydb-short-name }} FQ](_images/ydb_fq_onprem.png "Инсталляция {{ ydb-short-name }} FQ" =1024x) + +{% note info %} + +В настоящее время мы не поддерживаем развёртывание коннектора в {{k8s}}, но планируем добавить такую поддержку в ближайшем будущем. + +{% endnote %} + +## Пошаговое руководство + +1. Выполните шаги инструкции по развёртыванию динамического узла {{ ydb-short-name }} до [подготовки конфигурационных файлов](./deploy-ydb-on-premises.md#config) включительно. +1. Если для доступа к нужному вам источнику требуется развернуть коннектор, сделайте это [согласно инструкции](./connector.md). +1. Если для доступа к нужному вам источнику требуется развернуть коннектор, в конфигурационном файле {{ ydb-short-name }} в секции `query_service_config` добавьте подсекцию `generic` по приведённому ниже образцу. В полях `connector.endpoint.host` и `connector.endpoint.port` укажите сетевой адрес коннектора (по умолчанию `localhost` и `2130`). 
При совместном размещении коннектора и динамического узла {{ ydb-short-name }} на одном сервере установка шифрованных соединений между ними *не требуется*, но в случае необходимости вы можете включить шифрование, передав значение `true` в поле `connector.use_ssl` и указав путь до сертификата CA, использованного для подписи TLS-ключей коннектора, в `connector.ssl_ca_crt`: + ```yaml + query_service_config: + generic: + connector: + endpoint: + host: localhost # имя хоста, где развернут коннектор + port: 2130 # номер порта для слушающего сокета коннектора + use_ssl: false # флаг, включающий шифрование соединений + ssl_ca_crt: "/opt/ydb/certs/ca.crt" # (опционально) путь к сертификату CA + default_settings: + - name: DateTimeFormat + value: string + - name: UsePredicatePushdown + value: "true" + ``` +1. В конфигурационном файле {{ ydb-short-name }} добавьте секцию `feature_flags` следующего содержания: + ```yaml + feature_flags: + enable_external_data_sources: true + enable_script_execution_operations: true + ``` +1. Продолжайте развёртывание динамического узла {{ ydb-short-name }} по [инструкции](./deploy-ydb-on-premises.md). 
diff --git a/ydb/docs/ru/core/deploy/manual/toc_i.yaml b/ydb/docs/ru/core/deploy/manual/toc_i.yaml index e4c6085fb110..e8bd5584b771 100644 --- a/ydb/docs/ru/core/deploy/manual/toc_i.yaml +++ b/ydb/docs/ru/core/deploy/manual/toc_i.yaml @@ -1,5 +1,9 @@ items: #- name: Обзор # href: concepts.md -- name: Развертывание +- name: Развертывание YDB href: deploy-ydb-on-premises.md +- name: Развертывание YDB с функцией Federated Query + href: deploy-ydb-federated-query.md +- name: Развертывание коннектора + href: connector.md diff --git a/ydb/docs/ru/core/deploy/toc_i.yaml b/ydb/docs/ru/core/deploy/toc_i.yaml index 9c6d94948add..e907ba76574c 100644 --- a/ydb/docs/ru/core/deploy/toc_i.yaml +++ b/ydb/docs/ru/core/deploy/toc_i.yaml @@ -1,6 +1,6 @@ items: - name: VM / Baremetal - href: manual/deploy-ydb-on-premises.md + include: { mode: link, path: manual/toc_p.yaml } - name: Развертывание одноузлового кластера include: { mode: link, path: ../getting_started/self_hosted/toc_p.yaml } - name: Конфигурация diff --git a/ydb/docs/ru/core/getting_started/self_hosted/_images/ydb_fq_docker.png b/ydb/docs/ru/core/getting_started/self_hosted/_images/ydb_fq_docker.png new file mode 100644 index 0000000000000000000000000000000000000000..be30ef3370975fe12f8cfb0b4701f0b69aa66c81 GIT binary patch literal 13418 zcmdUW1z42dx-TUy-5}-AIYS6Ycee;g4b0Hp9n#$;2nZ^oq)3Y(J<>`_BPAiy4fjKF z@BcpM-1D4!_jAv=d+=dC-nG`d-u2e{t#7?XYpN^U#-_qXLPEN&q$sP6goI29Jnvwl z1HZ4F+f{)tWH)UEX{7Q2n(s(RxE=0t`tA+=g8Xd2 zla#ZwJ^UfuT*b!yx4fm%XcZT`agdTpL?S2r6c$D5^rxdpia>U9I$ z9cuNPs_Y{uMX%L}oD*;sp-{_f-M;|#yi9^BK$0uThk#3#qZD+P?d{>LW+ zvw>QsB2Db+o!@Pg91@j33>be#k=JT5(#Cz{I!OHc|JU7!`6Lf5RZv+AZ!d{QF z^00xy-EO%3Cb>H~*}K~~|0B`D$|NG*S!_tHdfYuW(Eri|CZ7L`ls{FO5CiWFek6yIt>?xY47k z6QI*yk^Rf8ZtgyRH0`q|*vmNCJGlZBCJ3LU!1d2H9T@r- zI^VAkKzDO@b+Uv1rA6@92Wa_i+Ur^1e?0s*$r<{0#r@Lde{Q|M+fV@xBnx*}AHa?` z9EENae-ki}KYx4u-TnSXv;K?gLHKV5LT_B&>Tj&C1L?ow7C@gHulmo$zfRWw#`*Ye zRP%R>%Q{)uUFW;&$^UtC16f`Azhqq)6fR_G@gG{zLI`ed`QKzkFz-K_k?&7J`>PrM 
zSnRK61PlL9nvwtCnbAP@#_RuYZTN4Z4<-N?g7N=H_7Q@Z^9f$t`9_BSr%?vx`#1LC z7x+ExU+e?^6MBEO&!2JsBT;rE<6rWsr3=^1*5~BPW##nGh1eM_wvR4xones$+Rg!9l2XS2QXvID%g z`)2N>8zScSAJHFk~LPSmFtK|qjvmJeHJzjo? zlZ#7@f;vYY1(c+P5*-~qQEi3KZTcbZ$4ryVw8_QPPDP4v;JMKG`T0=UqkC+Wv>-_o z&`&UmG~~mF56+&R@~O;f;zC=f2+WV0Q+4=`j*hH)r4)!1B&?)X7A#T!V*(DnQpp@a z2XO*5Dn!(y=jj{|&412+B7Wybo4Dz;?r^m8JFQ2{}a73L+HinK~!{fNMvNB5ooB(e)3@@m+akZMaCk$&;(YH2d)_oKLv2=jnv`>;}!g z0(f^sK&Bhh4SbEBTkJtC-aF>GLN3ymLGe@qKlcVDdR6avFz@y9wDgePs$$GeN}?Yu zd00W4l9B>zSPbEE(S5Cpbyq$v$-=%jg9{TAGijBCRa0`dC3bJD)F4zk9Q)(UjIp@d zr*?0wD2&4}ot3_9)CY=+QKw6YR9E?|NEhN`edfL69gF?%Ulh2(F@yP0_v(|{c9(mX zyX6F(X5((*kkNB<<2Cyobpb<`O&NAhUq5&QO1r!`>*XvmsJ65L-RB#?RLy2|3hfAJJg&3o#ydI5M8&|H-}~?SWLc! z55G_$KY4PD1A1PdT|zHTkaG?u;^E;TVbewv^E*_%n~jW$K0p64boXn^<^8$KJT}H`qM}vhVW343 z2AUn*Fz-U0sCf(GmO7_7^=wSCI(fzoCGMlsgN?U=U?om*GZu4fEG((c7*aShi1z$B zOx12EW{3W=wM0}@bfhr##K!j9_WVACa_6DkG+nIBbRH%m3Rkw2J%ihrs=T~>S77Jh z4r3}|oigJ$!tChq@bJFJDRKqTFj?likmpOToh-+4r@8)TOzT8$LgsmEV`Y!Bz!vCC z7Uwd&YB(L$Ue#8E5uX6x;L?WDc4^F8q!2C=7(Dh)jTNR$R>SOF0Ie05UXSY z-r-o$?~vUDcr5GWm=g(!iW*rmEFQ{F(*o(LpnyKREXtnsQLViz(a_i!*=;KRBsa-# z-HDkkC~oM{8A-N zgLrdK8noya+!15Gv)H+eo!G;jvid;8V;xkDzq7RR&gM%JlM30%3s^k$y^P6-r1&1~ z%PryTkvf&_3hoDQ^wa{4nEVbmwMEU_3}A}pFH~Tk8FO}bc9JQgXW#pD%~90`16g?<6PH>rE(Kg~0+j@rNu#k+60fTYhNIyoBQaS-HMeCcwuWJ9 zLYPOg?Xj^a7?G8`2>imq$-!8J1u)fDe3K+Zys{{1aSVsp7L3TmjFN~fO{=$TAhRfd zlguV*mn;giY;X$r*#JyOv6TRel{@=53phep{`5MyQGA~Vfd`DXStzoIaT_fD9? 
zw;;GUzimFp+*9GXB!vPhxR0`U7VK&W5Apaw!>YrN&z?CDn-cD}QQoG@w4ny^Dl#W< zUtnM?GHowcg-(!g1-IRDWq35k>1)Hv!GP?>LO`A${HnOZWqVl1sO3&L(OAP!sUA4( zbCvx*WA?mCqBAdu$aIu5ew|b~%g?m>0%S&jc3JO2$0EHwKP4hqS)rwC1^#XXIa60y zT)D`!p6tQ%aW(i}%D|b0rbzz{&7?Unwl#HJ>w_%qI~%jq9Tfh0>EaYZN>%G)9ibo{ zR5JE-^o5@!z%CR(tf>8F!E24tHiHA{Z2jw?wL3E<7^9kVUc|SitRGi?t|;OlSrQ#9 zD$ep^c8_9SUunaQUnDh7V_uDZ{gXD1SiM0xQ&`ny}FgN8!zX$KS#ruEAZt8 zYdww&o{lwsZy`_P%>MSw>p4LookUp695ZL${o8K>j-Kf?M`XWVYEhK8?>D!x$u9Br zNp{2pGPdBRSmaK#t!)`fnTou>y!rH#J&L8%^s&bPr-OW zCTFNB<6CVrQwtOuV5DKK#N~nK7R6kWWsRnb>&u#PRA0$S*&lrT7I{1afC<&`{;O-?o}DZYw~w9HBQNE?b-7;#tDO#MEB2Qpx5_Kj$xv-`N_GK3f=E!b@IUJ9Y>-hq&PF zF7rgGb5YMPmXWhnPznm~#U`YHTg1neV==8=?APFxjzG4li0X8ex2nH#GIG-$h009J4GTLV||^%>5>D zgS`*hj=0NyPIWdJ+K}T6iz6$`pQ-j2b|fk>`_RHq(1W}$_wTSzJdkRk6JtvKCKZx( z&w_@mY$inTaMWdCMJGF;qq}>-q=SnMmw__*yZzidLrXS`;(_*}$mHGgr9KvcdFt^B zwc}u#8*!qm`YN`##uvwXyy=In?jD>LKmzbd{xVqBG03X+43b z`*hgj+oz$~anBNxt`cKYz~|!TVIZ>+ARia~5J%TQiO>M@@&DJv+l@SnN|_p3-;>2GaotwI z`JtILEY-D_&XAOq7S+x3WBxQ0hjn+7{FT;cgJen8Z1>7o+?CTJ$%!eC=SC!yd8NA4 zXkqq`+dT`lhcjYe{AsEp&E89m(q)utPNB&P5<5dF8%GRYZ52uw&;nfwxfoCvjJ$rbFaQ9b7ab*# zVb-~%Np}00HY6=>7UK?wZUhq0?I=GpzjNL({$Q4`54N9>OS+zf#3X}QbM~&XCPHGR zRF2q2B^qKwR*py+{*Ls;Y9aItH^d=Zn*tmtt6ZaT6&O=%TGPJ1_2%dKq}%?+dPJ!~ z^{4SY(jMSg)zVgwkMXfFs^;7nzdNs<#+QGtw1Ud`Xz_H^cdq#>KTkE8#Et_K^X!Z-4ms;&?lY-xY$QK_{uLO~iPY zvC97vJNIM@*OO*8!T)4W3E{pqO1dRdBWG?-bhwGx`fm8k&e?%ZPl`aS$&>|qx)!kX zk~sJz9z#5i6^~iNHpHmeomArR6w6P2Qz~!JwK!RIUH<6D1V+uqcnYPOEh5ToadA=diCFkSD0><$~gQtDXR=?dpi&cV3@h!~K4r#8meJWyACA z`MYh46fA7qwzkRGN)GGdG}Vb^UReK=Tidf;41NilSq;wUG=@T1G%NuEQ7O-0`vPP8 zhKUxbhZN_%X8cYwo2e<2o7K0ZFcQp`$I@*elr@I&_v;vc>Qv;<3x5nPM8NO?`>QfX zCO}QcsheaSZquqD5XgKp4jeIKjT)Z-%!TW2 zeOXt@pR77k#L9VS__&&7(0r9pVyP$l8!BSDIe`3!7<%DgtN7Z8arU;{i4!QT zjV@dSz~m`Pf$H+p*vkW{)M`)ebw#<{)f{Rph+{l(gDhI@__2_)oUefi*wd9UlAi=7 zr{OYLoEGJB>84$KBdZ9KLBn_A0vIdh7rJHpXN5pYQI;P@?)jvaf8CyS$!di|$|Q-< zgv8)U>N%cXw|DI;2tRto3gAX0ruG?Zk^qfLS1zJ?$6NWqB_m$vu5(OW39e)>Sdo7; 
z!3t6q@KRzv@aQ*WU^-(c-IioeM??+Plh>l(>CnMVK{{$1kgQ{6qa=LUWBs@6(19yE zpra%yj6%k)Lel3D>pFHAvl;Sq+ST#L%V`R;evla%AV>9@rCVNYOn!8 z0j#euXHtUgwqtDrx78E6PX0bGHvKBuN%C&Ep z+|4T+z5gpKAqsMKdr3AArXneEii>P|Uy78-)2JT97dvl6G=NkA04 zLsqDCt&y(Gf=Jly<5y~OEWVzXSg#EcQM*IkDN8dyX-kB2X>EtMrxsS5h#%qEM{HA>a^_W|U4CrRdgaWZ zN+nErvb&n@G$ik7`b?!_VE2knBCf6O;6vwk>Xy4J4{*N*xs#Lyt1GG@LtP~&}c$R*-GgS@BL-~KH!vC1MfzYj(`8+1JEEm$@i26xMZ?p^j* zR@IyYZ;g3ORXVMTmmWL{WeJjysuP?U9=cRf-G35wc&P47CyrEh|DF7C{=(X9Ee6UU zjpQZDiN|M!Q}(o93K>gbB0du#D=#y4IqkABNIX_C>8%xfES>02PkH5VDfDiem#pj& z>tMjy3Ku6Q7fjCub_L~}JRU?fM!=&WN5BSEGC!wNK3Q)|Tz=s~`lY#81yTC7mdNwL z){=**o_*9-WbL=}?|Tz1ei9jH+^H?(;FwYtent{tT?(B%>;(_6Om+nrp3?}sE%t=8 z18?mftMKGmL_U`AeY7mxN<6wTBAH0n*t;7}B6m3I)2?s8afyH9~3; zKi5Y&HpQwGWzx$dV_Use*1n5T#fnH~Ff?d;{Aj zc4eYL?5oGA6^c*i46rwe zq^o|EzO~~KUjg>`$S+LT=(|J3nI%paIS?Ska(6}T>(qT9rKqtQ*T$Xqnlzk}TaTCA z`8rkCT^l+D(_>r_!$UuhIk?>T4C8)c36Zu|$5WrEx4LUDQP-Is`;eeA0AF5%nu*0d zv;PtIsvYpMa;QO$yw6Y+2Y_jhAK*Of#j*o?J+~3vZYSLnD^7UY7Q@v`cya7h|IStF z+e=gE=PPD}aQ3Cl%o5GW0mt}GK=B7xb@`r&Ec{>UJysilbu2k*nh{vWJ!O07*r6Ps zWjzyMKaSxBVN7Zb4*i+;or^D*Cx>6rz?djIGHX6QF;St4sFN3ASD34>7U1{11Qzxx zrKIir~$5|V)L{zmjR#^J{~EosbhMP+4KP3xq-F#hP( zfcgrPF0 zI<;iDGz?KE8{3>Q@32W7MdD6D1bUc%e9mtfc8QM7TDyag6}H^7UoFYk5u z4Cad~?4)!K57(dq$UNl5i(3F((CU9F0_-vcidBq;@%G^4l!oMyr&v;pWFRixJ)@Y_)AWt&qu1;4@(2Lj=s=8&PmjHq zIx4!hM^&F5e}Ks{@OAfxeQHCI(4dN@hTa3)EP+J*c5x@Gt=f~cWafw3kau==DDK(Q zNXu|DvVpM#Hb&E&R+4|*B_yE;drSJs{A2}Q@9~Z7WkMdhii(Q+_t9H+WECH~#HfER zp5VmY5y4yLNbd$VzJ+QxuE+Rv-RCxY1a##1)Ke*05GP1!tp2zC=r38jy}FzC8}7Y` z-=1x5pVJB|l0^_rwAe{p-h*@F<<2JBPriMTOYCzb{#DR6*0|gAS(<~ACB&%h^5_VY zDV5F3+JCr3$@1&k_nzc-Y?*FcIX^#^dVIRzu{(x$&j+2|ou*JjtnlcUbTMno=D4eI zlAELM`2bKuv(d7#wB}LHk7KB&=~8b#J}s&!FLXeb-`a={Fi&QMgx@Kr9Hj25G1RfFY2iph12G<^ z*in|2MecL5$Yf;LNWY|q$;rt%-@=7-=C$4JItSmKo*1vP*4$AElMnm!sH9jU4|=hC zfGO^dTi51PbouZA?BB<$`&t{TJ&+3)%Lmj|omLzPMnZ80#Fp)!+fXd55-tj6=7bM2CN<6m5&*TzAgt}t zx$=&k>QoS&b4_jK*N-6X)R?wi>= 
zr}K4oEK4OCh4}?a56;q5U$)ouDvScNET@4;D%Las;nJf!iSAn$x!eo-|+aa`Bq<=#S_M0IP}H~}DY2%Jo1;-N=to5$>{F9mpH zefq(pvEx0!B#7r3aRf=Jp~QkeCNZZ59bA?dNe#N!Y>Fz~_P9&+D^YOt1~cpA%mr-D zy0-MigKpvFpRx3=PZ#n57oS~gB}JO`*3jvb4ZJS={NT5wz2z}*lh;xst zzQR0_kT4XlYXL?l$g@SjVu&b|)Q6cJ)hxfaYz(TeL2RogUAUC7)Z>mloC=&=vGDEO z2G>H<@xb~+1&WLl?#|{Y#lEKya-ZFm5IZOGzNaGOmmiV6PZs5Xo9xCLG{NH8tx6ql zcwmR{bTF#HdNl4J=dR2D*0Mk5wZLn$wJhi>vxg=hF88;KcWb{NFMI<|33p0k4U3BG z6!xjlL*sZ(gtLz_KV>=?MnvG<=Ib2D;$C!veBUQyvQU*dY1fw%1mS^QIQ4RDGhT-P z5E`1^u55MN&2bjXQRZ}E%5!eL6wiVq3jiN+4t=&XPMKuJLk(6|Cxbyb68;%w?d1he zsZ8^;i95Nnz4fdP4o55Bo?4CePG2J{@Z?djK9mT5ui2L}6eS$nEM;7F;z7+98^5^^ z0T_eGdG*dKD1lTbca6^K*WZa-u;yM*1I>lV-(|kuedI;`DI3AS{>(~gK~Wqx9Q6x? z5aOFgIN}03IcgfN7YA=d9iGKV7?KyqF&fj0ywrscDo=1t^DJ=D>bFxulq2pdQngP- zf&*iaypqmI7?A$u0SB%LWCu%$qHaYLy&(`zE7qo$5*$saUw?A1&;846ackslYV zKX_C+=SPc5!|_aaVcL+($SQ?U3*FQ=Z0KjP^W{r)PF?mg^no^@c;lT6y;51k+_TtM zTD3?twj`tK&q|bsOJK5Mf4_N5po|}DO1zcQ8g!Q)lGU0DWi?&o18yDn=CIFtMny8? z`mDLd+djG_TN_pLuUfl*j^|86gQ(B%#$yQ4`ciYPPppALl@b(rQV7$dg`X7Q;1g0_ zP^Et&kwO%nXw^${i&tjuRDI-7q}M@PTqGtq*cV`=6O)_RVr_Ul@=gz2Hu&*9UG6L> z)u>8;_Ji;0Ek_C8@U>MZf-^y6+Zqv_A#Xy!9F}Oh?4*BgY>oNxUuPhSN8#)Hg+om+ zewnsvV6m#<003tfLwF_yhY%&D>>M8I9c@jGOUl6X0ljspnjNfx)L^bau8a)w`5CQ$Wc=FNXE>^fW6~>K#0L3M}i)V1m9F zQVx3BO^46Vm7cUoB&oi&zOH=&%3=8BKRfDkURQOIU`9Oxfr~dbH_t=NKKCKs&*{5t z0JRFt!V+J~jqjI>WVczHQ7cNuq7e}j6A}>67aM$5QM?pLRZ5UiML#<``!?O+wrhd0 z_+3oAvbfX4Tnl6thSm}4*>ZJxKY>;(1}K1N9UdlZa0~M1NOxySoetcgQFC_Y9mo|9 z1uAuVgUm0QoEO^8_iG99h$2)`C>iKe;$OXDm6esH7Zs(s=Qu6d?0W!P)OxL3qCxf| z8J3~~)H2;JH))Q^x#zSv+w2#WkPv~{IxwLAQuSArS!A$212zs$YIImljev3nCmV6x lr*w|H3JEk+_x7)l`~+O>h{9^G|ML)%lAOA1xwP5i{{={WjoAPI literal 0 HcmV?d00001 diff --git a/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md b/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md index 35040bf807a6..b9cf4076ba93 100644 --- a/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md +++ 
b/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md @@ -97,6 +97,8 @@ docker run -d --rm --name ydb-local -h localhost \ `-v`: Монтировать директории хост-системы в контейнер в виде `<директория хост-системы>:<директория монтирования в контейнере>`. Контейнер YDB использует следующие директории монтирования: - `/ydb_data`: Размещение данных. Если данная директория не смонтирована, то контейнер будет запущен без сохранения данных на диск хост-системы. - `/ydb_certs`: Размещение сертификатов для TLS соединения. Запущенный контейнер запишет туда сертификаты, которые вам нужно использовать для клиентского подключения с использованием TLS. Если данная директория не смонтирована, то вы не сможете подключиться по TLS, так как не будете обладать информацией о сертификате. + +`-p`: Опубликовать порты контейнера на хост-системе. Все применяемые порты должны быть явно перечислены, даже если используются значения по умолчанию. `-e`: Задать переменные окружения в виде `<имя>=<значение>`. Контейнер YDB использует следующие переменные окружения: - `YDB_DEFAULT_LOG_LEVEL`: Уровень логирования. Допустимые значения: `CRIT`, `ERROR`, `WARN`, `NOTICE`, `INFO`. По умолчанию `NOTICE`. - `GRPC_PORT`: Порт для нешифрованных соединений. По умолчанию 2136. @@ -108,7 +110,7 @@ docker run -d --rm --name ydb-local -h localhost \ - `POSTGRES_USER` - создать пользователя с указанным логином, используется для подключения через postgres-протокол. - `POSTGRES_PASSWORD` - задать пароль пользователя для подключения через postgres-протокол. - `YDB_TABLE_ENABLE_PREPARED_DDL` - временная опция, нужна для запуска Postgres-слоя совместимости, в будущем будет удалена. -`-p`: Опубликовать порты контейнера на хост-системе. Все применяемые порты должны быть явно перечислены, даже если используются значения по умолчанию. +- `FQ_CONNECTOR_ENDPOINT` - задать сетевой адрес коннектора ко внешним источникам данных для обработки [федеративных запросов](../../../concepts/federated_query/index.md). 
Формат строки `scheme://host:port`, где допустимыми значениями `scheme` могут быть `grpcs` (указывает на подключение к коннектору по протоколу TLS) или `grpc` (подключение без шифрования). {% include [_includes/storage-device-requirements.md](../../../_includes/storage-device-requirements.md) %} @@ -166,3 +168,82 @@ docker run --rm -it --entrypoint cat {{ ydb_local_docker_image }} LICENSE ```bash docker run --rm -it --entrypoint cat {{ ydb_local_docker_image }} THIRD_PARTY_LICENSES ``` + +## Запуск {{ ydb-short-name }} Federated Query в Docker + +{% note warning %} + +Данная функциональность находится в режиме "Experimental". + +{% endnote %} + +В данном разделе рассматривается пример тестовой инсталляции {{ ydb-full-name }}, сконфигурированной для выполнения [федеративных запросов](../../../concepts/federated_query/index.md) к внешним источникам данных. Подключение {{ ydb-full-name }} к некоторым из источников требует развертывания специального микросервиса - [коннектора](../../../concepts/federated_query/architecture.md#connectors). Ниже мы воспользуемся инструментом оркестрации `docker-compose` для локального запуска Docker-контейнеров с тремя сервисами: + +* {{ ydb-short-name }} в одноузловой конфигурации; +* PostgreSQL (в качестве примера источника данных); +* Коннектор [fq-connector-go](../../../deploy/manual/connector.md#fq-connector-go). + +![YDB FQ in Docker](../_images/ydb_fq_docker.png "YDB FQ in Docker" =320x) + +{% note info %} + +В данном руководстве запросы к {{ ydb-short-name }} выполняются через [Embedded UI](../../../maintenance/embedded_monitoring/index.md). Возможность выполнения запросов через [{{ ydb-short-name }} CLI](../../../reference/ydb-cli/index.md) появится в ближайшем будущем. + +{% endnote %} + +1. Установите `docker-compose` подходящим вам [способом](https://github.com/docker/compose?tab=readme-ov-file#where-to-get-docker-compose). + +1. 
Скачайте [пример](https://github.com/ydb-platform/fq-connector-go/blob/main/examples/docker-compose/docker-compose.yaml) файла `docker-compose.yaml` и запустите контейнеры: + + ```bash + mkdir /tmp/fq && cd /tmp/fq + wget https://raw.githubusercontent.com/ydb-platform/fq-connector-go/main/examples/docker-compose.yaml + docker-compose pull + docker-compose up -d + ``` + +1. Инициализируйте любым удобным вам способом данные внутри развернутого в контейнере источника, например, подключившись к нему через CLI: + ```bash + docker exec -it fq-example-postgresql psql -d fq --user admin -c " + DROP TABLE IF EXISTS example; + CREATE TABLE example (id integer, col1 text, col2 integer); + INSERT INTO example VALUES (1, 'a', 10), (2, 'b', 20), (3, 'c', 30), + (4, 'd', 40), (5, 'e', 50), (6, NULL, 1);" + ``` + +1. Откройте в браузере страницу `http://hostname:8765/monitoring/tenant?schema=%2Flocal&name=%2Flocal`, где `hostname` - сетевое имя хоста, на котором развёрнуты контейнеры ([ссылка для localhost](http://localhost:8765/monitoring/tenant?schema=%2Flocal&name=%2Flocal)). Вы попадёте в Embedded UI базы данных `/local` локально развернутого инстанса {{ ydb-short-name }}. 
В панели для запросов введите код, регистрирующий базу данных `fq` из локального инстанса PostgreSQL в качестве внешнего источника данных для {{ ydb-short-name }}: + + ```sql + -- Создаётся секрет, содержащий пароль "password" пользователя admin базы данных PostgreSQL + CREATE OBJECT pg_local_password (TYPE SECRET) WITH (value = password); + + CREATE EXTERNAL DATA SOURCE pg_local WITH ( + SOURCE_TYPE="PostgreSQL", -- тип источника данных + DATABASE_NAME="fq", -- имя базы данных + LOCATION="postgresql:5432", -- сетевой адрес источника (в данном случае соответствует + -- имени сервиса в файле docker-compose.yaml) + AUTH_METHOD="BASIC", -- режим аутентификации по логину и паролю + LOGIN="admin", -- логин для доступа к источнику + PASSWORD_SECRET_NAME="pg_local_password", -- имя секрета, содержащего пароль пользователя + USE_TLS="FALSE", -- признак применения источником TLS-шифрования + PROTOCOL="NATIVE" -- протокол доступа к источнику данных + ); + ``` + +1. В селекторе типов запросов внизу страницы выберите `Query type: YQL Script` и нажмите кнопку `Run`. Запрос должен завершиться успешно. + +1. Затем введите запрос, непосредственно извлекающий данные из таблицы `example` базы данных `fq` локального инстанса PostgreSQL: + + ```sql + SELECT * FROM pg_local.example; + ``` + +1. В селекторе типов запросов внизу страницы выберите `Query type: YQL - QueryService` и нажмите кнопку `Run`. На экране появятся данные таблицы, созданной во внешнем источнике несколькими шагами ранее. + +Успешное выполнение последнего запроса демонстрирует работоспособность всей цепочки преобразований данных: пользователь {{ ydb-short-name }} формулирует YQL-запрос к внешней базе данных PostgreSQL, {{ ydb-short-name }} обращается к коннектору по внутреннему API, коннектор генерирует запрос на диалекте PostgreSQL, извлекает данные из внешнего источника, и передаёт их в {{ ydb-short-name }} для отображения. 
Точно таким же образом в одном YQL-запросе можно обратиться сразу к нескольким источникам разных типов одновременно, извлечь и объдинить данные и совместно их проанализировать. + +{% note info %} + +О дополнительных опциях запуска коннектора можно узнать в [руководстве по развертыванию](../../../deploy/manual/connector.md#fq-connector-go-docker). В качестве внешних источников данных можно использовать любое хранилище или базу данных из перечня [поддерживаемых](../../../concepts/federated_query/architecture.md#supported-datasources). + +{% endnote %} diff --git a/ydb/library/query_actor/query_actor.h b/ydb/library/query_actor/query_actor.h index ef47d2300a0a..b5bf939b8a8f 100644 --- a/ydb/library/query_actor/query_actor.h +++ b/ydb/library/query_actor/query_actor.h @@ -12,11 +12,12 @@ #include #include #include +#include +#include #include namespace NKikimr { -// TODO: add retry logic class TQueryBase : public NActors::TActorBootstrapped { protected: struct TTxControl { @@ -168,4 +169,102 @@ class TQueryBase : public NActors::TActorBootstrapped { std::vector ResultSets; }; +/* Retry wrapper: registers a TQueryActor built from the stored constructor arguments, and when the response status is classified as retryable registers a fresh one after the delay given by the retry policy. The final response is forwarded to ReplyActorId. */ template +class TQueryRetryActor : public NActors::TActorBootstrapped> { +public: + using TBase = NActors::TActorBootstrapped>; + using IRetryPolicy = IRetryPolicy; + /* Uses the policy returned by IRetryPolicy::GetExponentialBackoffPolicy with Retryable as the status classifier. RetryState is not initialized here; Handle creates it on the first retryable response. */ + explicit TQueryRetryActor(const NActors::TActorId& replyActorId, const TArgs&... 
args) + : ReplyActorId(replyActorId) + , RetryPolicy(IRetryPolicy::GetExponentialBackoffPolicy( + Retryable, TDuration::MilliSeconds(10), + TDuration::MilliSeconds(200), TDuration::Seconds(1), + std::numeric_limits::max(), TDuration::Seconds(1) + )) + , CreateQueryActor([=]() { + return new TQueryActor(args...); + }) + {} + /* Overload taking a caller-supplied retry policy; here the retry state is created immediately from that policy. */ + TQueryRetryActor(const NActors::TActorId& replyActorId, IRetryPolicy::TPtr retryPolicy, const TArgs&... 
args) + : ReplyActorId(replyActorId) + , RetryPolicy(retryPolicy) + , CreateQueryActor([=]() { + return new TQueryActor(args...); + }) + , RetryState(RetryPolicy->CreateRetryState()) + {} + /* Registers a new query actor produced by the CreateQueryActor factory (one call per attempt). */ + void StartQueryActor() const { + TBase::Register(CreateQueryActor()); + } + /* Switches to StateFunc and launches the first attempt. */ + void Bootstrap() { + TBase::Become(&TQueryRetryActor::StateFunc); + StartQueryActor(); + } + /* Handles two events: TEvWakeup (retry timer) and TResponse (result of an attempt). */ + STRICT_STFUNC(StateFunc, + hFunc(NActors::TEvents::TEvWakeup, Wakeup); + hFunc(TResponse, Handle); + ) + /* The wakeup scheduled by Handle arrived: launch another attempt. */ + void Wakeup(NActors::TEvents::TEvWakeup::TPtr&) { + StartQueryActor(); + } + /* Replies at once when the status is classified NoRetry. Otherwise schedules a TEvWakeup after the delay returned by RetryState, or replies with this response when GetNextRetryDelay yields no delay. */ + void Handle(const typename TResponse::TPtr& ev) { + const Ydb::StatusIds::StatusCode status = ev->Get()->Status; + if (Retryable(status) == ERetryErrorClass::NoRetry) { + Reply(ev); + return; + } + + if (RetryState == nullptr) { + RetryState = RetryPolicy->CreateRetryState(); + } + + if (auto delay = RetryState->GetNextRetryDelay(status)) { + TBase::Schedule(*delay, new NActors::TEvents::TEvWakeup()); + } else { + Reply(ev); + } + } + /* Forwards the response event to ReplyActorId and then calls PassAway on this actor. */ + void Reply(const typename TResponse::TPtr& ev) { + TBase::Send(ev->Forward(ReplyActorId)); + TBase::PassAway(); + } + /* Maps a status code to a retry class: SUCCESS and every code not listed below give NoRetry; the first list gives ShortRetry; OVERLOADED and UNDETERMINED give LongRetry. 
*/ + static ERetryErrorClass Retryable(Ydb::StatusIds::StatusCode status) { + if (status == Ydb::StatusIds::SUCCESS) { + return ERetryErrorClass::NoRetry; + } + + if (status == Ydb::StatusIds::INTERNAL_ERROR + || status == Ydb::StatusIds::UNAVAILABLE + || status == Ydb::StatusIds::BAD_SESSION + || status == Ydb::StatusIds::SESSION_EXPIRED + || status == Ydb::StatusIds::SESSION_BUSY + || status == Ydb::StatusIds::TIMEOUT + || status == Ydb::StatusIds::ABORTED) { + return ERetryErrorClass::ShortRetry; + } + + if (status == Ydb::StatusIds::OVERLOADED + || status == Ydb::StatusIds::UNDETERMINED) { + return ERetryErrorClass::LongRetry; + } + + return ERetryErrorClass::NoRetry; + } + +private: + const NActors::TActorId ReplyActorId; + const IRetryPolicy::TPtr RetryPolicy; + const std::function CreateQueryActor; + IRetryPolicy::IRetryState::TPtr RetryState = nullptr; +}; + +} // 
namespace NKikimr diff --git a/ydb/library/yql/ast/yql_constraint.cpp b/ydb/library/yql/ast/yql_constraint.cpp index 57c9991be59e..48201346529e 100644 --- a/ydb/library/yql/ast/yql_constraint.cpp +++ b/ydb/library/yql/ast/yql_constraint.cpp @@ -542,7 +542,8 @@ TSortedConstraintNode::DoGetSimplifiedForType(const TTypeAnnotationNode& type, T ++it; if (ssize_t(GetElementsCount(subType)) == std::distance(from, it)) { - *from++ = std::make_pair(TPartOfConstraintBase::TSetType{std::move(prefix)}, from->second); + *from = std::make_pair(TPartOfConstraintBase::TSetType{std::move(prefix)}, from->second); + ++from; it = content.erase(from, it); changed = setChanged = true; } diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 006fb580d2de..3d09376a3083 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -3087,8 +3087,8 @@ std::unordered_set GetUselessSortedJoinInputs(const TCoEquiJoin& equiJoin) if (!joinTree->Head().IsAtom("Cross")) { std::unordered_map tableJoinKeys; for (const auto keys : {joinTree->Child(3), joinTree->Child(4)}) - for (ui32 i = 0U; i < keys->ChildrenSize(); ++i) - tableJoinKeys[keys->Child(i)->Content()].insert_unique(TPartOfConstraintBase::TPathType(1U, keys->Child(++i)->Content())); + for (ui32 i = 0U; i < keys->ChildrenSize(); i += 2) + tableJoinKeys[keys->Child(i)->Content()].insert_unique(TPartOfConstraintBase::TPathType(1U, keys->Child(i + 1)->Content())); for (const auto& [label, joinKeys]: tableJoinKeys) { if (const auto it = sorteds.find(label); sorteds.cend() != it) { diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index b2fed99eb33e..859768b32ad8 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ 
-151,6 +151,7 @@ STRICT_STFUNC_EXC(TDqComputeActorCheckpoints::StateFunc, hFunc(TEvDqCompute::TEvRun, Handle); hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle); hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); hFunc(TEvRetryQueuePrivate::TEvRetry, Handle); hFunc(TEvents::TEvWakeup, Handle); cFunc(TEvents::TEvPoisonPill::EventType, PassAway);, @@ -393,6 +394,13 @@ void TDqComputeActorCheckpoints::Handle(NActors::TEvInterconnect::TEvNodeConnect EventsQueue.HandleNodeConnected(ev->Get()->NodeId); } +void TDqComputeActorCheckpoints::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + LOG_D("Handle undelivered"); + if (!EventsQueue.HandleUndelivered(ev)) { + LOG_E("TEvUndelivered: " << ev->Get()->SourceType); + } +} + void TDqComputeActorCheckpoints::Handle(TEvRetryQueuePrivate::TEvRetry::TPtr& ev) { Y_UNUSED(ev); EventsQueue.Retry(); diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h index 4bc93b6179ea..e33ba0495912 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h @@ -127,6 +127,7 @@ class TDqComputeActorCheckpoints : public NActors::TActorSender == RecipientId && ev->Get()->Reason == NActors::TEvents::TEvUndelivered::Disconnected) { + Connected = false; + ScheduleRetry(); + return true; + } + + return false; +} + void TRetryEventsQueue::Retry() { RetryScheduled = false; if (!Connected) { diff --git a/ydb/library/yql/dq/actors/compute/retry_queue.h b/ydb/library/yql/dq/actors/compute/retry_queue.h index 7ca82b9506a1..875aef00c99e 100644 --- a/ydb/library/yql/dq/actors/compute/retry_queue.h +++ b/ydb/library/yql/dq/actors/compute/retry_queue.h @@ -120,10 +120,16 @@ class TRetryEventsQueue { } return false; } + + bool RemoveConfirmedEvents() { + RemoveConfirmedEvents(MyConfirmedSeqNo); + return 
!Events.empty(); + } void OnNewRecipientId(const NActors::TActorId& recipientId, bool unsubscribe = true); void HandleNodeConnected(ui32 nodeId); void HandleNodeDisconnected(ui32 nodeId); + bool HandleUndelivered(NActors::TEvents::TEvUndelivered::TPtr& ev); void Retry(); void Unsubscribe(); @@ -160,7 +166,7 @@ class TRetryEventsQueue { THolder ev = MakeHolder(); ev->Record = Event->Record; ev->Record.MutableTransportMeta()->SetConfirmedSeqNo(confirmedSeqNo); - return MakeHolder(Recipient, Sender, ev.Release(), 0, Cookie); + return MakeHolder(Recipient, Sender, ev.Release(), NActors::IEventHandle::FlagTrackDelivery, Cookie); } private: diff --git a/ydb/library/yql/dq/integration/yql_dq_integration.h b/ydb/library/yql/dq/integration/yql_dq_integration.h index 7765ab128e28..5f17e627905a 100644 --- a/ydb/library/yql/dq/integration/yql_dq_integration.h +++ b/ydb/library/yql/dq/integration/yql_dq_integration.h @@ -62,7 +62,7 @@ class IDqIntegration { virtual bool CanBlockRead(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) = 0; virtual void RegisterMkqlCompiler(NCommon::TMkqlCallableCompilerBase& compiler) = 0; virtual bool CanFallback() = 0; - virtual void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) = 0; + virtual void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t maxPartitions) = 0; virtual void FillSinkSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sinkType) = 0; virtual void FillTransformSettings(const TExprNode& node, ::google::protobuf::Any& settings) = 0; virtual void Annotate(const TExprNode& node, THashMap& params) = 0; diff --git a/ydb/library/yql/dq/opt/dq_opt_hopping.cpp b/ydb/library/yql/dq/opt/dq_opt_hopping.cpp new file mode 100644 index 000000000000..ff84188dc929 --- /dev/null +++ b/ydb/library/yql/dq/opt/dq_opt_hopping.cpp @@ -0,0 +1,793 @@ +#include "dq_opt_hopping.h" + 
+#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NYql::NDq::NHopping { + +using namespace NYql; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +struct THoppingTraits { + TString Column; + TCoHoppingTraits Traits; + ui64 Hop; + ui64 Interval; + ui64 Delay; +}; + + struct TKeysDescription { + TVector PickleKeys; + TVector MemberKeys; + TVector FakeKeys; + + TKeysDescription(const TStructExprType& rowType, const TCoAtomList& keys, const TString& hoppingColumn) { + for (const auto& key : keys) { + if (key.StringValue() == hoppingColumn) { + FakeKeys.emplace_back(key.StringValue()); + continue; + } + + const auto index = rowType.FindItem(key.StringValue()); + Y_ENSURE(index); + + auto itemType = rowType.GetItems()[*index]->GetItemType(); + if (RemoveOptionalType(itemType)->GetKind() == ETypeAnnotationKind::Data) { + MemberKeys.emplace_back(key.StringValue()); + continue; + } + + PickleKeys.emplace_back(key.StringValue()); + } + } + + TExprNode::TPtr BuildPickleLambda(TExprContext& ctx, TPositionHandle pos) const { + TCoArgument arg = Build(ctx, pos) + .Name("item") + .Done(); + + TExprBase body = arg; + + for (const auto& key : PickleKeys) { + const auto member = Build(ctx, pos) + .Name().Build(key) + .Struct(arg) + .Done() + .Ptr(); + + body = Build(ctx, pos) + .Struct(body) + .Name().Build(key) + .Item(ctx.NewCallable(pos, "StablePickle", { member })) + .Done(); + } + + return Build(ctx, pos) + .Args({arg}) + .Body(body) + .Done() + .Ptr(); + } + + TExprNode::TPtr BuildUnpickleLambda(TExprContext& ctx, TPositionHandle pos, const TStructExprType& rowType) { + TCoArgument arg = Build(ctx, pos) + .Name("item") + .Done(); + + TExprBase body = arg; + + for (const auto& key : PickleKeys) { + const auto index = rowType.FindItem(key); + Y_ENSURE(index); + + auto itemType = rowType.GetItems().at(*index)->GetItemType(); + const auto member = Build(ctx, pos) 
+ .Name().Build(key) + .Struct(arg) + .Done() + .Ptr(); + + body = Build(ctx, pos) + .Struct(body) + .Name().Build(key) + .Item(ctx.NewCallable(pos, "Unpickle", { ExpandType(pos, *itemType, ctx), member })) + .Done(); + } + + return Build(ctx, pos) + .Args({arg}) + .Body(body) + .Done() + .Ptr(); + } + + TVector GetKeysList(TExprContext& ctx, TPositionHandle pos) const { + TVector res; + res.reserve(PickleKeys.size() + MemberKeys.size()); + + for (const auto& pickleKey : PickleKeys) { + res.emplace_back(Build(ctx, pos).Value(pickleKey).Done()); + } + for (const auto& memberKey : MemberKeys) { + res.emplace_back(Build(ctx, pos).Value(memberKey).Done()); + } + return res; + } + + TVector GetActualGroupKeys() { + TVector result; + result.reserve(PickleKeys.size() + MemberKeys.size()); + result.insert(result.end(), PickleKeys.begin(), PickleKeys.end()); + result.insert(result.end(), MemberKeys.begin(), MemberKeys.end()); + return result; + } + + bool NeedPickle() const { + return !PickleKeys.empty(); + } + + TExprNode::TPtr GetKeySelector(TExprContext& ctx, TPositionHandle pos, const TStructExprType* rowType) { + auto builder = Build(ctx, pos); + for (auto key : GetKeysList(ctx, pos)) { + builder.Add(std::move(key)); + } + return BuildKeySelector(pos, *rowType, builder.Build().Value().Ptr(), ctx); + } +}; + +TString BuildColumnName(const TExprBase& column) { + if (const auto columnName = column.Maybe()) { + return columnName.Cast().StringValue(); + } + + if (const auto columnNames = column.Maybe()) { + TStringBuilder columnNameBuilder; + for (const auto columnName : columnNames.Cast()) { + columnNameBuilder.append(columnName.StringValue()); + columnNameBuilder.append("_"); + } + return columnNameBuilder; + } + + YQL_ENSURE(false, "Invalid node. 
Expected Atom or AtomList, but received: " + << column.Ptr()->Dump()); +} + +bool IsLegacyHopping(const TExprNode::TPtr& hoppingSetting) { + return !hoppingSetting->Child(1)->IsList(); +} + +void EnsureNotDistinct(const TCoAggregate& aggregate) { + const auto& aggregateHandlers = aggregate.Handlers(); + + YQL_ENSURE( + AllOf(aggregateHandlers, [](const auto& t){ return !t.DistinctName(); }), + "Distinct is not supported for aggregation with hop"); +} + +TMaybe ExtractHopTraits(const TCoAggregate& aggregate, TExprContext& ctx, bool analyticsMode) { + const auto pos = aggregate.Pos(); + + const auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); + if (!hopSetting) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Aggregate over stream must have 'hopping' setting")); + return Nothing(); + } + + const auto hoppingColumn = IsLegacyHopping(hopSetting) + ? "_yql_time" + : TString(hopSetting->Child(1)->Child(0)->Content()); + + const auto traitsNode = IsLegacyHopping(hopSetting) + ? 
hopSetting->Child(1) + : hopSetting->Child(1)->Child(1); + + const auto maybeTraits = TMaybeNode(traitsNode); + if (!maybeTraits) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Invalid 'hopping' setting in Aggregate")); + return Nothing(); + } + + const auto traits = maybeTraits.Cast(); + + const auto checkIntervalParam = [&] (TExprBase param) -> ui64 { + if (param.Maybe()) { + param = param.Cast().Input(); + } + if (!param.Maybe()) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Not an interval data ctor")); + return 0; + } + auto value = FromString(param.Cast().Literal().Value()); + if (value <= 0) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval value must be positive")); + return 0; + } + return (ui64)value; + }; + + const auto hop = checkIntervalParam(traits.Hop()); + if (!hop) { + return Nothing(); + } + const auto interval = checkIntervalParam(traits.Interval()); + if (!interval) { + return Nothing(); + } + const auto delay = checkIntervalParam(traits.Delay()); + if (!delay) { + return Nothing(); + } + + if (interval < hop) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval must be greater or equal then hop")); + return Nothing(); + } + if (delay < hop) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Delay must be greater or equal then hop")); + return Nothing(); + } + + const auto newTraits = Build(ctx, aggregate.Pos()) + .InitFrom(traits) + .DataWatermarks(analyticsMode + ? ctx.NewAtom(aggregate.Pos(), "false") + : traits.DataWatermarks().Ptr()) + .Done(); + + return THoppingTraits { + hoppingColumn, + newTraits, + hop, + interval, + delay + }; +} + +TExprNode::TPtr BuildTimeExtractor(const TCoHoppingTraits& hoppingTraits, TExprContext& ctx) { + const auto pos = hoppingTraits.Pos(); + + if (hoppingTraits.ItemType().Ref().GetTypeAnn()->Cast()->GetType()->Cast()->GetSize() == 0) { + // The case when no fields are used in lambda. F.e. when it has only DependsOn. 
+ return ctx.DeepCopyLambda(hoppingTraits.TimeExtractor().Ref()); + } + + return Build(ctx, pos) + .Args({"item"}) + .Body() + .Apply(hoppingTraits.TimeExtractor()) + .With(0) + .Type(hoppingTraits.ItemType()) + .Value("item") + .Build() + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildInitHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto& aggregateHandlers = aggregate.Handlers(); + + const auto initItemArg = Build(ctx, pos).Name("item").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + ui32 index = 0; + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + + TMaybeNode applier; + if (tuple.Trait().Cast().InitHandler().Args().Size() == 1) { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().InitHandler()) + .With(0, initItemArg) + .Done(); + } else { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().InitHandler()) + .With(0, initItemArg) + .With(1) + .Literal().Build(ToString(index)) + .Build() + .Done(); + } + + structItems.push_back(Build(ctx, pos) + .Name().Build(BuildColumnName(tuple.ColumnName())) + .Value(applier) + .Done()); + ++index; + } + + return Build(ctx, pos) + .Args({initItemArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildUpdateHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + + const auto updateItemArg = Build(ctx, pos).Name("item").Done(); + const auto updateStateArg = Build(ctx, pos).Name("state").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + i32 index = 0; + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(updateStateArg) + .Name().Build(columnName) + 
.Done(); + + TMaybeNode applier; + if (tuple.Trait().Cast().UpdateHandler().Args().Size() == 2) { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().UpdateHandler()) + .With(0, updateItemArg) + .With(1, member) + .Done(); + } else { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().UpdateHandler()) + .With(0, updateItemArg) + .With(1, member) + .With(2) + .Literal().Build(ToString(index)) + .Build() + .Done(); + } + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value(applier) + .Done()); + ++index; + } + + return Build(ctx, pos) + .Args({updateItemArg, updateStateArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr WrapToShuffle( + const TKeysDescription& keysDescription, + const TCoAggregate& aggregate, + const TDqConnection& input, + TExprContext& ctx) +{ + auto pos = aggregate.Pos(); + + TDqStageBase mappedInput = input.Output().Stage(); + if (keysDescription.NeedPickle()) { + mappedInput = Build(ctx, pos) + .Inputs() + .Add() + .Output() + .Stage(input.Output().Stage()) + .Index(input.Output().Index()) + .Build() + .Build() + .Build() + .Program() + .Args({"stream"}) + .Body() + .Input("stream") + .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) + .Build() + .Build() + .Settings(TDqStageSettings().BuildNode(ctx, pos)) + .Done(); + } + + return Build(ctx, pos) + .Output() + .Stage(mappedInput) + .Index().Value("0").Build() + .Build() + .KeyColumns() + .Add(keysDescription.GetKeysList(ctx, pos)) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildMergeHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto& aggregateHandlers = aggregate.Handlers(); + + const auto mergeState1Arg = Build(ctx, pos).Name("state1").Done(); + const auto mergeState2Arg = Build(ctx, pos).Name("state2").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + for (const auto& handler : aggregateHandlers) { + const auto 
tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member1 = Build(ctx, pos) + .Struct(mergeState1Arg) + .Name().Build(columnName) + .Done(); + const auto member2 = Build(ctx, pos) + .Struct(mergeState2Arg) + .Name().Build(columnName) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value() + .Apply(tuple.Trait().Cast().MergeHandler()) + .With(0, member1) + .With(1, member2) + .Build() + .Done()); + } + + return Build(ctx, pos) + .Args({mergeState1Arg, mergeState2Arg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildFinishHopLambda( + const TCoAggregate& aggregate, + const TVector& actualGroupKeys, + const TString& hoppingColumn, + TExprContext& ctx) +{ + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + + const auto finishKeyArg = Build(ctx, pos).Name("key").Done(); + const auto finishStateArg = Build(ctx, pos).Name("state").Done(); + const auto finishTimeArg = Build(ctx, pos).Name("time").Done(); + + TVector structItems; + structItems.reserve(actualGroupKeys.size() + aggregateHandlers.Size() + 1); + + if (actualGroupKeys.size() == 1) { + structItems.push_back(Build(ctx, pos) + .Name().Build(actualGroupKeys[0]) + .Value(finishKeyArg) + .Done()); + } else { + for (size_t i = 0; i < actualGroupKeys.size(); ++i) { + structItems.push_back(Build(ctx, pos) + .Name().Build(actualGroupKeys[i]) + .Value() + .Tuple(finishKeyArg) + .Index() + .Value(ToString(i)) + .Build() + .Build() + .Done()); + } + } + + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString compoundColumnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(finishStateArg) + .Name().Build(compoundColumnName) + .Done(); + + if (tuple.ColumnName().Maybe()) { + structItems.push_back(Build(ctx, pos) + .Name().Build(compoundColumnName) + .Value() + 
.Apply(tuple.Trait().Cast().FinishHandler()) + .With(0, member) + .Build() + .Done()); + + continue; + } + + if (const auto namesList = tuple.ColumnName().Maybe()) { + const auto expApplier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().FinishHandler()) + .With(0, member) + .Done(); + + int index = 0; + for (const auto columnName : namesList.Cast()) { + const auto extracter = Build(ctx, pos) + .Tuple(expApplier) + .Index().Build(index++) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name(columnName) + .Value(extracter) + .Done()); + } + + continue; + } + + YQL_ENSURE(false, "Invalid node. Expected Atom or AtomList, but received: " + << tuple.ColumnName().Ptr()->Dump()); + } + + structItems.push_back(Build(ctx, pos) + .Name().Build(hoppingColumn) + .Value(finishTimeArg) + .Done()); + + return Build(ctx, pos) + .Args({finishKeyArg, finishStateArg, finishTimeArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildSaveHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + + const auto saveStateArg = Build(ctx, pos).Name("state").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(saveStateArg) + .Name().Build(columnName) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value() + .Apply(tuple.Trait().Cast().SaveHandler()) + .With(0, member) + .Build() + .Done()); + } + + return Build(ctx, pos) + .Args({saveStateArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildLoadHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + 
+ TCoArgument loadStateArg = Build(ctx, pos).Name("state").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(loadStateArg) + .Name().Build(columnName) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value() + .Apply(tuple.Trait().Cast().LoadHandler()) + .With(0, member) + .Build() + .Done()); + } + + return Build(ctx, pos) + .Args({loadStateArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TMaybe BuildWatermarkMode( + const TCoAggregate& aggregate, + const TCoHoppingTraits& hoppingTraits, + TExprContext& ctx, + bool analyticsMode, + bool defaultWatermarksMode, + bool syncActor) +{ + const bool enableWatermarks = !analyticsMode && + defaultWatermarksMode && + hoppingTraits.Version().Cast().StringValue() == "v2"; + if (enableWatermarks && syncActor) { + ctx.AddError(TIssue(ctx.GetPosition(aggregate.Pos()), "Watermarks should be used only with async compute actor")); + return Nothing(); + } + + if (hoppingTraits.Version().Cast().StringValue() == "v2" && !enableWatermarks) { + ctx.AddError(TIssue( + ctx.GetPosition(aggregate.Pos()), + "HoppingWindow requires watermarks to be enabled. 
If you don't want to do that, you can use HOP instead.")); + return Nothing(); + } + + return enableWatermarks; +} + +TMaybeNode RewriteAsHoppingWindow( + const TExprBase node, + TExprContext& ctx, + const TDqConnection& input, + bool analyticsMode, + TDuration lateArrivalDelay, + bool defaultWatermarksMode, + bool syncActor) { + const auto aggregate = node.Cast(); + const auto pos = aggregate.Pos(); + + YQL_CLOG(DEBUG, ProviderDq) << "OptimizeStreamingAggregate"; + + EnsureNotDistinct(aggregate); + + const auto maybeHopTraits = ExtractHopTraits(aggregate, ctx, analyticsMode); + if (!maybeHopTraits) { + return nullptr; + } + const auto hopTraits = *maybeHopTraits; + + const auto aggregateInputType = GetSeqItemType(*node.Ptr()->Head().GetTypeAnn()).Cast(); + TKeysDescription keysDescription(*aggregateInputType, aggregate.Keys(), hopTraits.Column); + + if (keysDescription.NeedPickle()) { + return Build(ctx, pos) + .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) + .Input() + .InitFrom(aggregate) + .Input() + .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) + .Input(input) + .Build() + .Build() + .Done(); + } + + const auto keyLambda = keysDescription.GetKeySelector(ctx, pos, aggregateInputType); + const auto timeExtractorLambda = BuildTimeExtractor(hopTraits.Traits, ctx); + const auto initLambda = BuildInitHopLambda(aggregate, ctx); + const auto updateLambda = BuildUpdateHopLambda(aggregate, ctx); + const auto saveLambda = BuildSaveHopLambda(aggregate, ctx); + const auto loadLambda = BuildLoadHopLambda(aggregate, ctx); + const auto mergeLambda = BuildMergeHopLambda(aggregate, ctx); + const auto finishLambda = BuildFinishHopLambda(aggregate, keysDescription.GetActualGroupKeys(), hopTraits.Column, ctx); + const auto enableWatermarks = BuildWatermarkMode(aggregate, hopTraits.Traits, ctx, analyticsMode, defaultWatermarksMode, syncActor); + if (!enableWatermarks) { + return nullptr; + } + + const auto streamArg = Build(ctx, 
pos).Name("stream").Done(); + auto multiHoppingCoreBuilder = Build(ctx, pos) + .KeyExtractor(keyLambda) + .TimeExtractor(timeExtractorLambda) + .Hop(hopTraits.Traits.Hop()) + .Interval(hopTraits.Traits.Interval()) + .DataWatermarks(hopTraits.Traits.DataWatermarks()) + .InitHandler(initLambda) + .UpdateHandler(updateLambda) + .MergeHandler(mergeLambda) + .FinishHandler(finishLambda) + .SaveHandler(saveLambda) + .LoadHandler(loadLambda) + .template WatermarkMode().Build(ToString(*enableWatermarks)); + + if (*enableWatermarks) { + const auto hop = TDuration::MicroSeconds(hopTraits.Hop); + multiHoppingCoreBuilder.template Delay() + .Literal().Build(ToString(Max(hop, lateArrivalDelay).MicroSeconds())) + .Build(); + } else { + multiHoppingCoreBuilder.Delay(hopTraits.Traits.Delay()); + } + + if (analyticsMode) { + return Build(ctx, node.Pos()) + .Input(input.Ptr()) + .KeySelectorLambda(keyLambda) + .SortDirections() + .Literal() + .Value("true") + .Build() + .Build() + .SortKeySelectorLambda(timeExtractorLambda) + .ListHandlerLambda() + .Args(streamArg) + .template Body() + .Stream(multiHoppingCoreBuilder + .template Input() + .List(streamArg) + .Build() + .Done()) + .Build() + .Build() + .Done(); + } else { + auto wrappedInput = input.Ptr(); + if (!keysDescription.MemberKeys.empty()) { + // Shuffle input connection by keys + wrappedInput = WrapToShuffle(keysDescription, aggregate, input, ctx); + if (!wrappedInput) { + return nullptr; + } + } + + const auto stage = Build(ctx, node.Pos()) + .Inputs() + .Add(wrappedInput) + .Build() + .Program() + .Args(streamArg) + .Body() + .Input(multiHoppingCoreBuilder + .template Input() + .Input(streamArg) + .Build() + .Done()) + .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) + .Build() + .Build() + .Settings(TDqStageSettings().BuildNode(ctx, node.Pos())) + .Done(); + + return Build(ctx, node.Pos()) + .Output() + .Stage(stage) + .Index().Build(0) + .Build() + .Done(); + } +} + + +} // NYql::NDq::NHopping 
diff --git a/ydb/library/yql/dq/opt/dq_opt_hopping.h b/ydb/library/yql/dq/opt/dq_opt_hopping.h new file mode 100644 index 000000000000..7d690f6ab2fa --- /dev/null +++ b/ydb/library/yql/dq/opt/dq_opt_hopping.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include + +namespace NYql::NDq::NHopping { + +NNodes::TMaybeNode RewriteAsHoppingWindow( + const NNodes::TExprBase node, + TExprContext& ctx, + const NNodes::TDqConnection& input, + bool analyticsHopping, + TDuration lateArrivalDelay, + bool defaultWatermarksMode, + bool syncActor); + +} // namespace NYql::NDq::NHopping diff --git a/ydb/library/yql/dq/opt/ya.make b/ydb/library/yql/dq/opt/ya.make index 15c6c03dafa4..b1c6a87b94c8 100644 --- a/ydb/library/yql/dq/opt/ya.make +++ b/ydb/library/yql/dq/opt/ya.make @@ -15,6 +15,7 @@ SRCS( dq_opt.cpp dq_opt_build.cpp dq_opt_join.cpp + dq_opt_hopping.cpp dq_opt_log.cpp dq_opt_peephole.cpp dq_opt_phy_finalizing.cpp diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp index b98153d66991..8777bb439bc4 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp @@ -272,7 +272,7 @@ void TGraceJoinPacker::Pack() { case NUdf::EDataSlot::Interval: WriteUnaligned(buffPtr, value.Get()); break; case NUdf::EDataSlot::Date32: - WriteUnaligned(buffPtr, value.Get()); break; + WriteUnaligned(buffPtr, value.Get()); break; case NUdf::EDataSlot::Datetime64: WriteUnaligned(buffPtr, value.Get()); break; case NUdf::EDataSlot::Timestamp64: @@ -300,7 +300,7 @@ void TGraceJoinPacker::Pack() { } case NUdf::EDataSlot::TzTimestamp: { - WriteUnaligned(buffPtr, value.Get()); + WriteUnaligned(buffPtr, value.Get()); WriteUnaligned(buffPtr + sizeof(ui64), value.GetTimezoneId()); break; } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index 51f78cb971d0..af4027dee5f7 100644 --- 
a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -76,7 +76,7 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings } - XXH64_hash_t hash = XXH64(TempTuple.data(), TempTuple.size() * sizeof(ui64), 0); + XXH64_hash_t hash = XXH64(TempTuple.data() + NullsBitmapSize_, (TempTuple.size() - NullsBitmapSize_) * sizeof(ui64), 0); if (!hash) hash = 1; @@ -298,6 +298,8 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef std::swap(JoinTable1, JoinTable2); } + ui64 tuplesFound = 0; + std::vector> joinSlots, spillSlots, slotToIdx; std::vector> stringsOffsets1, stringsOffsets2; ui64 reservedSize = 6 * (DefaultTupleBytes * DefaultTuplesNum) / sizeof(ui64); @@ -320,22 +322,28 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef ui64 nullsSize2 = JoinTable2->NullsBitmapSize_; ui64 keyIntOffset1 = HashSize + nullsSize1; ui64 keyIntOffset2 = HashSize + nullsSize2; + bool table1HasKeyStringColumns = (JoinTable1->NumberOfKeyStringColumns != 0); + bool table2HasKeyStringColumns = (JoinTable2->NumberOfKeyStringColumns != 0); + bool table1HasKeyIColumns = (JoinTable1->NumberOfKeyIColumns != 0); + bool table2HasKeyIColumns = (JoinTable2->NumberOfKeyIColumns != 0); + if ( bucket2->TuplesNum > bucket1->TuplesNum ) { std::swap(bucket1, bucket2); std::swap(headerSize1, headerSize2); std::swap(nullsSize1, nullsSize2); std::swap(keyIntOffset1, keyIntOffset2); + std::swap(table1HasKeyStringColumns, table2HasKeyStringColumns); + std::swap(table1HasKeyIColumns, table2HasKeyIColumns); } joinResults.reserve(3 * bucket1->TuplesNum ); - ui64 headerSize = JoinTable1->HeaderSize; - ui64 slotSize = headerSize; + ui64 slotSize = headerSize2; ui64 avgStringsSize = ( 3 * (bucket2->KeyIntVals.size() - bucket2->TuplesNum * headerSize2) ) / ( 2 * bucket2->TuplesNum + 1) + 1; - if (JoinTable1->NumberOfKeyStringColumns != 0 || 
JoinTable1->NumberOfKeyIColumns != 0) { + if (table2HasKeyStringColumns || table2HasKeyIColumns ) { slotSize = slotSize + avgStringsSize; } @@ -352,7 +360,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef while (it2 != bucket2->KeyIntVals.end() ) { ui64 keysValSize; - if ( JoinTable2->NumberOfKeyStringColumns > 0 || JoinTable2->NumberOfKeyIColumns > 0) { + if ( table2HasKeyStringColumns || table2HasKeyIColumns) { keysValSize = headerSize2 + *(it2 + headerSize2 - 1) ; } else { keysValSize = headerSize2; @@ -397,7 +405,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef while ( it1 < bucket1->KeyIntVals.end() ) { ui64 keysValSize; - if ( JoinTable1->NumberOfKeyStringColumns > 0 || JoinTable1->NumberOfKeyIColumns > 0) { + if ( table1HasKeyStringColumns || table1HasKeyIColumns ) { keysValSize = headerSize1 + *(it1 + headerSize1 - 1) ; } else { keysValSize = headerSize1; @@ -417,24 +425,28 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef auto slotIt = joinSlots.begin() + slotNum * slotSize; while (*slotIt != 0 && slotIt != joinSlots.end()) { + bool matchFound = false; - if (keysValSize <= slotSize && !JoinTable1->NumberOfKeyIColumns ) { + if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !table1HasKeyIColumns ) { if (std::equal(it1 + keyIntOffset1, it1 + keysValSize, slotIt + keyIntOffset2)) { + tuplesFound++; matchFound = true; } } - if (keysValSize > slotSize && !JoinTable1->NumberOfKeyIColumns ) { + if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !table1HasKeyIColumns) { if (std::equal(it1 + keyIntOffset1, it1 + headerSize1, slotIt + keyIntOffset2)) { ui64 stringsPos = *(slotIt + headerSize2); ui64 stringsSize = *(it1 + headerSize1 - 1); if (std::equal(it1 + headerSize1, it1 + headerSize1 + stringsSize, spillSlots.begin() + stringsPos)) { + tuplesFound++; matchFound = true; } } } - if (JoinTable1->NumberOfKeyIColumns) + + if 
(table1HasKeyIColumns) { bool headerMatch = false; bool stringsMatch = false; @@ -451,7 +463,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef slotStringsStart = spillSlots.begin() + stringsPos; } - if ( JoinTable1->NumberOfKeyStringColumns == 0) { + if ( !table1HasKeyStringColumns) { stringsMatch = true; } else { ui64 stringsSize = *(it1 + headerSize1 - 1); @@ -478,32 +490,33 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef } if (headerMatch && stringsMatch && iValuesMatch) { + tuplesFound++; matchFound = true; } - } + } - if (matchFound) + if (matchFound) + { + JoinTuplesIds joinIds; + joinIds.id1 = tuple1Idx; + joinIds.id2 = slotToIdx[(slotIt - joinSlots.begin()) / slotSize]; + if (JoinTable2->TableBuckets[bucket].TuplesNum > JoinTable1->TableBuckets[bucket].TuplesNum) { - JoinTuplesIds joinIds; - joinIds.id1 = tuple1Idx; - joinIds.id2 = slotToIdx[(slotIt - joinSlots.begin()) / slotSize]; - if (JoinTable2->TableBuckets[bucket].TuplesNum > JoinTable1->TableBuckets[bucket].TuplesNum) - { - std::swap(joinIds.id1, joinIds.id2); - } - joinResults.emplace_back(joinIds); + std::swap(joinIds.id1, joinIds.id2); } - - slotIt += slotSize; - if (slotIt == joinSlots.end()) - slotIt = joinSlots.begin(); + joinResults.emplace_back(joinIds); } + slotIt += slotSize; + if (slotIt == joinSlots.end()) + slotIt = joinSlots.begin(); + } it1 += keysValSize; tuple1Idx ++; } + std::sort(joinResults.begin(), joinResults.end(), [](JoinTuplesIds a, JoinTuplesIds b) { if (a.id1 < b.id1) return true; @@ -555,6 +568,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef HasMoreLeftTuples_ = hasMoreLeftTuples; HasMoreRightTuples_ = hasMoreRightTuples; + TuplesFound_ += tuplesFound; } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h index 3eb2056d02df..c6e60d85e819 100644 --- 
a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h @@ -169,6 +169,8 @@ class TTable { bool Table2Initialized_ = false; // True when iterator counters for second table already initialized + ui64 TuplesFound_ = 0; // Total number of matching keys found during join + public: // Adds new tuple to the table. intColumns, stringColumns - data of columns, diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp index 2853ea5c275f..319073885250 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp @@ -1522,12 +1522,12 @@ Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) { const auto iterator = graph->GetValue().GetListIterator(); NUdf::TUnboxedValue tuple; - UNIT_ASSERT(iterator.Next(tuple)); - UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X"); - UNIT_ASSERT(!tuple.GetElement(1)); UNIT_ASSERT(iterator.Next(tuple)); UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A"); UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get(), 1); + UNIT_ASSERT(iterator.Next(tuple)); + UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X"); + UNIT_ASSERT(!tuple.GetElement(1)); UNIT_ASSERT(!iterator.Next(tuple)); UNIT_ASSERT(!iterator.Next(tuple)); } diff --git a/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp b/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp index 2a5e7ec832a5..f5aa18638eb5 100644 --- a/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp +++ b/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp @@ -75,7 +75,7 @@ class TClickHouseDqIntegration: public TDqIntegrationBase { return 0ULL; } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& 
node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override { const TDqSource source(&node); if (const auto maySettings = source.Settings().Maybe()) { const auto settings = maySettings.Cast(); diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp index 98f43a555531..38e013eee5cc 100644 --- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp +++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp @@ -51,7 +51,7 @@ bool TDqIntegrationBase::CanFallback() { return false; } -void TDqIntegrationBase::FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&) { +void TDqIntegrationBase::FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&, size_t) { } void TDqIntegrationBase::FillSinkSettings(const TExprNode&, ::google::protobuf::Any&, TString&) { diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h index a2720db03331..d658d2e018ca 100644 --- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h +++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h @@ -18,7 +18,7 @@ class TDqIntegrationBase: public IDqIntegration { bool CanBlockRead(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) override; TExprNode::TPtr WrapWrite(const TExprNode::TPtr& write, TExprContext& ctx) override; bool CanFallback() override; - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) override; + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t) override; void FillSinkSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sinkType) override; void FillTransformSettings(const TExprNode& node, ::google::protobuf::Any& settings) override; void Annotate(const 
TExprNode& node, THashMap& params) override; diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto index fb845b8cee7b..1838bdd39710 100644 --- a/ydb/library/yql/providers/common/proto/gateways_config.proto +++ b/ydb/library/yql/providers/common/proto/gateways_config.proto @@ -563,12 +563,11 @@ message TGenericClusterConfig { // Credentials used to access data source instance optional NYql.NConnector.NApi.TCredentials Credentials = 10; - // Credentials used to access MDB API. - // When working with data source instances deployed in a cloud, - // you should either set (ServiceAccountId, ServiceAccountIdSignature) pair, - // or set IAM Token. - // The names of these fields must satisfy this template function: - // https://github.com/ydb-platform/ydb/arcadia/contrib/ydb/core/fq/libs/actors/clusters_from_connections.cpp?rev=r11823087#L19 + // Credentials used to access managed databases APIs. + // When working with external data source instances deployed in clouds, + // one should either set (ServiceAccountId, ServiceAccountIdSignature) pair + // that will be resolved into IAM Token via Token Accessor, + // or provide IAM Token directly. optional string ServiceAccountId = 6; optional string ServiceAccountIdSignature = 7; optional string Token = 11; @@ -592,9 +591,11 @@ message TGenericClusterConfig { message TGenericConnectorConfig { // Connector instance network endpoint optional NYql.NConnector.NApi.TEndpoint Endpoint = 3; - // If true, GRPC Client will use TLS encryption. - // Server cert will be verified with system CA cert pool. + // If true, Connector GRPC Client will use TLS encryption. optional bool UseSsl = 4; + // Path to the custom CA certificate to verify Connector's certs. + // If empty, the default system CA certificate pool will be used. 
+ optional string SslCaCrt = 5; reserved 1, 2; } @@ -607,9 +608,14 @@ message TGenericGatewayConfig { // Database clusters supported by this particular instance repeated TGenericClusterConfig ClusterMapping = 3; - // MDB API endpoint (do not fill in case of on-prem deployment) + // MDB API endpoint (no need to fill in case of on-prem deployment). optional string MdbGateway = 4; + // YDB MVP API endpoint (no need to fill in case of on-prem deployment). + // Expected format: + // [http|https]://host:port/ydbc/cloud-prod/ + optional string YdbMvpEndpoint = 7; + repeated TAttr DefaultSettings = 6; reserved 1, 2; @@ -618,7 +624,9 @@ message TGenericGatewayConfig { /////////////////////////////// Db Resolver /////////////////////////////////// message TDbResolverConfig { - // Ydb / Yds mvp endpoint + // Ydb / Yds MVP endpoint. + // Expected format: + // [http|https]://host:port/ydbc/cloud-prod/ optional string YdbMvpEndpoint = 2; } diff --git a/ydb/library/yql/providers/dq/common/ya.make b/ydb/library/yql/providers/dq/common/ya.make index 82704ed75da4..b5a953a629ca 100644 --- a/ydb/library/yql/providers/dq/common/ya.make +++ b/ydb/library/yql/providers/dq/common/ya.make @@ -8,6 +8,7 @@ PEERDIR( ydb/library/yql/utils/log ydb/library/yql/dq/actors ydb/library/yql/dq/proto + ydb/library/yql/dq/integration ) GENERATE_ENUM_SERIALIZATION(yql_dq_settings.h) diff --git a/ydb/library/yql/providers/dq/counters/counters.h b/ydb/library/yql/providers/dq/counters/counters.h index d99722fae175..c7edf1cfa038 100644 --- a/ydb/library/yql/providers/dq/counters/counters.h +++ b/ydb/library/yql/providers/dq/counters/counters.h @@ -61,6 +61,10 @@ struct TCounters { Counters[name] = TEntry(value); } + void SetTimeCounter(const TString& name, i64 value) const { + SetCounter(name, value * 1000); // ms => us + } + THashMap& GetHistogram(const TString& name) { return Histograms[name]; } diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp 
b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp index b294d57a6180..f36fe4333310 100644 --- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -23,33 +24,6 @@ using namespace NYql; using namespace NYql::NDq; using namespace NYql::NNodes; -namespace { - - -TString BuildColumnName(const TExprBase column) { - if (const auto columnName = column.Maybe()) { - return columnName.Cast().StringValue(); - } - - if (const auto columnNames = column.Maybe()) { - TStringBuilder columnNameBuilder; - for (const auto columnName : columnNames.Cast()) { - columnNameBuilder.append(columnName.StringValue()); - columnNameBuilder.append("_"); - } - return columnNameBuilder; - } - - YQL_ENSURE(false, "Invalid node. Expected Atom or AtomList, but received: " - << column.Ptr()->Dump()); -} - -bool IsLegacyHopping(const TExprNode::TPtr& hoppingSetting) { - return !hoppingSetting->Child(1)->IsList(); -} - -} - class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { public: TDqsLogicalOptProposalTransformer(TTypeAnnotationContext* typeCtx, const TDqConfiguration::TPtr& config) @@ -336,7 +310,13 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); if (input) { if (hopSetting) { - return RewriteAsHoppingWindow(node, ctx, input.Cast()); + bool analyticsHopping = Config->AnalyticsHopping.Get().GetOrElse(false); + const auto lateArrivalDelay = TDuration::MilliSeconds(Config->WatermarksLateArrivalDelayMs + .Get() + .GetOrElse(TDqSettings::TDefault::WatermarksLateArrivalDelayMs)); + bool defaultWatermarksMode = Config->WatermarksMode.Get() == "default"; + bool syncActor = Config->ComputeActorType.Get() != "async"; + return NHopping::RewriteAsHoppingWindow(node, ctx, input.Cast(), analyticsHopping, lateArrivalDelay, 
defaultWatermarksMode, syncActor); } else { return DqRewriteAggregate(node, ctx, TypesCtx, true, Config->UseAggPhases.Get().GetOrElse(false), Config->UseFinalizeByKey.Get().GetOrElse(false)); } @@ -508,382 +488,6 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { } private: - TMaybeNode RewriteAsHoppingWindow(const TExprBase node, TExprContext& ctx, const TDqConnection& input) { - const auto aggregate = node.Cast(); - const auto pos = aggregate.Pos(); - - YQL_CLOG(DEBUG, ProviderDq) << "OptimizeStreamingAggregate"; - - EnsureNotDistinct(aggregate); - - const auto maybeHopTraits = ExtractHopTraits(aggregate, ctx); - if (!maybeHopTraits) { - return nullptr; - } - const auto hopTraits = *maybeHopTraits; - - const auto aggregateInputType = GetSeqItemType(*node.Ptr()->Head().GetTypeAnn()).Cast(); - TKeysDescription keysDescription(*aggregateInputType, aggregate.Keys(), hopTraits.Column); - - if (keysDescription.NeedPickle()) { - return Build(ctx, pos) - .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) - .Input() - .InitFrom(aggregate) - .Input() - .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) - .Input(input) - .Build() - .Build() - .Done(); - } - - const auto keyLambda = keysDescription.GetKeySelector(ctx, pos, aggregateInputType); - const auto timeExtractorLambda = BuildTimeExtractor(hopTraits.Traits, ctx); - const auto initLambda = BuildInitHopLambda(aggregate, ctx); - const auto updateLambda = BuildUpdateHopLambda(aggregate, ctx); - const auto saveLambda = BuildSaveHopLambda(aggregate, ctx); - const auto loadLambda = BuildLoadHopLambda(aggregate, ctx); - const auto mergeLambda = BuildMergeHopLambda(aggregate, ctx); - const auto finishLambda = BuildFinishHopLambda(aggregate, keysDescription.GetActualGroupKeys(), hopTraits.Column, ctx); - const auto enableWatermarks = BuildWatermarkMode(aggregate, hopTraits.Traits, ctx); - if (!enableWatermarks) { - return nullptr; - } - - const auto streamArg = Build(ctx, 
pos).Name("stream").Done(); - auto multiHoppingCoreBuilder = Build(ctx, pos) - .KeyExtractor(keyLambda) - .TimeExtractor(timeExtractorLambda) - .Hop(hopTraits.Traits.Hop()) - .Interval(hopTraits.Traits.Interval()) - .DataWatermarks(hopTraits.Traits.DataWatermarks()) - .InitHandler(initLambda) - .UpdateHandler(updateLambda) - .MergeHandler(mergeLambda) - .FinishHandler(finishLambda) - .SaveHandler(saveLambda) - .LoadHandler(loadLambda) - .WatermarkMode().Build(ToString(*enableWatermarks)); - - if (*enableWatermarks) { - const auto hop = TDuration::MicroSeconds(hopTraits.Hop); - const auto lateArrivalDelay = TDuration::MilliSeconds(Config->WatermarksLateArrivalDelayMs - .Get() - .GetOrElse(TDqSettings::TDefault::WatermarksLateArrivalDelayMs)); - - multiHoppingCoreBuilder.Delay() - .Literal().Build(ToString(Max(hop, lateArrivalDelay).MicroSeconds())) - .Build(); - } else { - multiHoppingCoreBuilder.Delay(hopTraits.Traits.Delay()); - } - - if (Config->AnalyticsHopping.Get().GetOrElse(false)) { - return Build(ctx, node.Pos()) - .Input(input.Ptr()) - .KeySelectorLambda(keyLambda) - .SortDirections() - .Literal() - .Value("true") - .Build() - .Build() - .SortKeySelectorLambda(timeExtractorLambda) - .ListHandlerLambda() - .Args(streamArg) - .Body() - .Stream(multiHoppingCoreBuilder - .Input() - .List(streamArg) - .Build() - .Done()) - .Build() - .Build() - .Done(); - } else { - auto wrappedInput = input.Ptr(); - if (!keysDescription.MemberKeys.empty()) { - // Shuffle input connection by keys - wrappedInput = WrapToShuffle(keysDescription, aggregate, input, ctx); - if (!wrappedInput) { - return nullptr; - } - } - - const auto stage = Build(ctx, node.Pos()) - .Inputs() - .Add(wrappedInput) - .Build() - .Program() - .Args(streamArg) - .Body() - .Input(multiHoppingCoreBuilder - .Input() - .Input(streamArg) - .Build() - .Done()) - .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) - .Build() - .Build() - .Settings(TDqStageSettings().BuildNode(ctx, 
node.Pos())) - .Done(); - - return Build(ctx, node.Pos()) - .Output() - .Stage(stage) - .Index().Build(0) - .Build() - .Done(); - } - } - - struct THoppingTraits { - TString Column; - TCoHoppingTraits Traits; - ui64 Hop; - ui64 Interval; - ui64 Delay; - }; - - TMaybe ExtractHopTraits(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - - const auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); - if (!hopSetting) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Aggregate over stream must have 'hopping' setting")); - return Nothing(); - } - - const auto hoppingColumn = IsLegacyHopping(hopSetting) - ? "_yql_time" - : TString(hopSetting->Child(1)->Child(0)->Content()); - - const auto traitsNode = IsLegacyHopping(hopSetting) - ? hopSetting->Child(1) - : hopSetting->Child(1)->Child(1); - - const auto maybeTraits = TMaybeNode(traitsNode); - if (!maybeTraits) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Invalid 'hopping' setting in Aggregate")); - return Nothing(); - } - - const auto traits = maybeTraits.Cast(); - - const auto checkIntervalParam = [&] (TExprBase param) -> ui64 { - if (param.Maybe()) { - param = param.Cast().Input(); - } - if (!param.Maybe()) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Not an interval data ctor")); - return 0; - } - auto value = FromString(param.Cast().Literal().Value()); - if (value <= 0) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval value must be positive")); - return 0; - } - return (ui64)value; - }; - - const auto hop = checkIntervalParam(traits.Hop()); - if (!hop) { - return Nothing(); - } - const auto interval = checkIntervalParam(traits.Interval()); - if (!interval) { - return Nothing(); - } - const auto delay = checkIntervalParam(traits.Delay()); - if (!delay) { - return Nothing(); - } - - if (interval < hop) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval must be greater or equal then hop")); - return Nothing(); - } - if (delay < hop) { - 
ctx.AddError(TIssue(ctx.GetPosition(pos), "Delay must be greater or equal then hop")); - return Nothing(); - } - - const auto newTraits = Build(ctx, aggregate.Pos()) - .InitFrom(traits) - .DataWatermarks(Config->AnalyticsHopping.Get().GetOrElse(false) - ? ctx.NewAtom(aggregate.Pos(), "false") - : traits.DataWatermarks().Ptr()) - .Done(); - - return THoppingTraits { - hoppingColumn, - newTraits, - hop, - interval, - delay - }; - } - - struct TKeysDescription { - TVector PickleKeys; - TVector MemberKeys; - TVector FakeKeys; - - explicit TKeysDescription(const TStructExprType& rowType, const TCoAtomList& keys, const TString& hoppingColumn) { - for (const auto& key : keys) { - if (key.StringValue() == hoppingColumn) { - FakeKeys.emplace_back(key.StringValue()); - continue; - } - - const auto index = rowType.FindItem(key.StringValue()); - Y_ENSURE(index); - - auto itemType = rowType.GetItems()[*index]->GetItemType(); - if (RemoveOptionalType(itemType)->GetKind() == ETypeAnnotationKind::Data) { - MemberKeys.emplace_back(key.StringValue()); - continue; - } - - PickleKeys.emplace_back(key.StringValue()); - } - } - - TExprNode::TPtr BuildPickleLambda(TExprContext& ctx, TPositionHandle pos) const { - TCoArgument arg = Build(ctx, pos) - .Name("item") - .Done(); - - TExprBase body = arg; - - for (const auto& key : PickleKeys) { - const auto member = Build(ctx, pos) - .Name().Build(key) - .Struct(arg) - .Done() - .Ptr(); - - body = Build(ctx, pos) - .Struct(body) - .Name().Build(key) - .Item(ctx.NewCallable(pos, "StablePickle", { member })) - .Done(); - } - - return Build(ctx, pos) - .Args({arg}) - .Body(body) - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildUnpickleLambda(TExprContext& ctx, TPositionHandle pos, const TStructExprType& rowType) { - TCoArgument arg = Build(ctx, pos) - .Name("item") - .Done(); - - TExprBase body = arg; - - for (const auto& key : PickleKeys) { - const auto index = rowType.FindItem(key); - Y_ENSURE(index); - - auto itemType = 
rowType.GetItems().at(*index)->GetItemType(); - const auto member = Build(ctx, pos) - .Name().Build(key) - .Struct(arg) - .Done() - .Ptr(); - - body = Build(ctx, pos) - .Struct(body) - .Name().Build(key) - .Item(ctx.NewCallable(pos, "Unpickle", { ExpandType(pos, *itemType, ctx), member })) - .Done(); - } - - return Build(ctx, pos) - .Args({arg}) - .Body(body) - .Done() - .Ptr(); - } - - TVector GetKeysList(TExprContext& ctx, TPositionHandle pos) const { - TVector res; - res.reserve(PickleKeys.size() + MemberKeys.size()); - - for (const auto& pickleKey : PickleKeys) { - res.emplace_back(Build(ctx, pos).Value(pickleKey).Done()); - } - for (const auto& memberKey : MemberKeys) { - res.emplace_back(Build(ctx, pos).Value(memberKey).Done()); - } - return res; - } - - TVector GetActualGroupKeys() { - TVector result; - result.reserve(PickleKeys.size() + MemberKeys.size()); - result.insert(result.end(), PickleKeys.begin(), PickleKeys.end()); - result.insert(result.end(), MemberKeys.begin(), MemberKeys.end()); - return result; - } - - bool NeedPickle() const { - return !PickleKeys.empty(); - } - - TExprNode::TPtr GetKeySelector(TExprContext& ctx, TPositionHandle pos, const TStructExprType* rowType) { - auto builder = Build(ctx, pos); - for (auto key : GetKeysList(ctx, pos)) { - builder.Add(std::move(key)); - } - return BuildKeySelector(pos, *rowType, builder.Build().Value().Ptr(), ctx); - } - }; - - TExprNode::TPtr WrapToShuffle( - const TKeysDescription& keysDescription, - const TCoAggregate& aggregate, - const TDqConnection& input, - TExprContext& ctx) - { - auto pos = aggregate.Pos(); - - TDqStageBase mappedInput = input.Output().Stage(); - if (keysDescription.NeedPickle()) { - mappedInput = Build(ctx, pos) - .Inputs() - .Add() - .Output() - .Stage(input.Output().Stage()) - .Index(input.Output().Index()) - .Build() - .Build() - .Build() - .Program() - .Args({"stream"}) - .Body() - .Input("stream") - .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) - .Build() - .Build() 
- .Settings(TDqStageSettings().BuildNode(ctx, pos)) - .Done(); - } - - return Build(ctx, pos) - .Output() - .Stage(mappedInput) - .Index().Value("0").Build() - .Build() - .KeyColumns() - .Add(keysDescription.GetKeysList(ctx, pos)) - .Build() - .Done() - .Ptr(); - } void EnsureNotDistinct(const TCoAggregate& aggregate) { const auto& aggregateHandlers = aggregate.Handlers(); @@ -893,360 +497,6 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { "Distinct is not supported for aggregation with hop"); } - TExprNode::TPtr BuildTimeExtractor(const TCoHoppingTraits& hoppingTraits, TExprContext& ctx) { - const auto pos = hoppingTraits.Pos(); - - if (hoppingTraits.ItemType().Ref().GetTypeAnn()->Cast()->GetType()->Cast()->GetSize() == 0) { - // The case when no fields are used in lambda. F.e. when it has only DependsOn. - return ctx.DeepCopyLambda(hoppingTraits.TimeExtractor().Ref()); - } - - return Build(ctx, pos) - .Args({"item"}) - .Body() - .Apply(hoppingTraits.TimeExtractor()) - .With(0) - .Type(hoppingTraits.ItemType()) - .Value("item") - .Build() - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildInitHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto& aggregateHandlers = aggregate.Handlers(); - - const auto initItemArg = Build(ctx, pos).Name("item").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - ui32 index = 0; - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - - TMaybeNode applier; - if (tuple.Trait().Cast().InitHandler().Args().Size() == 1) { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().InitHandler()) - .With(0, initItemArg) - .Done(); - } else { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().InitHandler()) - .With(0, initItemArg) - .With(1) - .Literal().Build(ToString(index)) - .Build() - .Done(); - } - - structItems.push_back(Build(ctx, pos) - 
.Name().Build(BuildColumnName(tuple.ColumnName())) - .Value(applier) - .Done()); - ++index; - } - - return Build(ctx, pos) - .Args({initItemArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildUpdateHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto aggregateHandlers = aggregate.Handlers(); - - const auto updateItemArg = Build(ctx, pos).Name("item").Done(); - const auto updateStateArg = Build(ctx, pos).Name("state").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - i32 index = 0; - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(updateStateArg) - .Name().Build(columnName) - .Done(); - - TMaybeNode applier; - if (tuple.Trait().Cast().UpdateHandler().Args().Size() == 2) { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().UpdateHandler()) - .With(0, updateItemArg) - .With(1, member) - .Done(); - } else { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().UpdateHandler()) - .With(0, updateItemArg) - .With(1, member) - .With(2) - .Literal().Build(ToString(index)) - .Build() - .Done(); - } - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value(applier) - .Done()); - ++index; - } - - return Build(ctx, pos) - .Args({updateItemArg, updateStateArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildMergeHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto& aggregateHandlers = aggregate.Handlers(); - - const auto mergeState1Arg = Build(ctx, pos).Name("state1").Done(); - const auto mergeState2Arg = Build(ctx, pos).Name("state2").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - for (const auto& handler : 
aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member1 = Build(ctx, pos) - .Struct(mergeState1Arg) - .Name().Build(columnName) - .Done(); - const auto member2 = Build(ctx, pos) - .Struct(mergeState2Arg) - .Name().Build(columnName) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value() - .Apply(tuple.Trait().Cast().MergeHandler()) - .With(0, member1) - .With(1, member2) - .Build() - .Done()); - } - - return Build(ctx, pos) - .Args({mergeState1Arg, mergeState2Arg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildFinishHopLambda( - const TCoAggregate& aggregate, - const TVector& actualGroupKeys, - const TString& hoppingColumn, - TExprContext& ctx) - { - const auto pos = aggregate.Pos(); - const auto aggregateHandlers = aggregate.Handlers(); - - const auto finishKeyArg = Build(ctx, pos).Name("key").Done(); - const auto finishStateArg = Build(ctx, pos).Name("state").Done(); - const auto finishTimeArg = Build(ctx, pos).Name("time").Done(); - - TVector structItems; - structItems.reserve(actualGroupKeys.size() + aggregateHandlers.Size() + 1); - - if (actualGroupKeys.size() == 1) { - structItems.push_back(Build(ctx, pos) - .Name().Build(actualGroupKeys[0]) - .Value(finishKeyArg) - .Done()); - } else { - for (size_t i = 0; i < actualGroupKeys.size(); ++i) { - structItems.push_back(Build(ctx, pos) - .Name().Build(actualGroupKeys[i]) - .Value() - .Tuple(finishKeyArg) - .Index() - .Value(ToString(i)) - .Build() - .Build() - .Done()); - } - } - - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString compoundColumnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(finishStateArg) - .Name().Build(compoundColumnName) - .Done(); - - if (tuple.ColumnName().Maybe()) { - structItems.push_back(Build(ctx, pos) - 
.Name().Build(compoundColumnName) - .Value() - .Apply(tuple.Trait().Cast().FinishHandler()) - .With(0, member) - .Build() - .Done()); - - continue; - } - - if (const auto namesList = tuple.ColumnName().Maybe()) { - const auto expApplier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().FinishHandler()) - .With(0, member) - .Done(); - - int index = 0; - for (const auto columnName : namesList.Cast()) { - const auto extracter = Build(ctx, pos) - .Tuple(expApplier) - .Index().Build(index++) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name(columnName) - .Value(extracter) - .Done()); - } - - continue; - } - - YQL_ENSURE(false, "Invalid node. Expected Atom or AtomList, but received: " - << tuple.ColumnName().Ptr()->Dump()); - } - - structItems.push_back(Build(ctx, pos) - .Name().Build(hoppingColumn) - .Value(finishTimeArg) - .Done()); - - return Build(ctx, pos) - .Args({finishKeyArg, finishStateArg, finishTimeArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildSaveHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto aggregateHandlers = aggregate.Handlers(); - - const auto saveStateArg = Build(ctx, pos).Name("state").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(saveStateArg) - .Name().Build(columnName) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value() - .Apply(tuple.Trait().Cast().SaveHandler()) - .With(0, member) - .Build() - .Done()); - } - - return Build(ctx, pos) - .Args({saveStateArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildLoadHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - 
const auto aggregateHandlers = aggregate.Handlers(); - - TCoArgument loadStateArg = Build(ctx, pos).Name("state").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(loadStateArg) - .Name().Build(columnName) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value() - .Apply(tuple.Trait().Cast().LoadHandler()) - .With(0, member) - .Build() - .Done()); - } - - return Build(ctx, pos) - .Args({loadStateArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TMaybe BuildWatermarkMode( - const TCoAggregate& aggregate, - const TCoHoppingTraits& hoppingTraits, - TExprContext& ctx) - { - const auto analyticsMode = Config->AnalyticsHopping.Get().GetOrElse(false); - const bool enableWatermarks = !analyticsMode && - Config->WatermarksMode.Get() == "default" && - hoppingTraits.Version().Cast().StringValue() == "v2"; - if (enableWatermarks && Config->ComputeActorType.Get() != "async") { - ctx.AddError(TIssue(ctx.GetPosition(aggregate.Pos()), "Watermarks should be used only with async compute actor")); - return Nothing(); - } - - if (hoppingTraits.Version().Cast().StringValue() == "v2" && !enableWatermarks) { - ctx.AddError(TIssue( - ctx.GetPosition(aggregate.Pos()), - "HoppingWindow requires watermarks to be enabled. 
If you don't want to do that, you can use HOP instead.")); - return Nothing(); - } - - return enableWatermarks; - } - IDqOptimization* GetDqOptCallback(const TExprBase& providerRead) const { if (providerRead.Ref().ChildrenSize() > 1 && TCoDataSource::Match(providerRead.Ref().Child(1))) { auto dataSourceName = providerRead.Ref().Child(1)->Child(0)->Content(); diff --git a/ydb/library/yql/providers/dq/planner/execution_planner.cpp b/ydb/library/yql/providers/dq/planner/execution_planner.cpp index b265d529d904..e1b40aa26fdf 100644 --- a/ydb/library/yql/providers/dq/planner/execution_planner.cpp +++ b/ydb/library/yql/providers/dq/planner/execution_planner.cpp @@ -558,7 +558,7 @@ namespace NYql::NDqs { TString sourceType; if (dqSource) { sourceSettings.ConstructInPlace(); - dqIntegration->FillSourceSettings(*read, *sourceSettings, sourceType); + dqIntegration->FillSourceSettings(*read, *sourceSettings, sourceType, maxPartitions); YQL_ENSURE(!sourceSettings->type_url().empty(), "Data source provider \"" << dataSourceName << "\" did't fill dq source settings for its dq source node"); YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceName << "\" did't fill dq source settings type for its dq source node"); } diff --git a/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp b/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp index e92dc952a3d9..37d899baae6d 100644 --- a/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp +++ b/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp @@ -127,13 +127,11 @@ class TTaskCommandExecutor { "TaskRunner", labels, name); - auto& old = CurrentJobStats[counterName]; if (name.EndsWith("Time")) { - QueryStat.AddTimeCounter(counterName, value - old); + QueryStat.SetTimeCounter(counterName, value); } else { - QueryStat.AddCounter(counterName, value - old); + QueryStat.SetCounter(counterName, value); } - old = value; } }); } @@ -766,7 +764,6 @@ class TTaskCommandExecutor { std::unique_ptr 
Alloc; NKikimr::NMiniKQL::TComputationNodeFactory ComputationFactory; TTaskTransformFactory TaskTransformFactory; - THashMap CurrentJobStats; NKikimr::NMiniKQL::IStatsRegistry* JobStats; bool TerminateOnError; TIntrusivePtr Runner; diff --git a/ydb/library/yql/providers/generic/actors/ya.make b/ydb/library/yql/providers/generic/actors/ya.make index 31ec4480c9ef..53f40afdca7c 100644 --- a/ydb/library/yql/providers/generic/actors/ya.make +++ b/ydb/library/yql/providers/generic/actors/ya.make @@ -3,15 +3,18 @@ LIBRARY() SRCS( yql_generic_read_actor.cpp yql_generic_source_factory.cpp + yql_generic_token_provider.cpp ) PEERDIR( ydb/library/yql/dq/actors/compute ydb/library/yql/minikql/computation + ydb/library/yql/providers/common/structured_token ydb/library/yql/providers/common/token_accessor/client ydb/library/yql/providers/generic/proto ydb/library/yql/public/types ydb/library/yql/providers/generic/connector/libcpp + ydb/public/sdk/cpp/client/ydb_types/credentials ) YQL_LAST_ABI_VERSION() diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp b/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp index 7de4f0a04ea1..51c02bb40456 100644 --- a/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp +++ b/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp @@ -1,4 +1,5 @@ #include "yql_generic_read_actor.h" +#include "yql_generic_token_provider.h" #include #include @@ -12,10 +13,10 @@ #include #include #include -#include #include #include #include +#include namespace NYql::NDq { @@ -102,16 +103,16 @@ namespace NYql::NDq { ui64 inputIndex, TCollectStatsLevel statsLevel, NConnector::IClient::TPtr client, - const NConnector::NApi::TSelect& select, - const NConnector::NApi::TDataSourceInstance& dataSourceInstance, + TGenericTokenProvider::TPtr tokenProvider, + Generic::TSource&& source, const NActors::TActorId& computeActorId, const NKikimr::NMiniKQL::THolderFactory& holderFactory) : 
InputIndex_(inputIndex) , ComputeActorId_(computeActorId) , Client_(std::move(client)) + , TokenProvider_(std::move(tokenProvider)) , HolderFactory_(holderFactory) - , Select_(select) - , DataSourceInstance_(dataSourceInstance) + , Source_(std::move(source)) { IngressStats_.Level = statsLevel; } @@ -143,7 +144,9 @@ namespace NYql::NDq { // Prepare request NConnector::NApi::TListSplitsRequest request; - *request.mutable_selects()->Add() = Select_; + NConnector::NApi::TSelect select = Source_.select(); // copy TSelect from source + TokenProvider_->MaybeFillToken(*select.mutable_data_source_instance()); + *request.mutable_selects()->Add() = std::move(select); // Initialize stream Client_->ListSplits(request).Subscribe( @@ -236,8 +239,11 @@ namespace NYql::NDq { std::for_each( Splits_.cbegin(), Splits_.cend(), - [&](const NConnector::NApi::TSplit& split) { request.mutable_splits()->Add()->CopyFrom(split); }); - request.mutable_data_source_instance()->CopyFrom(DataSourceInstance_); + [&](const NConnector::NApi::TSplit& split) { + NConnector::NApi::TSplit splitCopy = split; + TokenProvider_->MaybeFillToken(*splitCopy.mutable_select()->mutable_data_source_instance()); + *request.mutable_splits()->Add() = std::move(splitCopy); // send the token-enriched copy, not the untouched original split + }); // Start streaming Client_->ReadSplits(request).Subscribe( @@ -403,8 +409,8 @@ namespace NYql::NDq { // It's very important to fill UV columns in the alphabet order, // paying attention to the scalar field containing block length. 
TVector fieldNames; - std::transform(Select_.what().items().cbegin(), - Select_.what().items().cend(), + std::transform(Source_.select().what().items().cbegin(), + Source_.select().what().items().cend(), std::back_inserter(fieldNames), [](const auto& item) { return item.column().name(); }); @@ -484,6 +490,7 @@ namespace NYql::NDq { const NActors::TActorId ComputeActorId_; NConnector::IClient::TPtr Client_; + TGenericTokenProvider::TPtr TokenProvider_; NConnector::IListSplitsStreamIterator::TPtr ListSplitsIterator_; TVector Splits_; // accumulated list of table splits NConnector::IReadSplitsStreamIterator::TPtr ReadSplitsIterator_; @@ -492,22 +499,21 @@ namespace NYql::NDq { NKikimr::NMiniKQL::TPlainContainerCache ArrowRowContainerCache_; const NKikimr::NMiniKQL::THolderFactory& HolderFactory_; - const NYql::NConnector::NApi::TSelect Select_; - const NYql::NConnector::NApi::TDataSourceInstance DataSourceInstance_; + Generic::TSource Source_; }; std::pair CreateGenericReadActor(NConnector::IClient::TPtr genericClient, - Generic::TSource&& params, + Generic::TSource&& source, ui64 inputIndex, TCollectStatsLevel statsLevel, const THashMap& /*secureParams*/, const THashMap& /*taskParams*/, const NActors::TActorId& computeActorId, - ISecuredServiceAccountCredentialsFactory::TPtr /*credentialsFactory*/, + ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory, const NKikimr::NMiniKQL::THolderFactory& holderFactory) { - const auto dsi = params.select().data_source_instance(); + const auto dsi = source.select().data_source_instance(); YQL_CLOG(INFO, ProviderGeneric) << "Creating read actor with params:" << " kind=" << NYql::NConnector::NApi::EDataSourceKind_Name(dsi.kind()) << ", endpoint=" << dsi.endpoint().ShortDebugString() @@ -526,6 +532,7 @@ namespace NYql::NDq { YQL_ENSURE(one != TString::npos && two != TString::npos && one < two, "Bad token format:" << token); */ + // Obtain token to access remote data source if necessary // TODO: partitioning is not 
implemented now, but this code will be useful for the further research: /* TStringBuilder part; @@ -539,12 +546,14 @@ namespace NYql::NDq { part << ';'; */ + auto tokenProvider = CreateGenericTokenProvider(source, credentialsFactory); + const auto actor = new TGenericReadActor( inputIndex, statsLevel, genericClient, - params.select(), - dsi, + std::move(tokenProvider), + std::move(source), computeActorId, holderFactory); diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp new file mode 100644 index 000000000000..e8430b87e9ec --- /dev/null +++ b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp @@ -0,0 +1,67 @@ +#include "yql_generic_token_provider.h" + +#include + +namespace NYql::NDq { + TGenericTokenProvider::TGenericTokenProvider( + const NYql::Generic::TSource& source, const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory) + : Source_(source) + , StaticIAMToken_(source.GetToken()) + , CredentialsProvider_(nullptr) + { + // 1. User has provided IAM-token itself. + // This token will be used during the whole lifetime of a read actor. + if (!StaticIAMToken_.empty()) { + return; + } + + // 2. User has provided service account creds. + // We create token accessor client that will renew token accessor by demand. 
+ if (source.GetServiceAccountId() && source.GetServiceAccountIdSignature()) { + Y_ENSURE(credentialsFactory, "CredentialsFactory is not initialized"); + + auto structuredTokenJSON = + TStructuredTokenBuilder() + .SetServiceAccountIdAuth(source.GetServiceAccountId(), source.GetServiceAccountIdSignature()) + .ToJson(); + + // If service account is provided, obtain IAM-token + Y_ENSURE(structuredTokenJSON, "empty structured token"); + + auto credentialsProviderFactory = + CreateCredentialsProviderFactoryForStructuredToken(credentialsFactory, structuredTokenJSON, false); + CredentialsProvider_ = credentialsProviderFactory->CreateProvider(); + } + + // 3. If we reached this point, it means that user doesn't need token auth. + } + + void TGenericTokenProvider::MaybeFillToken(NConnector::NApi::TDataSourceInstance& dsi) const { + // 1. Don't need tokens if basic auth is set + if (dsi.credentials().has_basic()) { + return; + } + + *dsi.mutable_credentials()->mutable_token()->mutable_type() = "IAM"; + + // 2. If static IAM-token has been provided, use it + if (!StaticIAMToken_.empty()) { + *dsi.mutable_credentials()->mutable_token()->mutable_value() = StaticIAMToken_; + return; + } + + // 3. 
Otherwise use credentials provider to get token + Y_ENSURE(CredentialsProvider_, "CredentialsProvider is not initialized"); + + auto iamToken = CredentialsProvider_->GetAuthInfo(); + Y_ENSURE(iamToken, "CredentialsProvider returned empty IAM token"); + + *dsi.mutable_credentials()->mutable_token()->mutable_value() = std::move(iamToken); + } + + TGenericTokenProvider::TPtr + CreateGenericTokenProvider(const NYql::Generic::TSource& source, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory) { + return std::make_unique(source, credentialsFactory); + } +} //namespace NYql::NDq diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h new file mode 100644 index 000000000000..495a44c15e57 --- /dev/null +++ b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +namespace NYql::NDq { + // When accessing external data sources using authentication via tokens, + // there are two options: + // 1. Use static IAM-token provided by user (especially useful during debugging); + // 2. Use service account credentials in order to get (and refresh) IAM-token by demand. 
+ class TGenericTokenProvider { + public: + using TPtr = std::unique_ptr; + + TGenericTokenProvider(const NYql::Generic::TSource& source, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory); + + void MaybeFillToken(NConnector::NApi::TDataSourceInstance& dsi) const; + + private: + NYql::Generic::TSource Source_; + TString StaticIAMToken_; + NYdb::TCredentialsProviderPtr CredentialsProvider_; + }; + + TGenericTokenProvider::TPtr + CreateGenericTokenProvider(const NYql::Generic::TSource& source, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory); +} //namespace NYql::NDq diff --git a/ydb/library/yql/providers/generic/connector/api/common/data_source.proto b/ydb/library/yql/providers/generic/connector/api/common/data_source.proto index ecfb64665c9e..d0f700280c09 100644 --- a/ydb/library/yql/providers/generic/connector/api/common/data_source.proto +++ b/ydb/library/yql/providers/generic/connector/api/common/data_source.proto @@ -32,6 +32,8 @@ enum EDataSourceKind { POSTGRESQL = 2; S3 = 3; YDB = 4; + MYSQL = 5; + MS_SQL_SERVER = 6; } // EProtocol generalizes various kinds of network protocols supported by different databases. diff --git a/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto b/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto index 67cd9588547c..7004f2686136 100644 --- a/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto +++ b/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto @@ -197,8 +197,10 @@ message TSplit { // ReadDataRequest reads the data associated with a particular table split. message TReadSplitsRequest { - // Data source instance to connect - TDataSourceInstance data_source_instance = 1; + // Data source instance to connect. + // Deprecated field: server implementations must rely on + // TDataSourceInstance provided in each TSelect. 
+ TDataSourceInstance data_source_instance = 1 [deprecated = true]; // Splits that YQ engine would like to read. repeated TSplit splits = 2; diff --git a/ydb/library/yql/providers/generic/connector/libcpp/client.cpp b/ydb/library/yql/providers/generic/connector/libcpp/client.cpp index 9d6237808377..8280a4e36886 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/client.cpp +++ b/ydb/library/yql/providers/generic/connector/libcpp/client.cpp @@ -1,3 +1,5 @@ +#include + #include "client.h" namespace NYql::NConnector { @@ -21,10 +23,22 @@ namespace NYql::NConnector { public: TClientGRPC() = delete; TClientGRPC(const TGenericConnectorConfig& config) { - TString endpoint = TStringBuilder() << config.GetEndpoint().host() << ":" << ToString(config.GetEndpoint().port()); - GrpcConfig_ = NYdbGrpc::TGRpcClientConfig(endpoint); + GrpcConfig_ = NYdbGrpc::TGRpcClientConfig(); + + Y_ENSURE(config.GetEndpoint().host(), TStringBuilder() << "Empty host in TGenericConnectorConfig: " << config.DebugString()); + Y_ENSURE(config.GetEndpoint().port(), TStringBuilder() << "Empty port in TGenericConnectorConfig: " << config.DebugString()); + GrpcConfig_.Locator = TStringBuilder() << config.GetEndpoint().host() << ":" << config.GetEndpoint().port(); + GrpcConfig_.EnableSsl = config.GetUseSsl(); + // Read content of CA cert + TString rootCertData; + if (config.GetSslCaCrt()) { + rootCertData = TFileInput(config.GetSslCaCrt()).ReadAll(); + } + + GrpcConfig_.SslCredentials = grpc::SslCredentialsOptions{.pem_root_certs = rootCertData, .pem_private_key = "", .pem_cert_chain = ""}; + GrpcClient_ = std::make_unique(); // FIXME: is it OK to use single connection during the client lifetime? 
diff --git a/ydb/library/yql/providers/generic/connector/libcpp/client.h b/ydb/library/yql/providers/generic/connector/libcpp/client.h index 1d066e31b72a..7a2250798eb8 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/client.h +++ b/ydb/library/yql/providers/generic/connector/libcpp/client.h @@ -17,8 +17,6 @@ namespace NYql::NConnector { using TAsyncResult = NThreading::TFuture>; using TDescribeTableAsyncResult = TAsyncResult; - // using TListSplitsAsyncResult = TAsyncResult; - // using TReadSplitsAsyncResult = TAsyncResult; template class TStreamer { diff --git a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp index 09fbd4f8c599..7eed47039379 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp +++ b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp @@ -18,6 +18,5 @@ namespace NYql::NConnector::NTest { extern const TString DEFAULT_CH_SERVICE_ACCOUNT_ID_SIGNATURE = "sa_signature"; extern const TString DEFAULT_YDB_HOST = "localhost"; - extern const TString DEFAULT_YDB_DATABASE = "local"; extern const TString DEFAULT_YDB_ENDPOINT = TStringBuilder() << DEFAULT_YDB_HOST << ':' << DEFAULT_YDB_PORT; } // namespace NYql::NConnector::NTest diff --git a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h index f5e28b3e9f37..bbca9127a4bd 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h +++ b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h @@ -26,7 +26,6 @@ namespace NYql::NConnector::NTest { extern const TString DEFAULT_CH_SERVICE_ACCOUNT_ID; extern const TString DEFAULT_CH_SERVICE_ACCOUNT_ID_SIGNATURE; - extern const TString DEFAULT_YDB_DATABASE; extern const TString DEFAULT_YDB_HOST; constexpr int DEFAULT_YDB_PORT = 2136; extern const TString 
DEFAULT_YDB_ENDPOINT; diff --git a/ydb/library/yql/providers/generic/connector/tests/README.md b/ydb/library/yql/providers/generic/connector/tests/README.md new file mode 100644 index 000000000000..a474c36e673d --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/README.md @@ -0,0 +1,25 @@ +# Contribution guide + +When extending the list of external data sources supported by YDB Federated Query with a new database, storage system, or any other source, +it's crucial to write integration tests. There's a kind of template for these tests consisting of: + +* Test scenario (`CREATE TABLE` / `INSERT` / `SELECT` and so on). +* Test cases parametrizing the scenario. +* Infrastructure code responsible for deploying the external datasource as a dockerized service. + +The basic scenario typically consists of the following steps: +1. Deploy datasource and connector services using `docker-compose`. +1. Initialize datasource in any way you like (either with predefined `*.sql` files mounted into container or programmatically from the test app). +1. Prepare `YQL` script to query data from the datasource. +1. Execute script with YDB-based CLI tools: `dqrun` and `kqprun`. +1. Validate output. + +## Directory structure + +* `common_test_cases` keeps basic test cases that can be used for testing any data source. +* `datasource` contains subfolders (`datasource/clickhouse`, `datasource/postgresql`, etc.) with datasource-specific test scenarios, test cases and the `docker-compose.yml` file that is required to set up the test environment. +* `join` contains tests checking cross-datasource scenarios. +* `utils` contains building blocks for tests: + * `utils/clients` stores code performing network IO; + * `utils/scenario` describes the typical scenarios of the data source usage (e.g. creating a table, populating it with test data, etc.); + * `utils/types` describes the external data source's type system. 
diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/base.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/base.py similarity index 76% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/base.py rename to ydb/library/yql/providers/generic/connector/tests/common_test_cases/base.py index 4e8902a0372e..fb3c11e84f1f 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/base.py +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/base.py @@ -7,7 +7,6 @@ from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat from ydb.library.yql.providers.generic.connector.tests.utils.database import Database -from ydb.library.yql.providers.generic.connector.tests.utils.data_source_kind import data_source_kind_alias from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings @@ -20,7 +19,7 @@ class BaseTestCase: @property def name(self) -> str: - return f'{self.name_}_{data_source_kind_alias(self.data_source_kind)}_{EProtocol.Name(self.protocol)}' + return f'{self.name_}_{EProtocol.Name(self.protocol)}' @property def database(self) -> Database: @@ -35,7 +34,13 @@ def _table_name(self) -> str: In general, we cannot use test case name as table name because of special symbols, so we provide a random table name instead. 
''' - return 't' + hashlib.sha256(str(random.randint(0, 65536)).encode('ascii')).hexdigest()[:8] + match self.data_source_kind: + case EDataSourceKind.POSTGRESQL: + return 't' + hashlib.sha256(str(random.randint(0, 65536)).encode('ascii')).hexdigest()[:8] + case EDataSourceKind.CLICKHOUSE: + return 't' + hashlib.sha256(str(random.randint(0, 65536)).encode('ascii')).hexdigest()[:8] + case EDataSourceKind.YDB: + return self.name @property def sql_table_name(self) -> str: @@ -70,5 +75,10 @@ def generic_settings(self) -> GenericSettings: clickhouse_clusters=[], postgresql_clusters=[GenericSettings.PostgreSQLCluster(database=self.database.name, schema=None)], ) + case EDataSourceKind.YDB: + return GenericSettings( + date_time_format=EDateTimeFormat.YQL_FORMAT, + ydb_clusters=[GenericSettings.YdbCluster(database=self.database.name)], + ) case _: raise Exception(f'invalid data source: {self.data_source_kind}') diff --git a/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py new file mode 100644 index 000000000000..ec47a556c42c --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py @@ -0,0 +1,25 @@ +from typing import List + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase + + +TestCase = BaseTestCase + + +class Factory: + def make_test_cases(self, data_source_kind: EDataSourceKind) -> List[TestCase]: + test_cases = [] + + test_case_name = 'missing_database' + + test_case = TestCase( + name_=test_case_name, + data_source_kind=data_source_kind, + protocol=EProtocol.NATIVE, + pragmas=dict(), + ) + + test_cases.append(test_case) + + return test_cases diff --git 
a/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py new file mode 100644 index 000000000000..3a2501e35480 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py @@ -0,0 +1,25 @@ +from typing import List + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase + + +TestCase = BaseTestCase + + +class Factory: + def make_test_cases(self, data_source_kind: EDataSourceKind) -> List[TestCase]: + test_cases = [] + + test_case_name = 'missing_table' + + test_case = TestCase( + name_=test_case_name, + data_source_kind=data_source_kind, + protocol=EProtocol.NATIVE, + pragmas=dict(), + ) + + test_cases.append(test_case) + + return test_cases diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_common.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_positive_common.py similarity index 90% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_common.py rename to ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_positive_common.py index 714316837b4d..fff27b4cb4aa 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_common.py +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_positive_common.py @@ -7,8 +7,9 @@ from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings from ydb.library.yql.providers.generic.connector.tests.utils.generate import generate_table_data -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import 
ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.ydb as Ydb from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -18,7 +19,7 @@ SelectWhere, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings @@ -68,12 +69,12 @@ def _column_selection(self) -> Sequence[TestCase]: Column( name='COL1', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4(), ydb=Ydb.Int32()), ), Column( name='col2', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4(), ydb=Ydb.Int32()), ), ) ) @@ -90,6 +91,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL, + EDataSourceKind.YDB, ), ), # SELECT COL1 FROM table @@ -102,6 +104,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, # NOTE: YQ-2264: doesn't work for PostgreSQL because of implicit cast to lowercase (COL1 -> col1) + EDataSourceKind.YDB, ), ), # SELECT col1 FROM table @@ -123,6 +126,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL, + EDataSourceKind.YDB, ), ), # SELECT col2, COL1 FROM table @@ -135,6 +139,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( 
EDataSourceKind.CLICKHOUSE, # NOTE: YQ-2264: doesn't work for PostgreSQL because of implicit cast to lowercase (COL1 -> col1) + EDataSourceKind.YDB, ), ), # SELECT col2, col1 FROM table @@ -157,6 +162,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, # NOTE: YQ-2264: doesn't work for PostgreSQL because of implicit cast to lowercase (COL1 -> col1) + EDataSourceKind.YDB, ), ), # Select the same column multiple times with different aliases @@ -176,6 +182,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL, + EDataSourceKind.YDB, ), ), ) @@ -235,7 +242,6 @@ def _large_table(self) -> Sequence[TestCase]: ) data_in = generate_table_data(schema=schema, bytes_soft_limit=table_size) - print("BIRD", data_in) # Assuming that request will look something like: # @@ -272,14 +278,20 @@ def make_test_cases(self, data_source_kind: EDataSourceKind) -> Sequence[TestCas protocols = { EDataSourceKind.CLICKHOUSE: [EProtocol.NATIVE, EProtocol.HTTP], EDataSourceKind.POSTGRESQL: [EProtocol.NATIVE], + EDataSourceKind.YDB: [EProtocol.NATIVE], } - base_test_cases = list( - itertools.chain( - self._column_selection(), - self._large_table(), + base_test_cases = None + + if data_source_kind == EDataSourceKind.YDB: + base_test_cases = self._column_selection() + else: + base_test_cases = list( + itertools.chain( + self._column_selection(), + self._large_table(), + ) ) - ) test_cases = [] for base_tc in base_test_cases: diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/ya.make b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/ya.make similarity index 66% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/ya.make rename to ydb/library/yql/providers/generic/connector/tests/common_test_cases/ya.make index 211106748c43..49991fd287f1 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/ya.make +++ 
b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/ya.make @@ -1,18 +1,10 @@ PY3_LIBRARY() -STYLE_PYTHON() - PY_SRCS( base.py - collection.py - join.py - select_datetime.py select_missing_database.py select_missing_table.py - select_positive_clickhouse.py select_positive_common.py - select_positive_postgresql.py - select_positive_postgresql_schema.py ) PEERDIR( diff --git a/ydb/library/yql/providers/generic/connector/tests/conftest.py b/ydb/library/yql/providers/generic/connector/tests/conftest.py deleted file mode 100644 index e89287fd8ccf..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/conftest.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import TypeAlias - -import grpc -import pytest - -import ydb.library.yql.providers.generic.connector.api.service.connector_pb2_grpc as api -import yatest.common as yat - -from utils.settings import Settings -import utils.clickhouse -from utils.dqrun import DqRunner -from utils.kqprun import KqpRunner -from utils.runner import Runner -import utils.postgresql - - -@pytest.fixture -def settings() -> Settings: - return Settings.from_env() - - -@pytest.fixture -def clickhouse_client(settings) -> utils.clickhouse.Client: - client = utils.clickhouse.make_client(settings.clickhouse) - yield client - client.close() - - -@pytest.fixture -def postgresql_client(settings) -> utils.postgresql.Client: - return utils.postgresql.Client(settings.postgresql) - - -ConnectorClient: TypeAlias = api.ConnectorStub - - -@pytest.fixture -def connector_client(settings) -> ConnectorClient: - s = settings.connector - - channel = grpc.insecure_channel(f'{s.host}:{s.port}') - stub = ConnectorClient(channel) - return stub - - -def configure_runner(runner, settings) -> Runner: - if runner is DqRunner: - return DqRunner(dqrun_path=yat.build_path("ydb/library/yql/tools/dqrun/dqrun"), settings=settings) - elif runner is KqpRunner: - return KqpRunner(kqprun_path=yat.build_path("ydb/tests/tools/kqprun/kqprun"), 
settings=settings) - return None diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py new file mode 100644 index 000000000000..b8f4d5d442f2 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py @@ -0,0 +1,37 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind + +# test cases +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import select_positive +import select_datetime + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + 'select_missing_database': select_missing_database.Factory().make_test_cases(EDataSourceKind.CLICKHOUSE), + 'select_missing_table': select_missing_table.Factory().make_test_cases(EDataSourceKind.CLICKHOUSE), + 'select_positive': select_positive.Factory().make_test_cases() + + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.CLICKHOUSE), + 'select_datetime': select_datetime.Factory().make_test_cases(), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, self._test_cases[key]) diff --git 
a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py new file mode 100644 index 000000000000..6f31ef8d80cd --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py @@ -0,0 +1,22 @@ +from typing import Final +import pathlib + +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client, make_client + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse") + + +@pytest.fixture +def settings() -> Settings: + return Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.CLICKHOUSE]) + + +@pytest.fixture +def clickhouse_client(settings) -> Client: + cl = make_client(settings.clickhouse) + yield cl + cl.close() diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml new file mode 100644 index 000000000000..16f0eb3ee86b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml @@ -0,0 +1,20 @@ +version: '3.4' +services: + clickhouse: + image: clickhouse/clickhouse-server:23-alpine@sha256:b078c1cd294632afa2aeba3530e7ba2e568513da23304354f455a25fab575c06 + container_name: fq-tests-ch-clickhouse + environment: + CLICKHOUSE_DB: db + CLICKHOUSE_USER: user + CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 + CLICKHOUSE_PASSWORD: password + ports: + - 9000 + - 8123 + fq-connector-go: + container_name: fq-tests-ch-fq-connector-go + image: 
ghcr.io/ydb-platform/fq-connector-go:v0.2.5@sha256:7f086ce3869b84a59fd76a10a9de8125c0d382915e956d34832105e03829a61b + volumes: + - ../../fq-connector-go/:/opt/ydb/cfg/ + ports: + - 2130 diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_datetime.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_datetime.py similarity index 70% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_datetime.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_datetime.py index 22165fa715bb..95c390cf2507 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_datetime.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_datetime.py @@ -6,9 +6,8 @@ from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common as select_positive_common -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -173,68 +172,6 @@ def _make_test_yql_clickhouse(self) -> TestCase: pragmas=dict(), ) - def _make_test_yql_postgresql(self) -> TestCase: - schema = Schema( - columns=ColumnList( - Column( - name='col_0_id', - ydb_type=Type.INT32, - data_source_type=DataSourceType(pg=postgresql.Int4()), - ), - # TODO: timestamp - Column( - name='col_1_datetime64', - ydb_type=Type.TIMESTAMP, - 
data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), - ), - ), - ) - data_in = [ - # Date is OK for CH, but too early for YQL - [ - 1, - datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), - ], - # Date is OK for both CH and YQL - [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], - # Date is OK for CH, but too late for YQL - [ - 3, - datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), - ], - ] - - data_out = [ - [ - 1, - None, - ], - [ - 2, - # datetime.datetime(1988, 11, 20, 12, 23, 45, 678000).astimezone(ZoneInfo('UTC')).replace(tzinfo=None), - datetime.datetime(1988, 11, 20, 12, 23, 45, 678910), - ], - [ - 3, - None, - ], - ] - - test_case_name = self._name + '_YQL' - - return TestCase( - name_=test_case_name, - date_time_format=EDateTimeFormat.YQL_FORMAT, - data_in=data_in, - data_out_=data_out, - select_what=SelectWhat.asterisk(schema.columns), - select_where=None, - data_source_kind=EDataSourceKind.POSTGRESQL, - protocol=EProtocol.NATIVE, - schema=schema, - pragmas=dict(), - ) - def _make_test_string_clickhouse(self) -> TestCase: schema = Schema( columns=ColumnList( @@ -344,68 +281,8 @@ def _make_test_string_clickhouse(self) -> TestCase: pragmas=dict(), ) - def _make_test_string_postgresql(self) -> TestCase: - schema = Schema( - columns=ColumnList( - Column( - name='col_0_id', - ydb_type=Type.INT32, - data_source_type=DataSourceType(pg=postgresql.Int4()), - ), - # TODO: timestamp - Column( - name='col_1_datetime64', - ydb_type=Type.TIMESTAMP, - data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), - ), - ), - ) - data_in = [ - [ - 1, - datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), - ], - [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], - [ - 3, - datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), - ], - ] - - data_out = [ - [ - 1, - '1950-05-27T12:23:45.67891Z', - ], - [ - 2, - '1988-11-20T12:23:45.67891Z', - ], - [ - 3, - '2108-01-01T12:23:45.67891Z', - ], - ] - - test_case_name = self._name + 
'_string' - - return TestCase( - name_=test_case_name, - date_time_format=EDateTimeFormat.STRING_FORMAT, - data_in=data_in, - data_out_=data_out, - select_what=SelectWhat.asterisk(schema.columns), - select_where=None, - data_source_kind=EDataSourceKind.POSTGRESQL, - protocol=EProtocol.NATIVE, - schema=schema, - pragmas=dict(), - ) - def make_test_cases(self) -> Sequence[TestCase]: return [ self._make_test_yql_clickhouse(), - self._make_test_yql_postgresql(), self._make_test_string_clickhouse(), - self._make_test_string_postgresql(), ] diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_positive.py similarity index 98% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_clickhouse.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_positive.py index 19595cd96e84..de6af3d7cc51 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_clickhouse.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_positive.py @@ -6,7 +6,7 @@ from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -19,7 +19,7 @@ makeOptionalYdbTypeFromYdbType, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common import TestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common import TestCase class Factory: diff --git 
a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py new file mode 100644 index 000000000000..dad7d53bc4e3 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py @@ -0,0 +1,104 @@ +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.clickhouse as scenario + +from conftest import docker_compose_dir +from collection import Collection + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + + +# Global collection of test cases dependent on environment +tc_collection = Collection( + Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.CLICKHOUSE]) +) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_positive'), ids=tc_collection.ids('select_positive')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_positive( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, 
settings=settings) + scenario.select_positive( + test_name=request.node.name, settings=settings, runner=runner, client=clickhouse_client, test_case=test_case + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_database'), ids=tc_collection.ids('select_missing_database') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_missing_database( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_missing_database.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + settings=settings, + runner=runner, + client=clickhouse_client, + test_case=test_case, + test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_table'), ids=tc_collection.ids('select_missing_table') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_missing_table( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_missing_table.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + test_name=request.node.name, + settings=settings, + runner=runner, + client=clickhouse_client, + test_case=test_case, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_datetime'), ids=tc_collection.ids('select_datetime')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_datetime( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_positive_common.TestCase, 
+): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_positive( + test_name=request.node.name, + test_case=test_case, + settings=settings, + runner=runner, + client=clickhouse_client, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make new file mode 100644 index 000000000000..87d435816814 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make @@ -0,0 +1,66 @@ +PY3TEST() + +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=clickhouse) + +IF (AUTOCHECK) + # Split tests to chunks only when they're running on different machines with distbuild, + # otherwise this directive will slow down local test execution. + # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. + FORK_SUBTESTS() + + # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose + TAG( + ya:external + ya:force_sandbox + ya:fat + ) + + REQUIREMENTS( + container:4467981730 + cpu:all + dns:dns64 + ) +ENDIF() + +INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) + +# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, +# which makes it impossible to run them during precommit checks on Github CI. +# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
+ +IF (OPENSOURCE) + SIZE(MEDIUM) + SET(TEST_TAGS_VALUE) + SET(TEST_REQUIREMENTS_VALUE) +ENDIF() + +TEST_SRCS( + collection.py + conftest.py + select_datetime.py + select_positive.py + test.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/api/service/protos + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py new file mode 100644 index 000000000000..da6a46144b77 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py @@ -0,0 +1,37 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import select_datetime +import select_positive +import select_positive_with_schema + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + 
'select_missing_database': select_missing_database.Factory().make_test_cases(EDataSourceKind.POSTGRESQL), + 'select_missing_table': select_missing_table.Factory().make_test_cases(EDataSourceKind.POSTGRESQL), + 'select_positive': select_positive.Factory().make_test_cases() + + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.POSTGRESQL), + 'select_positive_with_schema': select_positive_with_schema.Factory().make_test_cases(), + 'select_datetime': select_datetime.Factory().make_test_cases(), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py new file mode 100644 index 000000000000..e2e36087059d --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py @@ -0,0 +1,22 @@ +from typing import Final +import pathlib + +import pytest + + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client + + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/postgresql") + + +@pytest.fixture +def settings() -> Settings: + return Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.POSTGRESQL]) + + +@pytest.fixture +def postgresql_client(settings) -> Client: + return Client(settings.postgresql) diff --git 
a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml new file mode 100644 index 000000000000..5808d9191470 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml @@ -0,0 +1,19 @@ +version: '3.4' +services: + postgresql: + image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 + container_name: fq-tests-pg-postgresql + environment: + POSTGRES_DB: db + POSTGRES_USER: user + POSTGRES_PASSWORD: password + command: ["postgres", "-c", "log_statement=all", "-c", "log_connections=on", "-c", "log_disconnections=on"] + ports: + - 5432 + fq-connector-go: + container_name: fq-tests-pg-fq-connector-go + image: ghcr.io/ydb-platform/fq-connector-go:v0.2.5@sha256:7f086ce3869b84a59fd76a10a9de8125c0d382915e956d34832105e03829a61b + volumes: + - ../../fq-connector-go/:/opt/ydb/cfg/ + ports: + - 2130 diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py new file mode 100644 index 000000000000..7c396bd5013a --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py @@ -0,0 +1,163 @@ +from dataclasses import dataclass +import datetime +from typing import Sequence + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol +from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat +from ydb.public.api.protos.ydb_value_pb2 import Type + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql +from 
ydb.library.yql.providers.generic.connector.tests.utils.schema import ( + Schema, + Column, + ColumnList, + DataSourceType, + SelectWhat, +) +from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings + + +@dataclass +class TestCase(select_positive_common.TestCase): + date_time_format: EDateTimeFormat = EDateTimeFormat.YQL_FORMAT + + @property + def data_out(self) -> Sequence: + return self.data_out_ if self.data_out_ else self.data_in + + @property + def generic_settings(self) -> GenericSettings: + gs = super().generic_settings + gs.date_time_format = self.date_time_format + return gs + + +class Factory: + _name = 'datetime' + + def _make_test_yql_postgresql(self) -> TestCase: + schema = Schema( + columns=ColumnList( + Column( + name='col_0_id', + ydb_type=Type.INT32, + data_source_type=DataSourceType(pg=postgresql.Int4()), + ), + # TODO: timestamp + Column( + name='col_1_datetime64', + ydb_type=Type.TIMESTAMP, + data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), + ), + ), + ) + data_in = [ + # Date is OK for CH, but too early for YQL + [ + 1, + datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), + ], + # Date is OK for both CH and YQL + [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], + # Date is OK for CH, but too late for YQL + [ + 3, + datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), + ], + ] + + data_out = [ + [ + 1, + None, + ], + [ + 2, + # datetime.datetime(1988, 11, 20, 12, 23, 45, 678000).astimezone(ZoneInfo('UTC')).replace(tzinfo=None), + datetime.datetime(1988, 11, 20, 12, 23, 45, 678910), + ], + [ + 3, + None, + ], + ] + + test_case_name = self._name + '_YQL' + + return TestCase( + name_=test_case_name, + date_time_format=EDateTimeFormat.YQL_FORMAT, + data_in=data_in, + data_out_=data_out, + select_what=SelectWhat.asterisk(schema.columns), + select_where=None, + data_source_kind=EDataSourceKind.POSTGRESQL, + protocol=EProtocol.NATIVE, + schema=schema, + pragmas=dict(), + ) + + def 
_make_test_string_postgresql(self) -> TestCase: + schema = Schema( + columns=ColumnList( + Column( + name='col_0_id', + ydb_type=Type.INT32, + data_source_type=DataSourceType(pg=postgresql.Int4()), + ), + # TODO: timestamp + Column( + name='col_1_datetime64', + ydb_type=Type.TIMESTAMP, + data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), + ), + ), + ) + data_in = [ + [ + 1, + datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), + ], + [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], + [ + 3, + datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), + ], + ] + + data_out = [ + [ + 1, + '1950-05-27T12:23:45.67891Z', + ], + [ + 2, + '1988-11-20T12:23:45.67891Z', + ], + [ + 3, + '2108-01-01T12:23:45.67891Z', + ], + ] + + test_case_name = self._name + '_string' + + return TestCase( + name_=test_case_name, + date_time_format=EDateTimeFormat.STRING_FORMAT, + data_in=data_in, + data_out_=data_out, + select_what=SelectWhat.asterisk(schema.columns), + select_where=None, + data_source_kind=EDataSourceKind.POSTGRESQL, + protocol=EProtocol.NATIVE, + schema=schema, + pragmas=dict(), + ) + + def make_test_cases(self) -> Sequence[TestCase]: + return [ + self._make_test_yql_postgresql(), + self._make_test_string_postgresql(), + ] diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive.py similarity index 98% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive.py index 6a213d01809b..ac605f6576ff 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive.py @@ -5,7 +5,7 @@ from 
ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -16,7 +16,7 @@ makeOptionalYdbTypeFromTypeID, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common import TestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common import TestCase class Factory: diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql_schema.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive_with_schema.py similarity index 79% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql_schema.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive_with_schema.py index b39192fdc76d..b2cb9bf457cf 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql_schema.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive_with_schema.py @@ -4,13 +4,12 @@ from random import choice from string import ascii_lowercase, digits -from utils.settings import GenericSettings from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings 
+import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -18,7 +17,7 @@ DataSourceType, SelectWhat, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase @dataclass @@ -51,19 +50,19 @@ def _select_with_pg_schema(self) -> Sequence[TestCase]: Column( name='COL1', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(pg=postgresql.Int4()), ), Column( name='col2', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(pg=postgresql.Int4()), ), ) ) select_what = SelectWhat.asterisk(column_list=schema.columns) - test_case_name = f'select_with_schema_{select_what}' + test_case_name = f'select_positive_with_schema_{select_what}' test_case = TestCase( name_=test_case_name, diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py new file mode 100644 index 000000000000..4483d6201d2f --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py @@ -0,0 +1,133 @@ +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.postgresql as scenario + +from conftest import 
docker_compose_dir +from collection import Collection + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + + +# Global collection of test cases dependent on environment +tc_collection = Collection( + Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.POSTGRESQL]) +) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_positive'), ids=tc_collection.ids('select_positive')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_positive( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_positive( + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_database'), ids=tc_collection.ids('select_missing_database') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_missing_database( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_missing_database.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + 
test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_table'), ids=tc_collection.ids('select_missing_table') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_missing_table( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_missing_table.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + test_name=request.node.name, + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_datetime'), ids=tc_collection.ids('select_datetime')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_datetime( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_positive( + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", + tc_collection.get('select_positive_with_schema'), + ids=tc_collection.ids('select_positive_with_schema'), +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_positive_with_schema( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_pg_schema( + 
settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + test_name=request.node.name, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make new file mode 100644 index 000000000000..9c622fcf3b76 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make @@ -0,0 +1,66 @@ +PY3TEST() + +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=postgresql) + +IF (AUTOCHECK) + # Split tests to chunks only when they're running on different machines with distbuild, + # otherwise this directive will slow down local test execution. + # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. + FORK_SUBTESTS() + + # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose + TAG( + ya:external + ya:force_sandbox + ya:fat + ) + + REQUIREMENTS( + container:4467981730 + cpu:all + dns:dns64 + ) +ENDIF() + +INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) + +# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, +# which makes it impossible to run them during precommit checks on Github CI. +# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
+ +IF (OPENSOURCE) + SIZE(MEDIUM) + SET(TEST_TAGS_VALUE) + SET(TEST_REQUIREMENTS_VALUE) +ENDIF() + +TEST_SRCS( + collection.py + conftest.py + select_datetime.py + select_positive.py + select_positive_with_schema.py + test.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/ya.make new file mode 100644 index 000000000000..3f0f2c8f0117 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ya.make @@ -0,0 +1,5 @@ +RECURSE_FOR_TESTS( + clickhouse + postgresql + ydb +) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py new file mode 100644 index 000000000000..3c747d5fc9e2 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py @@ -0,0 +1,32 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind + +# import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +# import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + 
+from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + # 'select_missing_database': select_missing_database.Factory().make_test_cases(EDataSourceKind.YDB), + # 'select_missing_table': select_missing_table.Factory().make_test_cases(EDataSourceKind.YDB), + 'select_positive': select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.YDB), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py new file mode 100644 index 000000000000..5e12b074140b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py @@ -0,0 +1,4 @@ +from typing import Final +import pathlib + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/ydb") diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml new file mode 100644 index 000000000000..93a54ce2ffc8 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml @@ -0,0 +1,25 @@ +version: '3.4' +services: + ydb: + image: cr.yandex/yc/yandex-docker-local-ydb:23.3.17@sha256:bf9001c849cc6c4c9b56f32f5440a6e8390c4e841937c9f9caf929fd70a689c8 + container_name: fq-tests-ydb-ydb + hostname: fq-tests-ydb-ydb + environment: + YDB_DEFAULT_LOG_LEVEL: INFO + POSTGRES_USER: user + 
POSTGRES_PASSWORD: password + volumes: + - ./init/init_ydb:/init_ydb + - ./init/01_basic.sh:/01_basic.sh + + fq-connector-go: + image: ghcr.io/ydb-platform/fq-connector-go:v0.2.12@sha256:dd2483ba061e25e8ee645bcc64cae8b8a0a93dba6772eb4b8ab0a0aab4b8dd48 + container_name: fq-tests-ydb-fq-connector-go + volumes: + - ../../fq-connector-go/:/opt/ydb/cfg/ + ports: + - 2130 + command: > + sh -c " + echo \"$$(dig fq-tests-ydb-ydb +short) fq-tests-ydb-ydb\" >> /etc/hosts; cat /etc/hosts; + /opt/ydb/bin/fq-connector-go server -c /opt/ydb/cfg/fq-connector-go.yaml" diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh new file mode 100755 index 000000000000..28daf80535da --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +/ydb -p tests-ydb-client yql -s ' + CREATE TABLE column_selection_A_b_C_d_E_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_A_b_C_d_E_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_COL1_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_COL1_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_asterisk_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_asterisk_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_col2_COL1_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_col2_COL1_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_col2_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_col2_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE 
column_selection_col3_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_col3_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + ' + +echo $(date +"%T.%6N") "SUCCESS" diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py new file mode 100644 index 000000000000..288782268cdb --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py @@ -0,0 +1,50 @@ +import pytest +import time + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.ydb as scenario + +from conftest import docker_compose_dir +from collection import Collection + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + + +class OneTimeWaiter: + __launched: bool = False + + def wait(self): + if self.__launched: + return + + # This should be enough for tables to initialize + time.sleep(3) + self.__launched = True + + +one_time_waiter = OneTimeWaiter() + +settings = Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.YDB]) +tc_collection = Collection(settings) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_positive'), ids=tc_collection.ids('select_positive')) +def test_select_positive( + request: pytest.FixtureRequest, + runner_type: str, + test_case: select_positive_common.TestCase, +): + # Let YDB container initialize tables + one_time_waiter.wait() + + runner = configure_runner(runner_type=runner_type, 
settings=settings) + scenario.select_positive( + settings=settings, + runner=runner, + test_case=test_case, + test_name=request.node.name, + ) diff --git a/ydb/core/kqp/ut/federated_query/generic/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/ya.make similarity index 54% rename from ydb/core/kqp/ut/federated_query/generic/ya.make rename to ydb/library/yql/providers/generic/connector/tests/datasource/ydb/ya.make index 407624f56540..e79c76b3f4a9 100644 --- a/ydb/core/kqp/ut/federated_query/generic/ya.make +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/ya.make @@ -1,4 +1,11 @@ -UNITTEST_FOR(ydb/core/kqp) +PY3TEST() + +STYLE_PYTHON() +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=ydb) IF (AUTOCHECK) # Split tests to chunks only when they're running on different machines with distbuild, @@ -20,24 +27,6 @@ IF (AUTOCHECK) ) ENDIF() -SRCS( - ch_recipe_ut_helpers.cpp - connector_recipe_ut_helpers.cpp - kqp_generic_plan_ut.cpp - kqp_generic_provider_join_ut.cpp - pg_recipe_ut_helpers.cpp -) - -PEERDIR( - contrib/libs/fmt - contrib/libs/libpqxx - library/cpp/clickhouse/client - ydb/core/kqp/ut/common - ydb/core/kqp/ut/federated_query/common - ydb/library/yql/providers/generic/connector/libcpp - ydb/library/yql/sql/pg_dummy -) - INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) # Including of docker_compose/recipe.inc automatically converts these tests into LARGE, @@ -50,6 +39,26 @@ IF (OPENSOURCE) SET(TEST_REQUIREMENTS_VALUE) ENDIF() -YQL_LAST_ABI_VERSION() +TEST_SRCS( + collection.py + conftest.py + test.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + 
ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) END() diff --git a/ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml b/ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml new file mode 100644 index 000000000000..02c5903a5a4f --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml @@ -0,0 +1,25 @@ +connector_server: + endpoint: + host: "0.0.0.0" + port: 2130 + +logger: + log_level: DEBUG + enable_sql_query_logging: true + +metrics_server: + endpoint: + host: "0.0.0.0" + port: 8766 + +pprof_server: + endpoint: + host: "0.0.0.0" + port: 6060 + +paging: + bytes_per_page: 4194304 + prefetch_queue_capacity: 2 + +conversion: + use_unsafe_converters: true diff --git a/ydb/library/yql/providers/generic/connector/tests/join/collection.py b/ydb/library/yql/providers/generic/connector/tests/join/collection.py new file mode 100644 index 000000000000..ad61792d2cf7 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/collection.py @@ -0,0 +1,26 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +import test_case + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + 'join': test_case.Factory().make_test_cases(), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, 
self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/join/conftest.py b/ydb/library/yql/providers/generic/connector/tests/join/conftest.py new file mode 100644 index 000000000000..19caea7afe9f --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/conftest.py @@ -0,0 +1,38 @@ +from typing import Final +import dataclasses +import pathlib + +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import ( + make_client as make_clickhouse_client, + Client as ClickHouseClient, +) +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client as PostgreSQLClient +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/join") + + +@pytest.fixture +def settings() -> Settings: + return Settings.from_env( + docker_compose_dir=docker_compose_dir, + data_source_kinds=[EDataSourceKind.POSTGRESQL, EDataSourceKind.CLICKHOUSE], + ) + + +@dataclasses.dataclass +class Clients: + ClickHouse: ClickHouseClient + PostgreSQL: PostgreSQLClient + + +@pytest.fixture +def clients(settings): + return Clients( + ClickHouse=make_clickhouse_client(settings=settings.clickhouse), + PostgreSQL=PostgreSQLClient(settings=settings.postgresql), + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml similarity index 58% rename from ydb/library/yql/providers/generic/connector/tests/docker-compose.yml rename to ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml index a8bfa53fb1ff..609c4a4942e6 100644 --- a/ydb/library/yql/providers/generic/connector/tests/docker-compose.yml +++ 
b/ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml @@ -1,17 +1,8 @@ version: '3.4' services: - postgresql: - image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 - container_name: ${USER}_connector-integration-tests-postgresql - environment: - POSTGRES_DB: db - POSTGRES_USER: user - POSTGRES_PASSWORD: password - ports: - - 5432 clickhouse: image: clickhouse/clickhouse-server:23-alpine@sha256:b078c1cd294632afa2aeba3530e7ba2e568513da23304354f455a25fab575c06 - container_name: ${USER}_connector-integration-tests-clickhouse + container_name: fq-tests-join-clickhouse environment: CLICKHOUSE_DB: db CLICKHOUSE_USER: user @@ -20,8 +11,20 @@ services: ports: - 9000 - 8123 + postgresql: + image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 + container_name: fq-tests-join-postgresql + environment: + POSTGRES_DB: db + POSTGRES_USER: user + POSTGRES_PASSWORD: password + command: ["postgres", "-c", "log_statement=all", "-c", "log_connections=on", "-c", "log_disconnections=on"] + ports: + - 5432 fq-connector-go: - container_name: ${USER}_connector-integration-tests-fq-connector-go - image: ghcr.io/ydb-platform/fq-connector-go:v0.1.1@sha256:47e24df143aee31a83d4a4cd0acc20b4cab8c03a9c63e81a6e99cb017a31f916 + container_name: fq-tests-join-fq-connector-go + image: ghcr.io/ydb-platform/fq-connector-go:v0.2.5@sha256:7f086ce3869b84a59fd76a10a9de8125c0d382915e956d34832105e03829a61b + volumes: + - ../fq-connector-go/:/opt/ydb/cfg/ ports: - - 50051 + - 2130 diff --git a/ydb/library/yql/providers/generic/connector/tests/join.py b/ydb/library/yql/providers/generic/connector/tests/join/scenario.py similarity index 60% rename from ydb/library/yql/providers/generic/connector/tests/join.py rename to ydb/library/yql/providers/generic/connector/tests/join/scenario.py index beecd8dac96b..3eff1950d8c6 100644 --- a/ydb/library/yql/providers/generic/connector/tests/join.py +++ 
b/ydb/library/yql/providers/generic/connector/tests/join/scenario.py @@ -1,33 +1,32 @@ -from pathlib import Path -import utils.postgresql - from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner -from utils.comparator import data_outs_equal -from utils.log import make_logger -from utils.runner import Runner -from utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client as ClickHouseClient +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.clickhouse as clickhouse_scenario +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client as PostgreSQLClient +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.postgresql as postgresql_scenario -import clickhouse -import postgresql -import test_cases.join +from test_case import TestCase LOGGER = make_logger(__name__) def join( test_name: str, - test_case: test_cases.join.TestCase, + test_case: TestCase, settings: Settings, runner: Runner, - clickhouse_client: clickhouse.Client, - postgresql_client: utils.postgresql.Client, + clickhouse_client: ClickHouseClient, + postgresql_client: PostgreSQLClient, ): # prepare tables for data_source in test_case.data_sources: match data_source.kind: case EDataSourceKind.CLICKHOUSE: - clickhouse.prepare_table( + clickhouse_scenario.prepare_table( test_name=test_name, client=clickhouse_client, database=data_source.database, @@ -36,7 +35,7 @@ def join( schema=data_source.table.schema, ) case EDataSourceKind.POSTGRESQL: - postgresql.prepare_table( + 
postgresql_scenario.prepare_table( test_name=test_name, client=postgresql_client, database=data_source.database, diff --git a/ydb/library/yql/providers/generic/connector/tests/join/test.py b/ydb/library/yql/providers/generic/connector/tests/join/test.py new file mode 100644 index 000000000000..8af6b76f5a2a --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/test.py @@ -0,0 +1,40 @@ +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner + +import conftest +import scenario +from collection import Collection +from test_case import TestCase + +# Global collection of test cases dependent on environment +tc_collection = Collection( + Settings.from_env( + docker_compose_dir=conftest.docker_compose_dir, + data_source_kinds=[EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL], + ) +) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('join'), ids=tc_collection.ids('join')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clients") +def test_join( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clients: conftest.Clients, + test_case: TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.join( + test_name=request.node.name, + clickhouse_client=clients.ClickHouse, + postgresql_client=clients.PostgreSQL, + runner=runner, + settings=settings, + test_case=test_case, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/join.py b/ydb/library/yql/providers/generic/connector/tests/join/test_case.py similarity index 94% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/join.py rename to 
ydb/library/yql/providers/generic/connector/tests/join/test_case.py index 3d46473a652a..e10bad53372c 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/join.py +++ b/ydb/library/yql/providers/generic/connector/tests/join/test_case.py @@ -6,9 +6,9 @@ from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat from ydb.public.api.protos.ydb_value_pb2 import Type -from utils.settings import Settings -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql from ydb.library.yql.providers.generic.connector.tests.utils.database import Database from ydb.library.yql.providers.generic.connector.tests.utils.data_source_kind import data_source_kind_alias from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( @@ -138,7 +138,7 @@ class Factory: ), ] - def make_simple_test_cases(self) -> Sequence[TestCase]: + def __make_simple_test_cases(self) -> Sequence[TestCase]: tables: Sequence[TestCase] = [ Table( name='example_1', @@ -215,7 +215,7 @@ def make_simple_test_cases(self) -> Sequence[TestCase]: return test_cases - def make_inner_join_test_case(self) -> Sequence[TestCase]: + def __make_inner_join_test_case(self) -> Sequence[TestCase]: ch_table = Table( name='test_1', schema=Schema( @@ -269,4 +269,4 @@ def make_inner_join_test_case(self) -> Sequence[TestCase]: ] def make_test_cases(self) -> Sequence[TestCase]: - return self.make_simple_test_cases() + self.make_inner_join_test_case() + return self.__make_simple_test_cases() + self.__make_inner_join_test_case() diff --git 
a/ydb/library/yql/providers/generic/connector/tests/join/ya.make b/ydb/library/yql/providers/generic/connector/tests/join/ya.make new file mode 100644 index 000000000000..74b7e810afbe --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/ya.make @@ -0,0 +1,66 @@ +PY3TEST() + +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=join) + +IF (AUTOCHECK) + # Split tests to chunks only when they're running on different machines with distbuild, + # otherwise this directive will slow down local test execution. + # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. + FORK_SUBTESTS() + + # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose + TAG( + ya:external + ya:force_sandbox + ya:fat + ) + + REQUIREMENTS( + container:4467981730 + cpu:all + dns:dns64 + ) +ENDIF() + +INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) + +# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, +# which makes it impossible to run them during precommit checks on Github CI. +# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
+ +IF (OPENSOURCE) + SIZE(MEDIUM) + SET(TEST_TAGS_VALUE) + SET(TEST_REQUIREMENTS_VALUE) +ENDIF() + +TEST_SRCS( + collection.py + conftest.py + scenario.py + test.py + test_case.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/api/service/protos + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/test.py b/ydb/library/yql/providers/generic/connector/tests/test.py deleted file mode 100644 index 46d6cc718290..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test.py +++ /dev/null @@ -1,230 +0,0 @@ -from pathlib import Path -import pytest - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind - -from utils.settings import Settings -import clickhouse -import join -import postgresql -from test_cases.collection import Collection -import test_cases.join -import test_cases.select_missing_database -import test_cases.select_missing_table -import test_cases.select_positive_common -import utils.clickhouse -from utils.runner import Runner -from conftest import configure_runner -import utils.dqrun as dqrun -import utils.kqprun as kqprun -import utils.postgresql - - -# Global collection of test cases dependent on environment -tc_collection = Collection(Settings.from_env()) - -runners = (dqrun.DqRunner, kqprun.KqpRunner) -runners_ids = ("dqrun", "kqprun") - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", 
tc_collection.get('select_positive_postgresql'), ids=tc_collection.ids('select_positive_postgresql') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("postgresql_client") -def test_select_positive_postgresql( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - postgresql.select_positive( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", tc_collection.get('select_positive_clickhouse'), ids=tc_collection.ids('select_positive_clickhouse') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -def test_select_positive_clickhouse( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - clickhouse.select_positive( - test_name=request.node.name, settings=settings, runner=runner, client=clickhouse_client, test_case=test_case - ) - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", tc_collection.get('select_missing_database'), ids=tc_collection.ids('select_missing_database') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_select_missing_database( - request: pytest.FixtureRequest, - tmp_path: Path, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_missing_database.TestCase, 
-): - runner = configure_runner(runner=runner_type, settings=settings) - match test_case.data_source_kind: - case EDataSourceKind.CLICKHOUSE: - clickhouse.select_missing_table( - settings=settings, - runner=runner, - client=clickhouse_client, - test_case=test_case, - test_name=request.node.name, - ) - case EDataSourceKind.POSTGRESQL: - postgresql.select_missing_table( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) - case _: - raise Exception(f'invalid data source: {test_case.data_source_kind}') - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", tc_collection.get('select_missing_table'), ids=tc_collection.ids('select_missing_table') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_select_missing_table( - request: pytest.FixtureRequest, - tmp_path: Path, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_missing_table.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - match test_case.data_source_kind: - case EDataSourceKind.CLICKHOUSE: - clickhouse.select_missing_table( - test_name=request.node.name, - settings=settings, - runner=runner, - client=clickhouse_client, - test_case=test_case, - ) - case EDataSourceKind.POSTGRESQL: - postgresql.select_missing_table( - test_name=request.node.name, - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - ) - case _: - raise Exception(f'invalid data source: {test_case.data_source_kind}') - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize("test_case", tc_collection.get('join'), ids=tc_collection.ids('join')) -@pytest.mark.usefixtures("settings") 
-@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_join( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.join.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - join.join( - test_name=request.node.name, - clickhouse_client=clickhouse_client, - postgresql_client=postgresql_client, - runner=runner, - settings=settings, - test_case=test_case, - ) - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize("test_case", tc_collection.get('select_datetime'), ids=tc_collection.ids('select_datetime')) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_select_datetime( - request: pytest.FixtureRequest, - tmp_path: Path, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - match test_case.data_source_kind: - case EDataSourceKind.CLICKHOUSE: - clickhouse.select_positive( - test_name=request.node.name, - test_case=test_case, - settings=settings, - runner=runner, - client=clickhouse_client, - ) - case EDataSourceKind.POSTGRESQL: - postgresql.select_positive( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) - case _: - raise Exception(f'invalid data source: {test_case.data_source_kind}') - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", - tc_collection.get('select_positive_postgresql_schema'), - ids=tc_collection.ids('select_positive_postgresql_schema'), -) 
-@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("postgresql_client") -def test_select_pg_schema( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - postgresql.select_pg_schema( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py b/ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py deleted file mode 100644 index 429d69c32892..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Sequence, Mapping - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind -import ydb.library.yql.providers.generic.connector.tests.test_cases.join as join -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_datetime as select_datetime -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_missing_database as select_missing_database -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_missing_table as select_missing_table -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_clickhouse as select_positive_clickhouse -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common as select_positive_common -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_postgresql as select_positive_postgresql -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_postgresql_schema as select_positive_postgresql_schema -from utils.settings import Settings - - -class Collection(object): - 
_test_cases: Mapping[str, Sequence] - - def __init__(self, ss: Settings): - self._test_cases = { - 'join': join.Factory().make_test_cases(), - 'select_missing_database': select_missing_database.Factory().make_test_cases(), - 'select_missing_table': select_missing_table.Factory().make_test_cases(), - 'select_positive_postgresql': select_positive_postgresql.Factory().make_test_cases() - + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.POSTGRESQL), - 'select_positive_postgresql_schema': select_positive_postgresql_schema.Factory().make_test_cases(), - 'select_positive_clickhouse': select_positive_clickhouse.Factory().make_test_cases() - + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.CLICKHOUSE), - 'select_datetime': select_datetime.Factory().make_test_cases(), - } - - def get(self, key: str) -> Sequence: - if key not in self._test_cases: - raise ValueError(f'no such test: {key}') - - return self._test_cases[key] - - def ids(self, key: str) -> Sequence[str]: - if key not in self._test_cases: - raise ValueError(f'no such test: {key}') - - return map(lambda tc: tc.name, self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py b/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py deleted file mode 100644 index e0eaab481876..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import List - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase - - -TestCase = BaseTestCase - - -class Factory: - def make_test_cases(self) -> List[TestCase]: - data_source_kinds = ( - EDataSourceKind.CLICKHOUSE, - EDataSourceKind.POSTGRESQL, - ) - - test_cases = [] - for data_source_kind in data_source_kinds: - 
test_case_name = 'missing_database' - - test_case = TestCase( - name_=test_case_name, - data_source_kind=data_source_kind, - protocol=EProtocol.NATIVE, - pragmas=dict(), - ) - - test_cases.append(test_case) - - return test_cases diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py b/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py deleted file mode 100644 index 42c2230755b3..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import List - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase - - -TestCase = BaseTestCase - - -class Factory: - def make_test_cases(self) -> List[TestCase]: - data_source_kinds = ( - EDataSourceKind.CLICKHOUSE, - EDataSourceKind.POSTGRESQL, - ) - - test_cases = [] - for data_source_kind in data_source_kinds: - test_case_name = 'missing_table' - - test_case = TestCase( - name_=test_case_name, - data_source_kind=data_source_kind, - protocol=EProtocol.NATIVE, - pragmas=dict(), - ) - - test_cases.append(test_case) - - return test_cases diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py new file mode 100644 index 000000000000..c23aa6f2cf9b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py @@ -0,0 +1,32 @@ +from typing import TypeAlias +from datetime import datetime +import sys +import time + +import clickhouse_connect +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +Client: TypeAlias = clickhouse_connect.driver.client.Client + + +def make_client(settings: Settings.ClickHouse) -> Client: + start = datetime.now() + 
attempt = 0 + + while (datetime.now() - start).total_seconds() < 60: + attempt += 1 + try: + client = clickhouse_connect.get_client( + host=settings.host_external, + port=settings.http_port_external, + username=settings.username, + password=settings.password, + ) + except Exception as e: + sys.stderr.write(f"attempt #{attempt}: {e}\n") + time.sleep(5) + continue + + return client + + raise Exception(f"Failed to connect ClickHouse in {attempt} attempt(s)") diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py new file mode 100644 index 000000000000..544cd3815e6b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py @@ -0,0 +1,60 @@ +from contextlib import contextmanager +import time +from datetime import datetime +from typing import Tuple + +import pg8000.dbapi + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger + +LOGGER = make_logger(__name__) + + +class Client: + # database name -> pool + settings: Settings.PostgreSQL + + def __init__(self, settings: Settings.PostgreSQL): + self.settings = settings + self.pools = dict() + LOGGER.debug("initializing client") + + @contextmanager + def get_cursor(self, dbname: str): + conn, cursor = self._make_cursor(dbname=dbname) + yield conn, cursor + cursor.close() + conn.close() + + def _make_cursor(self, dbname: str) -> Tuple[pg8000.dbapi.Connection, pg8000.dbapi.Cursor]: + LOGGER.debug(f"making cursor for database {dbname}") + start = datetime.now() + attempt = 0 + + while (datetime.now() - start).total_seconds() < 10: + attempt += 1 + try: + LOGGER.debug( + f"trying to connect PostgreSQL: {self.settings.host_external}:{self.settings.port_external}" + ) + conn = pg8000.dbapi.Connection( + user=self.settings.username, + 
password=self.settings.password, + host=self.settings.host_external, + port=self.settings.port_external, + database=dbname, + timeout=10, + ) + conn.autocommit = True + + cur = conn.cursor() + return conn, cur + except Exception as e: + LOGGER.error(f"connection attempt #{attempt} failed: {e} {e.args}") + time.sleep(1) + continue + + ss = self.settings + params = f'{ss.username} {ss.password} {ss.host_external} {ss.port_external} {dbname}' + raise Exception(f"Failed to connect PostgreSQL in {attempt} attempt(s) with params: {params}") diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make new file mode 100644 index 000000000000..c0845060e513 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make @@ -0,0 +1,16 @@ +PY3_LIBRARY() + +PY_SRCS( + clickhouse.py + postgresql.py + ydb.py +) + +PEERDIR( + contrib/python/clickhouse-connect + contrib/python/pg8000 + ydb/public/sdk/python + ydb/library/yql/providers/generic/connector/tests/utils +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py new file mode 100644 index 000000000000..f4e2345789ed --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py @@ -0,0 +1,10 @@ +import ydb +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +def make_client(s: Settings.Ydb) -> ydb.Driver: + endpoint = f"grpc://{s.host_external}:{s.port_external}" + + driver = ydb.Driver(endpoint=endpoint, database=s.dbname, credentials=ydb.AnonymousCredentials()) + driver.wait(timeout=5) + return driver diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/database.py b/ydb/library/yql/providers/generic/connector/tests/utils/database.py index c691aed42757..5b9c9cbe9305 100644 --- 
a/ydb/library/yql/providers/generic/connector/tests/utils/database.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/database.py @@ -17,6 +17,10 @@ def __init__(self, name: str, kind: EDataSourceKind.ValueType): self.name = name[:63].lower() case EDataSourceKind.CLICKHOUSE: self.name = name[:255] + case EDataSourceKind.YDB: + # We use a different way of initialization when working with YDB. + # There is only one preinstalled database called + self.name = "local" case _: raise Exception(f'invalid data source: {self.kind}') diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py b/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py index 7d5b2b13d855..98ca5788cf79 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py @@ -1,22 +1,98 @@ import os import subprocess +import shutil +import yaml +import socket +from typing import Dict, Any import yatest.common +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger + +LOGGER = make_logger(__name__) + class EndpointDeterminer: - docker_compose_bin: os.PathLike - docker_compose_yml: os.PathLike + docker_bin_path: os.PathLike + docker_compose_bin_path: os.PathLike + + docker_compose_yml_path: os.PathLike + docker_compose_yml_data: Dict[str, Any] - def __init__(self, docker_compose_yml: os.PathLike): - self.docker_compose_bin = yatest.common.build_path('library/recipes/docker_compose/bin/docker-compose') - self.docker_compose_yml = docker_compose_yml + def __init__(self, docker_compose_yml_path: os.PathLike): + self.docker_bin_path = shutil.which('docker') + self.docker_compose_bin_path = yatest.common.build_path('library/recipes/docker_compose/bin/docker-compose') + self.docker_compose_yml_path = docker_compose_yml_path - def get_port(self, service_name: str, internal_port: int) -> int: - cmd = [self.docker_compose_bin, 
'-f', self.docker_compose_yml, 'port', service_name, str(internal_port)] + with open(self.docker_compose_yml_path) as f: + self.docker_compose_yml_data = yaml.load(f) + + def get_external_port(self, service_name: str, internal_port: int) -> int: + cmd = [ + self.docker_compose_bin_path, + '-f', + self.docker_compose_yml_path, + 'port', + service_name, + str(internal_port), + ] try: out = subprocess.check_output(cmd, stderr=subprocess.STDOUT) external_port = int(out.split(b':')[1]) return external_port except subprocess.CalledProcessError as e: raise RuntimeError(f"docker-compose error: {e.output} (code {e.returncode})") + + @staticmethod + def __is_valid_ipv4_address(address: str) -> bool: + try: + socket.inet_pton(socket.AF_INET, address) + except AttributeError as e1: # no inet_pton here, sorry + LOGGER.warn(f"validate '{address}' with inet_pton error: {e1}") + try: + socket.inet_aton(address) + except socket.error as e2: + LOGGER.error(f"validate '{address}' with inet_aton error: {e2}") + return False + return address.count('.') == 3 + except socket.error as e3: # not a valid address + LOGGER.error(f"validate '{address}' with inet_pton error: {e3}") + return False + + return True + + @staticmethod + def __is_valid_ipv6_address(address: str) -> bool: + try: + socket.inet_pton(socket.AF_INET6, address) + except socket.error: # not a valid address + return False + return True + + @staticmethod + def __is_valid_ip_address(address: str) -> bool: + return EndpointDeterminer.__is_valid_ipv4_address(address) or EndpointDeterminer.__is_valid_ipv6_address( + address + ) + + def get_internal_ip(self, service_name: str) -> str: + container_name = self.docker_compose_yml_data['services'][service_name]['container_name'] + cmd = [ + self.docker_bin_path, + "inspect", + "-f", + "'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}'", + container_name, + ] + try: + out = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode('utf8').strip().strip("'") + + if not 
EndpointDeterminer.__is_valid_ip_address(out): + raise ValueError(f"IP determined for container '{container_name}' is invalid: '{out}'") + + return out + except subprocess.CalledProcessError as e: + raise RuntimeError(f"docker-compose error: {e.output} (code {e.returncode})") + + def get_container_name(self, service_name: str) -> str: + return self.docker_compose_yml_data['services'][service_name]['container_name'] diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/generate.py b/ydb/library/yql/providers/generic/connector/tests/utils/generate.py index 93efd5e58273..879c7f43140e 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/generate.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/generate.py @@ -2,7 +2,7 @@ from typing import Sequence -from utils.schema import Schema +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema from ydb.public.api.protos.ydb_value_pb2 import Type diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py deleted file mode 100644 index b8d77e6c1775..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py +++ /dev/null @@ -1,176 +0,0 @@ -from contextlib import contextmanager -import abc -import time -from datetime import datetime -from typing import Tuple -import sys - -import pg8000.dbapi - -from utils.settings import Settings - - -class Client: - # database name -> pool - settings: Settings.PostgreSQL - - def __init__(self, settings: Settings.PostgreSQL): - self.settings = settings - self.pools = dict() - - @contextmanager - def get_cursor(self, dbname: str): - conn, cursor = self._make_cursor(dbname=dbname) - yield conn, cursor - cursor.close() - conn.close() - - def _make_cursor(self, dbname: str) -> Tuple[pg8000.dbapi.Connection, pg8000.dbapi.Cursor]: - start = datetime.now() - attempt = 0 - - while (datetime.now() - 
start).total_seconds() < 10: - attempt += 1 - try: - sys.stdout.write( - f"Trying to connect PostgreSQL: {self.settings.host_external}:{self.settings.port_external}\n" - ) - conn = pg8000.dbapi.Connection( - user=self.settings.username, - password=self.settings.password, - host=self.settings.host_external, - port=self.settings.port_external, - database=dbname, - timeout=10, - ) - conn.autocommit = True - - cur = conn.cursor() - return conn, cur - except Exception as e: - sys.stderr.write(f"attempt #{attempt} failed: {e} {e.args}\n") - time.sleep(3) - continue - - ss = self.settings - params = f'{ss.username} {ss.password} {ss.host_external} {ss.port_external} {dbname}' - raise Exception(f"Failed to connect PostgreSQL in {attempt} attempt(s) with params: {params}") - - -class Type(abc.ABC): - @abc.abstractmethod - def to_sql(self) -> str: - pass - - -class PrimitiveType(Type): - def to_sql(self): - return type(self).__name__.lower() - - -class Boolean(PrimitiveType): - pass - - -class Bool(PrimitiveType): - pass - - -class SmallInt(PrimitiveType): - pass - - -class Int2(PrimitiveType): - pass - - -class SmallSerial(PrimitiveType): - pass - - -class Serial2(PrimitiveType): - pass - - -class Integer(PrimitiveType): - pass - - -class Int(PrimitiveType): - pass - - -class Int4(PrimitiveType): - pass - - -class Serial(PrimitiveType): - pass - - -class Serial4(PrimitiveType): - pass - - -class BigInt(PrimitiveType): - pass - - -class Int8(PrimitiveType): - pass - - -class BigSerial(PrimitiveType): - pass - - -class Serial8(PrimitiveType): - pass - - -class Real(PrimitiveType): - pass - - -class Float4(PrimitiveType): - pass - - -class DoublePrecision(PrimitiveType): - def to_sql(self): - return 'double precision' - - -class Float8(PrimitiveType): - pass - - -class Bytea(PrimitiveType): - pass - - -class Character(PrimitiveType): - def to_sql(self): - return 'character (5)' - - -class CharacterVarying(PrimitiveType): - def to_sql(self): - return 'character varying (5)' - - 
-class Text(PrimitiveType): - pass - - -class TimestampWithoutTimeZone(PrimitiveType): - def to_sql(self): - return 'timestamp without time zone' - - -class Date(PrimitiveType): - pass - - -class Time(PrimitiveType): - pass diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/dqrun.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/dqrun.py similarity index 91% rename from ydb/library/yql/providers/generic/connector/tests/utils/dqrun.py rename to ydb/library/yql/providers/generic/connector/tests/utils/run/dqrun.py index f3ee81780d84..5ffc0140e9c2 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/dqrun.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/dqrun.py @@ -10,16 +10,18 @@ from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts -from ydb.library.yql.providers.generic.connector.tests.utils.runner import Result, Runner from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings, GenericSettings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.run.result import Result + LOGGER = make_logger(__name__) class GatewaysConfRenderer: - template_: Final = ''' + _template: Final = ''' Generic { Connector { Endpoint { @@ -99,6 +101,26 @@ class GatewaysConfRenderer: }} {% endfor %} +{% for cluster in generic_settings.ydb_clusters %} + ClusterMapping { + Kind: YDB + Name: "{{settings.ydb.cluster_name}}" + DatabaseName: "{{cluster.database}}" + Credentials { + basic { + username: "{{settings.ydb.username}}" + password: "{{settings.ydb.password}}" + } + } + Endpoint { + host: 
"{{settings.ydb.host_internal}}" + port: {{settings.ydb.port_internal}} + } + UseSsl: false + Protocol: NATIVE + } +{% endfor %} + DefaultSettings { Name: "DateTimeFormat" {% if generic_settings.date_time_format == EDateTimeFormat.STRING_FORMAT %} @@ -107,7 +129,6 @@ class GatewaysConfRenderer: Value: "YQL" {% endif %} } - } Dq { @@ -165,7 +186,7 @@ class GatewaysConfRenderer: def __init__(self): self.template = jinja2.Environment(loader=jinja2.BaseLoader, undefined=jinja2.DebugUndefined).from_string( - self.template_ + self._template ) self.template.globals['EProtocol'] = EProtocol self.template.globals['EDateTimeFormat'] = EDateTimeFormat diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/kqprun.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/kqprun.py similarity index 93% rename from ydb/library/yql/providers/generic/connector/tests/utils/kqprun.py rename to ydb/library/yql/providers/generic/connector/tests/utils/run/kqprun.py index 603f8fd96c33..9433d4db2296 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/kqprun.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/kqprun.py @@ -1,20 +1,21 @@ from pathlib import Path -import subprocess from typing import Final +import json +import subprocess import jinja2 -import json - from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EProtocol from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts -from ydb.library.yql.providers.generic.connector.tests.utils.runner import Result, Runner from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings, GenericSettings +from 
ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.run.result import Result + LOGGER = make_logger(__name__) @@ -32,8 +33,10 @@ class SchemeRenderer: AUTH_METHOD="BASIC", LOGIN="{{login}}", PASSWORD_SECRET_NAME="{{data_source}}_local_password", - USE_TLS="FALSE", - PROTOCOL="{{protocol}}" + {% if protocol %} + PROTOCOL="{{protocol}}", + {% endif %} + USE_TLS="FALSE" {% if kind == POSTGRESQL and schema %} ,SCHEMA="{{schema}}" @@ -44,6 +47,7 @@ class SchemeRenderer: {% set CLICKHOUSE = 'ClickHouse' %} {% set POSTGRESQL = 'PostgreSQL' %} +{% set YDB = 'Ydb' %} {% set NATIVE = 'NATIVE' %} {% set HTTP = 'HTTP' %} @@ -85,6 +89,20 @@ class SchemeRenderer: }} {% endfor %} +{% for cluster in generic_settings.ydb_clusters %} +{{ create_data_source( + YDB, + settings.ydb.cluster_name, + settings.ydb.host_internal, + settings.ydb.port_internal, + settings.ydb.username, + settings.ydb.password, + NONE, + cluster.database, + NONE) +}} +{% endfor %} + ''' def __init__(self): diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/runner.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/parent.py similarity index 69% rename from ydb/library/yql/providers/generic/connector/tests/utils/runner.py rename to ydb/library/yql/providers/generic/connector/tests/utils/run/parent.py index d5d4b12e1000..d297efd01dbe 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/runner.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/parent.py @@ -1,20 +1,8 @@ from abc import ABC, abstractmethod -from dataclasses import dataclass from pathlib import Path -from typing import List, Optional from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings, GenericSettings -from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema, YsonList - - -@dataclass -class Result: - data_out: Optional[YsonList] - 
data_out_with_types: Optional[List] - schema: Optional[Schema] - stdout: str - stderr: str - returncode: int +from ydb.library.yql.providers.generic.connector.tests.utils.run.result import Result class Runner(ABC): diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/run/result.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/result.py new file mode 100644 index 000000000000..45078d16bd00 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/result.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass +from typing import Optional, List + +from yt import yson +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema + + +@dataclass +class Result: + data_out: Optional[yson.yson_types.YsonList] + data_out_with_types: Optional[List] + schema: Optional[Schema] + stdout: str + stderr: str + returncode: int diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py new file mode 100644 index 000000000000..435502c7d55e --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py @@ -0,0 +1,23 @@ +from typing import Final + +import yatest.common as yat + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.run.dqrun import DqRunner +from ydb.library.yql.providers.generic.connector.tests.utils.run.kqprun import KqpRunner + +# used in every test.py +runner_types: Final = ("dqrun", "kqprun") + + +# used in every test.py +def configure_runner(runner_type: str, settings: Settings) -> Runner: + match runner_type: + case "dqrun": + return DqRunner(dqrun_path=yat.build_path("ydb/library/yql/tools/dqrun/dqrun"), settings=settings) + case "kqprun": + return 
KqpRunner(kqprun_path=yat.build_path("ydb/tests/tools/kqprun/kqprun"), settings=settings) + case _: + raise ValueError(runner_type) diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make new file mode 100644 index 000000000000..8173db5b86d4 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make @@ -0,0 +1,23 @@ +PY3_LIBRARY() + +STYLE_PYTHON() + +PY_SRCS( + dqrun.py + kqprun.py + parent.py + result.py + runners.py +) + +PEERDIR( + contrib/python/Jinja2 + contrib/python/PyYAML + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/api/service/protos + ydb/library/yql/providers/generic/connector/tests/utils + ydb/public/api/protos + yt/python/yt/yson +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/clickhouse.py similarity index 75% rename from ydb/library/yql/providers/generic/connector/tests/clickhouse.py rename to ydb/library/yql/providers/generic/connector/tests/utils/scenario/clickhouse.py index ea5588dda62c..2429556ffafc 100644 --- a/ydb/library/yql/providers/generic/connector/tests/clickhouse.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/clickhouse.py @@ -2,19 +2,19 @@ import ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 as data_source_pb2 -import utils.artifacts as artifacts -from utils.clickhouse import Client -from utils.comparator import data_outs_equal -from utils.database import Database -from utils.log import make_logger, debug_with_limit -from utils.schema import Schema -from utils.settings import Settings -from utils.runner import Runner -from utils.sql import format_values_for_bulk_sql_insert - -import test_cases.select_missing_database -import test_cases.select_missing_table -import test_cases.select_positive_common 
+import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.database import Database +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger, debug_with_limit +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.sql import format_values_for_bulk_sql_insert +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as tc_select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as tc_select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as tc_select_positive_common LOGGER = make_logger(__name__) @@ -60,7 +60,7 @@ def prepare_table( def select_positive( test_name: str, - test_case: test_cases.select_missing_table.TestCase, + test_case: tc_select_positive_common.TestCase, settings: Settings, runner: Runner, client: Client, @@ -113,7 +113,7 @@ def select_positive( def select_missing_database( test_name: str, - test_case: test_cases.select_missing_database.TestCase, + test_case: tc_select_missing_database.TestCase, settings: Settings, runner: Runner, ): @@ -133,7 +133,7 @@ def select_missing_database( def select_missing_table( test_name: str, - test_case: test_cases.select_missing_table.TestCase, + test_case: tc_select_missing_table.TestCase, settings: Settings, runner: Runner, client: Client, diff --git 
a/ydb/library/yql/providers/generic/connector/tests/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/postgresql.py similarity index 81% rename from ydb/library/yql/providers/generic/connector/tests/postgresql.py rename to ydb/library/yql/providers/generic/connector/tests/utils/scenario/postgresql.py index b8c381466195..babcc37abd12 100644 --- a/ydb/library/yql/providers/generic/connector/tests/postgresql.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/postgresql.py @@ -2,22 +2,19 @@ import ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 as data_source_pb2 -import utils.artifacts as artifacts -from utils.comparator import data_outs_equal -from utils.database import Database -from utils.log import make_logger, debug_with_limit -from utils.postgresql import Client -from utils.schema import Schema -from utils.settings import Settings -from utils.runner import Runner -from utils.sql import format_values_for_bulk_sql_insert - - -import test_cases.select_missing_database -import test_cases.select_missing_table -import test_cases.select_positive_common -import test_cases.select_positive_postgresql_schema - +import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.database import Database +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger, debug_with_limit +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.sql import format_values_for_bulk_sql_insert +from 
ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as tc_select_positive_common +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as tc_select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as tc_select_missing_table LOGGER = make_logger(__name__) @@ -88,7 +85,7 @@ def prepare_table( def select_positive( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_positive_common.TestCase, settings: Settings, runner: Runner, client: Client, @@ -133,7 +130,7 @@ def select_positive( def select_missing_database( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_missing_database.TestCase, settings: Settings, runner: Runner, ): @@ -154,7 +151,7 @@ def select_missing_database( def select_missing_table( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_missing_table.TestCase, settings: Settings, runner: Runner, client: Client, @@ -190,7 +187,7 @@ def select_missing_table( def select_pg_schema( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_positive_common.TestCase, settings: Settings, runner: Runner, client: Client, diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make new file mode 100644 index 000000000000..d4067949140e --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make @@ -0,0 +1,17 @@ +PY3_LIBRARY() + +PY_SRCS( + clickhouse.py + postgresql.py + ydb.py +) + +PEERDIR( + contrib/python/clickhouse-connect + contrib/python/pg8000 + ydb/library/yql/providers/generic/connector/tests/utils + 
ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/run +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py new file mode 100644 index 000000000000..67f96b15b87c --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py @@ -0,0 +1,38 @@ +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as tc_select_positive_common + + +def select_positive( + test_name: str, + test_case: tc_select_positive_common.TestCase, + settings: Settings, + runner: Runner, +): + # read data + where_statement = "" + if test_case.select_where is not None: + where_statement = "WHERE " + test_case.select_where.render( + cluster_name=settings.ydb.cluster_name, + table_name=test_case.qualified_table_name, + ) + yql_script = f""" + {test_case.pragmas_sql_string} + SELECT {test_case.select_what.yql_select_names} + FROM {settings.ydb.cluster_name}.{test_case.qualified_table_name} + {where_statement} + """ + result = runner.run( + test_name=test_name, + script=yql_script, + generic_settings=test_case.generic_settings, + ) + + assert result.returncode == 0, result.stderr + + assert data_outs_equal(test_case.data_out, result.data_out_with_types), ( + test_case.data_out, + result.data_out_with_types, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/schema.py b/ydb/library/yql/providers/generic/connector/tests/utils/schema.py index fb5771c35991..e0ba178cae18 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/schema.py +++ 
b/ydb/library/yql/providers/generic/connector/tests/utils/schema.py @@ -8,8 +8,9 @@ from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind from ydb.public.api.protos.ydb_value_pb2 import Type, OptionalType -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.ydb as Ydb YsonList: TypeAlias = yson.yson_types.YsonList @@ -18,6 +19,7 @@ class DataSourceType: ch: clickhouse.Type = None pg: postgresql.Type = None + ydb: Ydb.Type = None def pick(self, kind: EDataSourceKind.ValueType) -> str: target = None @@ -26,6 +28,8 @@ def pick(self, kind: EDataSourceKind.ValueType) -> str: target = self.ch case EDataSourceKind.POSTGRESQL: target = self.pg + case EDataSourceKind.YDB: + target = self.ydb case _: raise Exception(f'invalid data source: {kind}') diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/settings.py b/ydb/library/yql/providers/generic/connector/tests/utils/settings.py index 97bfde8d3394..51971998665d 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/settings.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/settings.py @@ -1,5 +1,6 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional, Sequence +import pathlib import yatest.common @@ -39,7 +40,7 @@ class PostgreSQL: dbname: str cluster_name: str username: str - password: Optional[str] + password: Optional[str] # TODO: why optional? 
host_external: str host_internal: str port_external: int @@ -47,42 +48,79 @@ class PostgreSQL: postgresql: PostgreSQL + @dataclass + class Ydb: + dbname: str + cluster_name: str + username: str + password: str + host_internal: str + port_internal: int + + ydb: Ydb + @classmethod - def from_env(cls) -> 'Settings': - docker_compose_file = yatest.common.source_path( - 'ydb/library/yql/providers/generic/connector/tests/docker-compose.yml' - ) - endpoint_determiner = EndpointDeterminer(docker_compose_file) + def from_env(cls, docker_compose_dir: pathlib.Path, data_source_kinds: Sequence[EDataSourceKind]) -> 'Settings': + docker_compose_file_relative_path = str(docker_compose_dir / 'docker-compose.yml') + docker_compose_file_abs_path = yatest.common.source_path(docker_compose_file_relative_path) + endpoint_determiner = EndpointDeterminer(docker_compose_file_abs_path) + + data_sources = dict() + + for data_source_kind in data_source_kinds: + match data_source_kind: + case EDataSourceKind.CLICKHOUSE: + data_sources[data_source_kind] = cls.ClickHouse( + cluster_name='clickhouse_integration_test', + host_external='0.0.0.0', + # This hack is due to https://st.yandex-team.ru/YQ-3003. + # Previously we used container names instead of container ips: + # host_internal=docker_compose_file['services']['clickhouse']['container_name'], + host_internal=endpoint_determiner.get_internal_ip('clickhouse'), + http_port_external=endpoint_determiner.get_external_port('clickhouse', 8123), + native_port_external=endpoint_determiner.get_external_port('clickhouse', 9000), + http_port_internal=8123, + native_port_internal=9000, + username='user', + password='password', + protocol='native', + ) + case EDataSourceKind.POSTGRESQL: + data_sources[data_source_kind] = cls.PostgreSQL( + cluster_name='postgresql_integration_test', + host_external='0.0.0.0', + # This hack is due to https://st.yandex-team.ru/YQ-3003. 
+ # Previously we used container names instead of container ips: + # host_internal=docker_compose_file['services']['postgresql']['container_name'], + host_internal=endpoint_determiner.get_internal_ip('postgresql'), + port_external=endpoint_determiner.get_external_port('postgresql', 5432), + port_internal=5432, + dbname='db', + username='user', + password='password', + ) + case EDataSourceKind.YDB: + data_sources[data_source_kind] = cls.Ydb( + cluster_name='ydb_integration_test', + host_internal=endpoint_determiner.get_container_name('ydb'), + port_internal=2136, + dbname="local", + username='user', + password='password', + ) + case _: + raise Exception(f'invalid data source: {data_source_kind}') return cls( connector=cls.Connector( grpc_host='localhost', - grpc_port=endpoint_determiner.get_port('fq-connector-go', 50051), + grpc_port=endpoint_determiner.get_external_port('fq-connector-go', 2130), paging_bytes_per_page=4 * 1024 * 1024, paging_prefetch_queue_capacity=2, ), - clickhouse=cls.ClickHouse( - cluster_name='clickhouse_integration_test', - host_external='localhost', - host_internal='clickhouse', - http_port_external=endpoint_determiner.get_port('clickhouse', 8123), - native_port_external=endpoint_determiner.get_port('clickhouse', 9000), - http_port_internal=8123, - native_port_internal=9000, - username='user', - password='password', - protocol='native', - ), - postgresql=cls.PostgreSQL( - cluster_name='postgresql_integration_test', - host_external='localhost', - host_internal='postgresql', - port_external=endpoint_determiner.get_port('postgresql', 5432), - port_internal=5432, - dbname='db', - username='user', - password='password', - ), + clickhouse=data_sources.get(EDataSourceKind.CLICKHOUSE), + postgresql=data_sources.get(EDataSourceKind.POSTGRESQL), + ydb=data_sources.get(EDataSourceKind.YDB), ) def get_cluster_name(self, data_source_kind: EDataSourceKind) -> str: @@ -97,6 +135,8 @@ def get_cluster_name(self, data_source_kind: EDataSourceKind) -> str: 
@dataclass class GenericSettings: + date_time_format: EDateTimeFormat + @dataclass class ClickHouseCluster: def __hash__(self) -> int: @@ -105,7 +145,7 @@ def __hash__(self) -> int: database: str protocol: EProtocol - clickhouse_clusters: Sequence[ClickHouseCluster] + clickhouse_clusters: Sequence[ClickHouseCluster] = field(default_factory=list) @dataclass class PostgreSQLCluster: @@ -115,6 +155,10 @@ def __hash__(self) -> int: database: str schema: str - postgresql_clusters: Sequence[PostgreSQLCluster] + postgresql_clusters: Sequence[PostgreSQLCluster] = field(default_factory=list) - date_time_format: EDateTimeFormat + @dataclass + class YdbCluster: + database: str + + ydb_clusters: Sequence[YdbCluster] = field(default_factory=list) diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/utils/types/clickhouse.py similarity index 61% rename from ydb/library/yql/providers/generic/connector/tests/utils/clickhouse.py rename to ydb/library/yql/providers/generic/connector/tests/utils/types/clickhouse.py index 991b808c1cb8..342bb2c35137 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/clickhouse.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/clickhouse.py @@ -1,33 +1,4 @@ -from typing import TypeAlias import abc -from datetime import datetime -import sys -import time - -import clickhouse_connect -from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings - -Client: TypeAlias = clickhouse_connect.driver.client.Client - - -def make_client(s: Settings.ClickHouse) -> Client: - start = datetime.now() - attempt = 0 - - while (datetime.now() - start).total_seconds() < 60: - attempt += 1 - try: - client = clickhouse_connect.get_client( - host=s.host_external, port=s.http_port_external, username=s.username, password=s.password - ) - except Exception as e: - sys.stderr.write(f"attempt #{attempt}: {e}\n") - time.sleep(5) - continue - 
- return client - - raise Exception(f"Failed to connect ClickHouse in {attempt} attempt(s)") class Type(abc.ABC): diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py new file mode 100644 index 000000000000..d45b169cf5ab --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py @@ -0,0 +1,120 @@ +import abc + + +class Type(abc.ABC): + @abc.abstractmethod + def to_sql(self) -> str: + pass + + +class PrimitiveType(Type): + def to_sql(self): + return type(self).__name__.lower() + + +class Boolean(PrimitiveType): + pass + + +class Bool(PrimitiveType): + pass + + +class SmallInt(PrimitiveType): + pass + + +class Int2(PrimitiveType): + pass + + +class SmallSerial(PrimitiveType): + pass + + +class Serial2(PrimitiveType): + pass + + +class Integer(PrimitiveType): + pass + + +class Int(PrimitiveType): + pass + + +class Int4(PrimitiveType): + pass + + +class Serial(PrimitiveType): + pass + + +class Serial4(PrimitiveType): + pass + + +class BigInt(PrimitiveType): + pass + + +class Int8(PrimitiveType): + pass + + +class BigSerial(PrimitiveType): + pass + + +class Serial8(PrimitiveType): + pass + + +class Real(PrimitiveType): + pass + + +class Float4(PrimitiveType): + pass + + +class DoublePrecision(PrimitiveType): + def to_sql(self): + return 'double precision' + + +class Float8(PrimitiveType): + pass + + +class Bytea(PrimitiveType): + pass + + +class Character(PrimitiveType): + def to_sql(self): + return 'character (5)' + + +class CharacterVarying(PrimitiveType): + def to_sql(self): + return 'character varying (5)' + + +class Text(PrimitiveType): + pass + + +class TimestampWithoutTimeZone(PrimitiveType): + def to_sql(self): + return 'timestamp without time zone' + + +class Date(PrimitiveType): + pass + + +class Time(PrimitiveType): + pass diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make 
b/ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make new file mode 100644 index 000000000000..a5d5840bf551 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make @@ -0,0 +1,9 @@ +PY3_LIBRARY() + +PY_SRCS( + clickhouse.py + postgresql.py + ydb.py +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py b/ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py new file mode 100644 index 000000000000..9619a224abdb --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py @@ -0,0 +1,79 @@ +import abc + + +class Type(abc.ABC): + @abc.abstractmethod + def to_sql(self) -> str: + pass + + +class PrimitiveType(Type): + def to_sql(self): + return type(self).__name__ + + @classmethod + def to_nullable(cls): + return Nullable(cls()) + + +class Bool(PrimitiveType): + pass + + +class Int8(PrimitiveType): + pass + + +class Int16(PrimitiveType): + pass + + +class Int32(PrimitiveType): + pass + + +class Int64(PrimitiveType): + pass + + +class UInt8(PrimitiveType): + pass + + +class UInt16(PrimitiveType): + pass + + +class UInt32(PrimitiveType): + pass + + +class UInt64(PrimitiveType): + pass + + +class Float(PrimitiveType): + pass + + +class Double(PrimitiveType): + pass + + +class String(PrimitiveType): + pass + + +class FixedString(PrimitiveType): + def to_sql(self) -> str: + return "FixedString(5)" + + +class Nullable(Type): + primitive: PrimitiveType + + def __init__(self, primitive: PrimitiveType): + self.primitive = primitive + + def to_sql(self) -> str: + return f'Nullable({self.primitive.to_sql()})' diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/ya.make index 6d49965f3509..4d8f719e3f15 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/ya.make +++ b/ydb/library/yql/providers/generic/connector/tests/utils/ya.make @@ -1,29 +1,31 @@ 
PY3_LIBRARY() -STYLE_PYTHON() - PY_SRCS( artifacts.py - clickhouse.py comparator.py data_source_kind.py database.py - dqrun.py docker_compose.py generate.py - kqprun.py log.py - postgresql.py - runner.py schema.py settings.py sql.py ) PEERDIR( + contrib/python/PyYAML ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/tests/utils/types ydb/public/api/protos yt/python/yt/yson ) END() + +RECURSE_FOR_TESTS( + clients + run + scenario + types +) diff --git a/ydb/library/yql/providers/generic/connector/tests/ya.make b/ydb/library/yql/providers/generic/connector/tests/ya.make index 0100555d73cd..a4d804cfdcfe 100644 --- a/ydb/library/yql/providers/generic/connector/tests/ya.make +++ b/ydb/library/yql/providers/generic/connector/tests/ya.make @@ -1,74 +1,6 @@ -PY3TEST() - -STYLE_PYTHON() -NO_CHECK_IMPORTS() - -DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/docker-compose.yml) - -IF (AUTOCHECK) - # Split tests to chunks only when they're running on different machines with distbuild, - # otherwise this directive will slow down local test execution. - # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. - FORK_SUBTESTS() - - # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose - TAG( - ya:external - ya:force_sandbox - ya:fat - ) - - REQUIREMENTS( - container:4467981730 - cpu:all - dns:dns64 - ) -ENDIF() - -INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) - -# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, -# which makes it impossible to run them during precommit checks on Github CI. -# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
- -IF (OPENSOURCE) - SIZE(MEDIUM) - SET(TEST_TAGS_VALUE) - SET(TEST_REQUIREMENTS_VALUE) -ENDIF() - -TEST_SRCS( - conftest.py - clickhouse.py - postgresql.py - test.py -) - -PEERDIR( - contrib/python/Jinja2 - contrib/python/clickhouse-connect - contrib/python/grpcio - contrib/python/pg8000 - contrib/python/pytest - contrib/python/tzlocal - ydb/library/yql/providers/generic/connector/api/common - ydb/library/yql/providers/generic/connector/api/service - ydb/library/yql/providers/generic/connector/api/service/protos - ydb/library/yql/providers/generic/connector/tests/test_cases - ydb/library/yql/providers/generic/connector/tests/utils - ydb/public/api/protos - yt/python/yt/yson -) - -DEPENDS( - ydb/library/yql/tools/dqrun - ydb/tests/tools/kqprun - library/recipes/docker_compose/bin -) - -END() - RECURSE_FOR_TESTS( - test_cases + common_test_cases + datasource + join utils ) diff --git a/ydb/library/yql/providers/generic/proto/source.proto b/ydb/library/yql/providers/generic/proto/source.proto index 0911dd54ef26..725b0815698f 100644 --- a/ydb/library/yql/providers/generic/proto/source.proto +++ b/ydb/library/yql/providers/generic/proto/source.proto @@ -5,15 +5,19 @@ option cc_enable_arenas = true; package NYql.Generic; import "ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto"; -import "ydb/library/yql/providers/generic/connector/api/common/data_source.proto"; message TSource { - // Token to access database - // FIXME: unused field, delete it: - string token = 1; // Prepared Select expression NYql.NConnector.NApi.TSelect select = 2; - // Description of instance to connect - // FIXME: DataSourceInstance is already incapsulated into select, delete it: - NYql.NConnector.NApi.TDataSourceInstance data_source_instance = 3; -} \ No newline at end of file + + // Credentials used to access managed databases APIs. 
+ // When working with external data source instances deployed in clouds, + // one should either set (ServiceAccountId, ServiceAccountIdSignature) pair + // that will be resolved into IAM Token via Token Accessor, + // or provide IAM Token directly. + string ServiceAccountId = 4; + string ServiceAccountIdSignature = 5; + string Token = 6; + + reserved 1, 3; +} diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp index a32ce9dc1215..48bb17d52670 100644 --- a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp +++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp @@ -180,7 +180,7 @@ class TBuildDqSourceSettingsTransformer: public TOptimizeTransformerBase { .Ptr(); ::google::protobuf::Any settings; TString sourceType; - dqIntegration->FillSourceSettings(*dqSourceNode, settings, sourceType); + dqIntegration->FillSourceSettings(*dqSourceNode, settings, sourceType, 1); UNIT_ASSERT_STRINGS_EQUAL(sourceType, "PostgreSqlGeneric"); UNIT_ASSERT(settings.Is()); settings.UnpackTo(DqSourceSettings_); @@ -243,6 +243,7 @@ struct TPushdownFixture: public NUnitTest::TBaseFixture { TypesCtx.Get(), FunctionRegistry.Get(), DatabaseResolver, + nullptr, GenericClient, GatewaysCfg.GetGeneric()); diff --git a/ydb/library/yql/providers/generic/provider/ya.make b/ydb/library/yql/providers/generic/provider/ya.make index ca9d84e6365b..55dd70b153e2 100644 --- a/ydb/library/yql/providers/generic/provider/ya.make +++ b/ydb/library/yql/providers/generic/provider/ya.make @@ -21,6 +21,8 @@ SRCS( yql_generic_settings.cpp yql_generic_state.h yql_generic_state.cpp + yql_generic_utils.h + yql_generic_utils.cpp ) YQL_LAST_ABI_VERSION() @@ -45,13 +47,16 @@ PEERDIR( ydb/library/yql/providers/common/provider ydb/library/yql/providers/common/pushdown ydb/library/yql/providers/common/structured_token + ydb/library/yql/providers/common/token_accessor/client 
ydb/library/yql/providers/common/transform ydb/library/yql/providers/dq/common ydb/library/yql/providers/dq/expr_nodes ydb/library/yql/providers/generic/expr_nodes ydb/library/yql/providers/generic/proto + ydb/library/yql/providers/generic/connector/api/common ydb/library/yql/providers/generic/connector/libcpp ydb/library/yql/utils/plan + ydb/public/sdk/cpp/client/ydb_types/credentials ) END() diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp index 249e3e1002ff..886b493076ee 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -11,8 +12,6 @@ #include "yql_generic_cluster_config.h" namespace NYql { - using namespace NConnector; - using namespace NConnector::NApi; using namespace fmt::literals; void ParseLogin( @@ -20,7 +19,8 @@ namespace NYql { NYql::TGenericClusterConfig& clusterConfig) { auto it = properties.find("login"); if (it == properties.cend()) { - ythrow yexception() << "missing 'LOGIN' value"; + // It's OK not to have credentials for base auth + return; } if (!it->second) { @@ -35,7 +35,8 @@ namespace NYql { NYql::TGenericClusterConfig& clusterConfig) { auto it = properties.find("password"); if (it == properties.cend()) { - ythrow yexception() << "missing 'PASSWORD' value"; + // It's OK not to have credentials for base auth + return; } clusterConfig.MutableCredentials()->Mutablebasic()->Setpassword(it->second); @@ -157,6 +158,25 @@ namespace NYql { clusterConfig.SetDatabaseId(it->second); } + void ParseDatabaseId(const THashMap& properties, + NYql::TGenericClusterConfig& clusterConfig) { + auto it = properties.find("database_id"); + if (it == properties.cend()) { + return; + } + + if (!it->second) { + // DATABASE_ID is an optional field + return; + } + + if (!it->second) { + 
ythrow yexception() << "invalid 'DATABASE_ID' value: '" << it->second << "'"; + } + + clusterConfig.SetDatabaseId(it->second); + } + void ParseSourceType(const THashMap& properties, NYql::TGenericClusterConfig& clusterConfig) { auto it = properties.find("source_type"); @@ -240,20 +260,6 @@ namespace NYql { } TGenericClusterConfig GenericClusterConfigFromProperties(const TString& clusterName, const THashMap& properties) { - // some cross-parameter validations - auto location = KeyIsSet(properties, "location"); - auto mdbClusterId = KeyIsSet(properties, "mdb_cluster_id"); - - if ((location && mdbClusterId) || (!location and !mdbClusterId)) { - ythrow yexception() << "you must provide either 'LOCATION' or 'MDB_CLUSTER_ID' parameter"; - } - - auto serviceAccountId = KeyIsSet(properties, "serviceAccountId"); - auto serviceAccountIdSignature = KeyIsSet(properties, "serviceAccountIdSignature"); - if ((serviceAccountId && !serviceAccountIdSignature) || (!serviceAccountId && serviceAccountIdSignature)) { - ythrow yexception() << "you must provide either both 'SERVICE_ACCOUNT_ID' and 'SERVICE_ACCOUNT_ID_SIGNATURE' parameters or none of them"; - } - NYql::TGenericClusterConfig clusterConfig; clusterConfig.set_name(clusterName); ParseLogin(properties, clusterConfig); @@ -263,6 +269,7 @@ namespace NYql { ParseDatabaseName(properties, clusterConfig); ParseSchema(properties, clusterConfig); ParseMdbClusterId(properties, clusterConfig); + ParseDatabaseId(properties, clusterConfig); ParseSourceType(properties, clusterConfig); ParseProtocol(properties, clusterConfig); ParseServiceAccountId(properties, clusterConfig); @@ -310,27 +317,16 @@ namespace NYql { "protocol"_a = NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol())); } + static const TSet managedDatabaseKinds{ + NConnector::NApi::EDataSourceKind::POSTGRESQL, + NConnector::NApi::EDataSourceKind::CLICKHOUSE, + NConnector::NApi::EDataSourceKind::YDB}; + void ValidateGenericClusterConfig( const 
NYql::TGenericClusterConfig& clusterConfig, const TString& context) { - // cross-parameter validations for optional fields - auto hasEndpoint = clusterConfig.HasEndpoint(); - auto databaseId = clusterConfig.GetDatabaseId(); - - if ((hasEndpoint && databaseId)) { - return ValidationError( - clusterConfig, - context, - "both 'Endpoint' and 'DatabaseId' fields are set; you must set only one of them"); - } - - if (!hasEndpoint and !databaseId) { - return ValidationError( - clusterConfig, - context, - "none of 'Endpoint' and 'DatabaseId' fields are set; you must set one of them"); - } - + // Service account ID and service account ID signature are tightly coupled: + // if one is set, another one must be set too. auto serviceAccountId = clusterConfig.GetServiceAccountId(); auto serviceAccountIdSignature = clusterConfig.GetServiceAccountIdSignature(); if (serviceAccountId && !serviceAccountIdSignature) { @@ -349,6 +345,8 @@ namespace NYql { "you must set either both 'ServiceAccountId' and 'ServiceAccountIdSignature' fields or none of them"); } + // Service account credentials and raw tokens are mutually exclusive: + // no need to specify service account parameters if one already has a token. 
auto token = clusterConfig.GetToken(); if ((serviceAccountId && serviceAccountIdSignature) && token) { return ValidationError( @@ -357,23 +355,60 @@ namespace NYql { "you must set either ('ServiceAccountId', 'ServiceAccountIdSignature') fields or 'Token' field or none of them"); } + // All managed databases: + // * set endpoint when working with on-prem instances + // * set database id when working with managed instances + if (managedDatabaseKinds.contains(clusterConfig.GetKind())) { + auto hasEndpoint = clusterConfig.HasEndpoint(); + auto hasDatabaseId = clusterConfig.HasDatabaseId(); + + if (hasEndpoint && hasDatabaseId) { + return ValidationError( + clusterConfig, + context, + "both 'Endpoint' and 'DatabaseId' fields are set; you must set only one of them"); + } + + if (!hasEndpoint and !hasDatabaseId) { + return ValidationError( + clusterConfig, + context, + "none of 'Endpoint' and 'DatabaseId' fields are set; you must set one of them"); + } + } + + // YDB: + // * set database name when working with on-prem YDB instance; + // * but set database ID when working with managed YDB. 
+ if (clusterConfig.GetKind() == NConnector::NApi::YDB) { + if (clusterConfig.HasDatabaseName() && clusterConfig.HasDatabaseId()) { + return ValidationError( + clusterConfig, + context, + "For YDB clusters you must set either database name or database id, but you have set both of them"); + } + + if (!clusterConfig.HasDatabaseName() && !clusterConfig.HasDatabaseId()) { + return ValidationError( + clusterConfig, + context, + "For YDB clusters you must set either database name or database id, but you have set none of them"); + } + } + // check required fields if (!clusterConfig.GetName()) { return ValidationError(clusterConfig, context, "empty field 'Name'"); } - if (clusterConfig.GetKind() == EDataSourceKind::DATA_SOURCE_KIND_UNSPECIFIED) { + if (clusterConfig.GetKind() == NConnector::NApi::EDataSourceKind::DATA_SOURCE_KIND_UNSPECIFIED) { return ValidationError(clusterConfig, context, "empty field 'Kind'"); } - if (!clusterConfig.GetCredentials().Getbasic().Getusername()) { - return ValidationError(clusterConfig, context, "empty field 'Credentials.basic.username'"); - } - // TODO: validate Credentials.basic.password after ClickHouse recipe fix // TODO: validate DatabaseName field during https://st.yandex-team.ru/YQ-2494 - if (clusterConfig.GetProtocol() == EProtocol::PROTOCOL_UNSPECIFIED) { + if (clusterConfig.GetProtocol() == NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED) { return ValidationError(clusterConfig, context, "empty field 'Protocol'"); } } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp index d00f82047c37..74a6bd819177 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp @@ -93,17 +93,16 @@ namespace NYql { } void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, - TString& sourceType) override 
{ + TString& sourceType, size_t) override { const TDqSource source(&node); if (const auto maybeSettings = source.Settings().Maybe()) { const auto settings = maybeSettings.Cast(); const auto& clusterName = source.DataSource().Cast().Cluster().StringValue(); const auto& table = settings.Table().StringValue(); - const auto& token = settings.Token().Name().StringValue(); - const auto& endpoint = State_->Configuration->ClusterNamesToClusterConfigs[clusterName].endpoint(); + const auto& clusterConfig = State_->Configuration->ClusterNamesToClusterConfigs[clusterName]; + const auto& endpoint = clusterConfig.endpoint(); - Generic::TSource srcDesc; - srcDesc.set_token(token); + Generic::TSource source; // for backward compability full path can be used (cluster_name.`db_name.table`) // TODO: simplify during https://st.yandex-team.ru/YQ-2494 @@ -126,7 +125,7 @@ namespace NYql { } // prepare select - auto select = srcDesc.mutable_select(); + auto select = source.mutable_select(); select->mutable_from()->set_table(TString(dbTable)); select->mutable_data_source_instance()->CopyFrom(tableMeta.value()->DataSourceInstance); @@ -149,13 +148,22 @@ namespace NYql { } } - // store data source instance - srcDesc.mutable_data_source_instance()->CopyFrom(tableMeta.value()->DataSourceInstance); + // Managed YDB supports access via IAM token. + // If exist, copy service account creds to obtain tokens during request execution phase. + // If exists, copy previously created token. 
+ if (clusterConfig.kind() == NConnector::NApi::EDataSourceKind::YDB) { + source.SetServiceAccountId(clusterConfig.GetServiceAccountId()); + source.SetServiceAccountIdSignature(clusterConfig.GetServiceAccountIdSignature()); + source.SetToken(State_->Types->Credentials->FindCredentialContent( + "default_" + clusterConfig.name(), + "default_generic", + clusterConfig.GetToken())); + } // preserve source description for read actor - protoSettings.PackFrom(srcDesc); + protoSettings.PackFrom(source); - switch (srcDesc.data_source_instance().kind()) { + switch (select->data_source_instance().kind()) { case NYql::NConnector::NApi::CLICKHOUSE: sourceType = "ClickHouseGeneric"; break; diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp index 2089adbc798e..ae9e504f3c1b 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp @@ -1,4 +1,6 @@ #include "yql_generic_provider_impl.h" +#include "yql_generic_utils.h" + #include #include #include @@ -91,6 +93,7 @@ namespace NYql { for (const auto& [databaseIdWithType, databaseDescription] : response.DatabaseDescriptionMap) { YQL_CLOG(INFO, ProviderGeneric) << "resolved database id into endpoint" << ": databaseId=" << databaseIdWithType.first + << ", databaseKind=" << databaseIdWithType.second << ", host=" << databaseDescription.Host << ", port=" << databaseDescription.Port; } @@ -157,7 +160,10 @@ namespace NYql { if (clusterConfigIter == clusterNamesToClusterConfigs.end()) { TIssues issues; - issues.AddIssue(TStringBuilder() << "no cluster names for database id " << databaseIdWithType.first << " and cluster name " << clusterName); + issues.AddIssue(TStringBuilder() << "no cluster names for database id " + << databaseIdWithType.first + << " and cluster name " + << clusterName); ctx.IssueManager.AddIssues(issues); return 
TStatus::Error; } @@ -165,6 +171,15 @@ namespace NYql { auto endpointDst = clusterConfigIter->second.mutable_endpoint(); endpointDst->set_host(databaseDescription.Host); endpointDst->set_port(databaseDescription.Port); + + // If we work with managed YDB, we find out database name + // only after database id (== cluster id) resolving. + if (clusterConfigIter->second.kind() == NConnector::NApi::EDataSourceKind::YDB) { + clusterConfigIter->second.set_databasename(databaseDescription.Database); + } + + YQL_CLOG(INFO, ProviderGeneric) << "ModifyClusterConfigs: " + << DumpGenericClusterConfig(clusterConfigIter->second); } } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp index 929884754b96..383e342c1523 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp @@ -14,10 +14,11 @@ #include #include #include -#include -#include +#include #include #include +#include +#include namespace NYql { using namespace NNodes; @@ -32,7 +33,8 @@ namespace NYql { }; class TGenericLoadTableMetadataTransformer: public TGraphTransformerBase { - using TMapType = std::unordered_map>; + using TMapType = + std::unordered_map>; public: TGenericLoadTableMetadataTransformer(TGenericState::TPtr state) @@ -48,42 +50,37 @@ namespace NYql { } std::unordered_set pendingTables; - const auto& reads = FindNodes(input, - [&](const TExprNode::TPtr& node) { - if (const auto maybeRead = TMaybeNode(node)) { - return maybeRead.Cast().DataSource().Category().Value() == GenericProviderName; - } - return false; - }); + const auto& reads = FindNodes(input, [&](const TExprNode::TPtr& node) { + if (const auto maybeRead = TMaybeNode(node)) { + return maybeRead.Cast().DataSource().Category().Value() == GenericProviderName; + } + return false; + }); if (!reads.empty()) { for (const auto& r : reads) { const TGenRead 
read(r); if (!read.FreeArgs().Get(2).Ref().IsCallable("MrTableConcat")) { - ctx.AddError( - TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), TStringBuilder() << "Expected key")); + ctx.AddError(TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), "Expected key")); return TStatus::Error; } const auto maybeKey = TExprBase(read.FreeArgs().Get(2).Ref().HeadPtr()).Maybe(); if (!maybeKey) { - ctx.AddError( - TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), TStringBuilder() << "Expected key")); + ctx.AddError(TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), "Expected key")); return TStatus::Error; } const auto& keyArg = maybeKey.Cast().Ref().Head(); if (!keyArg.IsList() || keyArg.ChildrenSize() != 2U || !keyArg.Head().IsAtom("table") || !keyArg.Tail().IsCallable(TCoString::CallableName())) { - ctx.AddError( - TIssue(ctx.GetPosition(keyArg.Pos()), TStringBuilder() << "Expected single table name")); + ctx.AddError(TIssue(ctx.GetPosition(keyArg.Pos()), "Expected single table name")); return TStatus::Error; } const auto clusterName = read.DataSource().Cluster().StringValue(); const auto tableName = TString(keyArg.Tail().Head().Content()); if (pendingTables.insert(TGenericState::TTableAddress(clusterName, tableName)).second) { - YQL_CLOG(INFO, ProviderGeneric) - << "Loading table meta for: `" << clusterName << "`.`" << tableName << "`"; + YQL_CLOG(INFO, ProviderGeneric) << "Loading table meta for: `" << clusterName << "`.`" << tableName << "`"; } } } @@ -108,6 +105,7 @@ namespace NYql { auto desc = emplaceIt.first->second; desc->DataSourceInstance = request.data_source_instance(); + Y_ENSURE(State_->GenericClient); State_->GenericClient->DescribeTable(request).Subscribe( [desc = std::move(desc), promise = std::move(promise)](const NConnector::TDescribeTableAsyncResult& f1) mutable { NConnector::TDescribeTableAsyncResult f2(f1); @@ -196,14 +194,13 @@ namespace NYql { } else { const auto& error = response.error(); NConnector::ErrorToExprCtx(error, ctx, 
ctx.GetPosition(read.Pos()), - TStringBuilder() - << "Loading metadata for table: " << clusterName << '.' << tableName); + TStringBuilder() << "Loading metadata for table: " << clusterName << '.' << tableName); hasErrors = true; break; } } else { - ctx.AddError(TIssue(ctx.GetPosition(read.Pos()), - TStringBuilder() << "Not found result for " << clusterName << '.' << tableName)); + ctx.AddError(TIssue(ctx.GetPosition(read.Pos()), TStringBuilder() + << "Not found result for " << clusterName << '.' << tableName)); hasErrors = true; break; } @@ -222,10 +219,8 @@ namespace NYql { } private: - const TStructExprType* ParseTableMeta(const NConnector::NApi::TSchema& schema, - const std::string_view& cluster, - const std::string_view& table, TExprContext& ctx, - TVector& columnOrder) try { + const TStructExprType* ParseTableMeta(const NConnector::NApi::TSchema& schema, const std::string_view& cluster, + const std::string_view& table, TExprContext& ctx, TVector& columnOrder) try { TVector items; auto columns = schema.columns(); @@ -250,20 +245,74 @@ namespace NYql { return nullptr; } - void FillDescribeTableRequest(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig, const TString& tablePath) { + void FillDescribeTableRequest(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig, + const TString& tablePath) { const auto dataSourceKind = clusterConfig.GetKind(); auto dsi = request.mutable_data_source_instance(); - *dsi->mutable_endpoint() = clusterConfig.GetEndpoint(); dsi->set_kind(dataSourceKind); - *dsi->mutable_credentials() = clusterConfig.GetCredentials(); dsi->set_use_tls(clusterConfig.GetUseSsl()); dsi->set_protocol(clusterConfig.GetProtocol()); + FillCredentials(request, clusterConfig); FillTypeMappingSettings(request); FillDataSourceOptions(request, clusterConfig); FillTablePath(request, clusterConfig, tablePath); } + void FillCredentials(NConnector::NApi::TDescribeTableRequest& request, 
const TGenericClusterConfig& clusterConfig) { + auto dsi = request.mutable_data_source_instance(); + + // If login/password is provided, just copy them into request: + // connector will use Basic Auth to access external data sources. + if (clusterConfig.GetCredentials().Hasbasic()) { + *dsi->mutable_credentials() = clusterConfig.GetCredentials(); + return; + } + + // If there are no Basic Auth parameters, two options can be considered: + + // 1. Client provided own IAM-token to access external data source + auto iamToken = State_->Types->Credentials->FindCredentialContent( + "default_" + clusterConfig.name(), + "default_generic", + clusterConfig.GetToken()); + if (iamToken) { + *dsi->mutable_credentials()->mutable_token()->mutable_value() = iamToken; + *dsi->mutable_credentials()->mutable_token()->mutable_type() = "IAM"; + return; + } + + // 2. Client provided service account creds that must be converted into IAM-token + Y_ENSURE(State_->CredentialsFactory, "CredentialsFactory is not initialized"); + + auto structuredTokenJSON = TStructuredTokenBuilder().SetServiceAccountIdAuth( + clusterConfig.GetServiceAccountId(), + clusterConfig.GetServiceAccountIdSignature()) + .ToJson(); + + Y_ENSURE(structuredTokenJSON, "empty structured token"); + + // Create provider or get existing one. + // It's crucial to reuse providers because their construction implies synchronous IO. 
+ auto providersIt = State_->CredentialProviders.find(clusterConfig.name()); + if (providersIt == State_->CredentialProviders.end()) { + auto credentialsProviderFactory = CreateCredentialsProviderFactoryForStructuredToken( + State_->CredentialsFactory, + structuredTokenJSON, + false); + + providersIt = State_->CredentialProviders.emplace( + std::make_pair(clusterConfig.name(), credentialsProviderFactory->CreateProvider())) + .first; + } + + iamToken = providersIt->second->GetAuthInfo(); + Y_ENSURE(iamToken, "empty IAM token"); + + *dsi->mutable_credentials()->mutable_token()->mutable_value() = iamToken; + *dsi->mutable_credentials()->mutable_token()->mutable_type() = "IAM"; + } + void FillDataSourceOptions(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig) { const auto dataSourceKind = clusterConfig.GetKind(); switch (dataSourceKind) { @@ -287,13 +336,14 @@ namespace NYql { } break; default: - ythrow yexception() << "Unexpected data source kind: '" - << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) << "'"; + ythrow yexception() << "Unexpected data source kind: '" << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) + << "'"; } } void FillTypeMappingSettings(NConnector::NApi::TDescribeTableRequest& request) { - const TString dateTimeFormat = State_->Configuration->DateTimeFormat.Get().GetOrElse(TGenericSettings::TDefault::DateTimeFormat); + const TString dateTimeFormat = + State_->Configuration->DateTimeFormat.Get().GetOrElse(TGenericSettings::TDefault::DateTimeFormat); if (dateTimeFormat == "string") { request.mutable_type_mapping_settings()->set_date_time_format(NConnector::NApi::STRING_FORMAT); } else if (dateTimeFormat == "YQL") { @@ -303,7 +353,8 @@ namespace NYql { } } - void FillTablePath(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig, const TString& tablePath) { + void FillTablePath(NConnector::NApi::TDescribeTableRequest& request, const 
TGenericClusterConfig& clusterConfig, + const TString& tablePath) { // for backward compability full path can be used (cluster_name.`db_name.table`) // TODO: simplify during https://st.yandex-team.ru/YQ-2494 const auto dataSourceKind = clusterConfig.GetKind(); diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp index d2178ccd5b0e..c720e1b64d0e 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp @@ -6,13 +6,14 @@ namespace NYql { TDataProviderInitializer GetGenericDataProviderInitializer(NConnector::IClient::TPtr genericClient, - const std::shared_ptr dbResolver) + const IDatabaseAsyncResolver::TPtr& dbResolver, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory) { - return [genericClient, dbResolver](const TString& userName, const TString& sessionId, const TGatewaysConfig* gatewaysConfig, - const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, - TIntrusivePtr randomProvider, TIntrusivePtr typeCtx, - const TOperationProgressWriter& progressWriter, const TYqlOperationOptions& operationOptions, - THiddenQueryAborter) + return [genericClient, dbResolver, credentialsFactory](const TString& userName, const TString& sessionId, const TGatewaysConfig* gatewaysConfig, + const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, + TIntrusivePtr randomProvider, TIntrusivePtr typeCtx, + const TOperationProgressWriter& progressWriter, const TYqlOperationOptions& operationOptions, + THiddenQueryAborter) { Y_UNUSED(sessionId); Y_UNUSED(userName); @@ -25,6 +26,7 @@ namespace NYql { typeCtx.Get(), functionRegistry, dbResolver, + credentialsFactory, genericClient, gatewaysConfig->GetGeneric()); diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_provider.h b/ydb/library/yql/providers/generic/provider/yql_generic_provider.h index 
d990b2084bb4..5c8e4c967a8c 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_provider.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_provider.h @@ -2,14 +2,14 @@ #include "yql_generic_state.h" -#include #include #include namespace NYql { TDataProviderInitializer GetGenericDataProviderInitializer( - NConnector::IClient::TPtr genericClient, // required - std::shared_ptr dbResolver = nullptr // can be missing in on-prem installations + NConnector::IClient::TPtr genericClient, // required + const IDatabaseAsyncResolver::TPtr& dbResolver = nullptr, // can be missing in on-prem installations + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory = nullptr // can be missing in on-prem installations ); TIntrusivePtr CreateGenericDataSource(TGenericState::TPtr state); diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp index 0b4c93a8bf4d..1c2521573ddb 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp @@ -1,5 +1,6 @@ #include "yql_generic_cluster_config.h" #include "yql_generic_settings.h" +#include "yql_generic_utils.h" #include #include @@ -34,7 +35,7 @@ namespace NYql { const TCredentials::TPtr& credentials) { ValidateGenericClusterConfig(clusterConfig, "TGenericConfiguration::AddCluster"); - YQL_CLOG(INFO, ProviderGeneric) << "generic provider add cluster: " << DumpGenericClusterConfig(clusterConfig); + YQL_CLOG(INFO, ProviderGeneric) << "GenericConfiguration::AddCluster: " << DumpGenericClusterConfig(clusterConfig); const auto& clusterName = clusterConfig.GetName(); const auto& databaseId = clusterConfig.GetDatabaseId(); @@ -95,23 +96,6 @@ namespace NYql { "or set (ServiceAccountId && ServiceAccountIdSignature) in cluster config"; } - TString TGenericConfiguration::DumpGenericClusterConfig(const 
TGenericClusterConfig& clusterConfig) const { - TStringBuilder sb; - sb << "name = " << clusterConfig.GetName() - << ", kind = " << NConnector::NApi::EDataSourceKind_Name(clusterConfig.GetKind()) - << ", database name = " << clusterConfig.GetDatabaseName() - << ", database id = " << clusterConfig.GetName() - << ", endpoint = " << clusterConfig.GetEndpoint() - << ", use tls = " << clusterConfig.GetUseSsl() - << ", protocol = " << NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol()); - - for (const auto& [key, value] : clusterConfig.GetDataSourceOptions()) { - sb << ", " << key << " = " << value; - } - - return sb; - } - TGenericSettings::TConstPtr TGenericConfiguration::Snapshot() const { return std::make_shared(*this); } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_settings.h b/ydb/library/yql/providers/generic/provider/yql_generic_settings.h index d783963a6589..07a19c5ce827 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_settings.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_settings.h @@ -25,26 +25,21 @@ namespace NYql { TGenericConfiguration(); TGenericConfiguration(const TGenericConfiguration&) = delete; - void Init(const NYql::TGenericGatewayConfig& gatewayConfig, - const std::shared_ptr databaseResolver, - NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, - const TCredentials::TPtr& credentials); + void Init(const NYql::TGenericGatewayConfig& gatewayConfig, const std::shared_ptr databaseResolver, + NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, const TCredentials::TPtr& credentials); - void AddCluster(const TGenericClusterConfig& clusterConfig, - const std::shared_ptr databaseResolver, - NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, - const TCredentials::TPtr& credentials); + void AddCluster(const TGenericClusterConfig& clusterConfig, const std::shared_ptr databaseResolver, + NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, const 
TCredentials::TPtr& credentials); TGenericSettings::TConstPtr Snapshot() const; bool HasCluster(TStringBuf cluster) const; private: TString MakeStructuredToken(const TGenericClusterConfig& clusterConfig, const TCredentials::TPtr& credentials) const; - TString DumpGenericClusterConfig(const TGenericClusterConfig& clusterConfig) const; public: THashMap Tokens; THashMap ClusterNamesToClusterConfigs; // cluster name -> cluster config THashMap> DatabaseIdsToClusterNames; // database id -> cluster name }; -} +} //namespace NYql diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_state.h b/ydb/library/yql/providers/generic/provider/yql_generic_state.h index e2362bc5ad27..3d69efdfe0d3 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_state.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_state.h @@ -3,7 +3,9 @@ #include "yql_generic_settings.h" #include +#include #include +#include namespace NKikimr::NMiniKQL { class IFunctionRegistry; @@ -29,13 +31,15 @@ namespace NYql { TGenericState( TTypeAnnotationContext* types, const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, - const std::shared_ptr& databaseResolver, + const std::shared_ptr& databaseResolver, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory, const NConnector::IClient::TPtr& genericClient, const TGenericGatewayConfig& gatewayConfig) : Types(types) , Configuration(MakeIntrusive()) , FunctionRegistry(functionRegistry) , DatabaseResolver(databaseResolver) + , CredentialsFactory(credentialsFactory) , GenericClient(genericClient) { Configuration->Init(gatewayConfig, databaseResolver, DatabaseAuth, types->Credentials); @@ -49,9 +53,15 @@ namespace NYql { TGenericConfiguration::TPtr Configuration = MakeIntrusive(); const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry; - // key - (database id, database type), value - credentials to access MDB API - NYql::IDatabaseAsyncResolver::TDatabaseAuthMap DatabaseAuth; - std::shared_ptr 
DatabaseResolver; + // key - (database id, database type), value - credentials to access managed APIs + IDatabaseAsyncResolver::TDatabaseAuthMap DatabaseAuth; + std::shared_ptr DatabaseResolver; + + // key - cluster name, value - TCredentialsProviderPtr + // It's important to cache credentials providers, because they make IO + // (synchronous call via Token Accessor client) during the construction. + std::unordered_map CredentialProviders; + ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory; NConnector::IClient::TPtr GenericClient; diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp new file mode 100644 index 000000000000..aba0b51924b3 --- /dev/null +++ b/ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp @@ -0,0 +1,22 @@ +#include "yql_generic_utils.h" + +#include + +namespace NYql { + TString DumpGenericClusterConfig(const TGenericClusterConfig& clusterConfig) { + TStringBuilder sb; + sb << "name = " << clusterConfig.GetName() + << ", kind = " << NConnector::NApi::EDataSourceKind_Name(clusterConfig.GetKind()) + << ", database name = " << clusterConfig.GetDatabaseName() + << ", database id = " << clusterConfig.GetDatabaseId() + << ", endpoint = " << clusterConfig.GetEndpoint() + << ", use tls = " << clusterConfig.GetUseSsl() + << ", protocol = " << NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol()); + + for (const auto& [key, value] : clusterConfig.GetDataSourceOptions()) { + sb << ", " << key << " = " << value; + } + + return sb; + } +} diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_utils.h b/ydb/library/yql/providers/generic/provider/yql_generic_utils.h new file mode 100644 index 000000000000..49c6bab7abca --- /dev/null +++ b/ydb/library/yql/providers/generic/provider/yql_generic_utils.h @@ -0,0 +1,8 @@ +#pragma once + +#include +#include + +namespace NYql { + TString DumpGenericClusterConfig(const 
TGenericClusterConfig& clusterConfig); +} diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp index f8b58da69894..f98c58d173d6 100644 --- a/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp +++ b/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp @@ -179,7 +179,7 @@ class TPqDqIntegration: public TDqIntegrationBase { } } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override { if (auto maybeDqSource = TMaybeNode(&node)) { auto settings = maybeDqSource.Cast().Settings(); if (auto maybeTopicSource = TMaybeNode(settings.Raw())) { diff --git a/ydb/library/yql/providers/s3/actors/ya.make b/ydb/library/yql/providers/s3/actors/ya.make index 721495f7e054..8ffe41cf0ddf 100644 --- a/ydb/library/yql/providers/s3/actors/ya.make +++ b/ydb/library/yql/providers/s3/actors/ya.make @@ -20,9 +20,11 @@ PEERDIR( contrib/libs/fmt contrib/libs/poco/Util ydb/library/actors/http + library/cpp/protobuf/util library/cpp/string_utils/base64 library/cpp/string_utils/quote library/cpp/xml/document + ydb/core/base ydb/core/fq/libs/events ydb/library/yql/dq/actors/compute ydb/library/yql/minikql/computation @@ -36,6 +38,8 @@ PEERDIR( ydb/library/yql/providers/s3/credentials ydb/library/yql/providers/s3/object_listers ydb/library/yql/providers/s3/proto + ydb/library/yql/providers/s3/range_helpers + ydb/library/yql/public/issue ydb/library/yql/public/types ydb/library/yql/udfs/common/clickhouse/client ) diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp index 53e189430213..46f997504e97 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp +++ 
b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp @@ -204,6 +204,8 @@ class TS3ApplicatorActor; using TObjectStorageRequest = std::function; class TS3ApplicatorActor : public NActors::TActorBootstrapped { + static constexpr ui64 GLOBAL_RETRY_LIMIT = 100; + public: using NActors::TActorBootstrapped::Send; @@ -230,7 +232,7 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedCreateRetryState()->GetNextRetryDelay(curlResponseCode, httpResponseCode); + Issues.AddIssue(TStringBuilder() << "Retry operation " << operationName << ", curl error: " << curl_easy_strerror(curlResponseCode) << ", http code: " << httpResponseCode << ", url: " << url); if (result) { RetryCount--; } else { - Finish(true); + Finish(true, RetryCount + ? TString("Number of retries exceeded limit per operation") + : TStringBuilder() << "Number of retries exceeded global limit in " << GLOBAL_RETRY_LIMIT << " retries"); } return result; } @@ -370,8 +375,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("CommitMultipartUpload ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "CommitMultipartUpload")) { PushCommitMultipartUpload(ev->Get()->State); } } @@ -444,8 +450,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("ListMultipartUploads ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "ListMultipartUploads")) { PushListMultipartUploads(ev->Get()->State); } } @@ -467,8 +474,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, 
result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("AbortMultipartUpload ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "AbortMultipartUpload")) { PushAbortMultipartUpload(ev->Get()->State); } } @@ -507,8 +515,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("ListParts ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "ListParts")) { PushListParts(ev->Get()->State); } } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp index 695588bd1540..8a5454368bfb 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp @@ -31,6 +31,9 @@ #include #include +#include +#include + #endif #include "yql_arrow_column_converters.h" @@ -38,9 +41,11 @@ #include "yql_s3_read_actor.h" #include "yql_s3_source_factory.h" +#include #include #include +#include #include #include #include @@ -51,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +68,7 @@ #include #include #include +#include #include #include @@ -138,19 +145,78 @@ struct TS3ReadError : public yexception { using yexception::yexception; }; -struct TObjectPath { - TString Path; - size_t Size; - size_t PathIndex; +using NS3::FileQueue::TObjectPath; +using NDqProto::TMessageTransportMeta; + +struct TEvS3FileQueue { + enum EEv : ui32 { + EvBegin = EventSpaceBegin(NKikimr::TKikimrEvents::ES_S3_FILE_QUEUE), - TObjectPath(TString path, size_t size, size_t pathIndex) - : Path(std::move(path)), Size(size), PathIndex(pathIndex) { } + EvUpdateConsumersCount = EvBegin, + EvAck, + EvGetNextBatch, + EvObjectPathBatch, + 
EvObjectPathReadError, + + EvEnd + }; + static_assert(EvEnd < EventSpaceEnd(NKikimr::TKikimrEvents::ES_S3_FILE_QUEUE), + "expect EvEnd < EventSpaceEnd(TEvents::ES_S3_FILE_QUEUE)"); + + struct TEvUpdateConsumersCount : + public TEventPB { + + explicit TEvUpdateConsumersCount(ui64 consumersCountDelta = 0) { + Record.SetConsumersCountDelta(consumersCountDelta); + } + }; + + struct TEvAck : + public TEventPB { + + TEvAck() = default; + + explicit TEvAck(const TMessageTransportMeta& transportMeta) { + Record.MutableTransportMeta()->CopyFrom(transportMeta); + } + }; + + struct TEvGetNextBatch : + public TEventPB { + }; + + struct TEvObjectPathBatch : + public NActors::TEventPB { + + TEvObjectPathBatch() { + Record.SetNoMoreFiles(false); + } + + TEvObjectPathBatch(std::vector objectPaths, bool noMoreFiles, const TMessageTransportMeta& transportMeta) { + Record.MutableObjectPaths()->Assign( + std::make_move_iterator(objectPaths.begin()), + std::make_move_iterator(objectPaths.end())); + Record.SetNoMoreFiles(noMoreFiles); + Record.MutableTransportMeta()->CopyFrom(transportMeta); + } + }; + + struct TEvObjectPathReadError : + public NActors::TEventPB { + + TEvObjectPathReadError() = default; + + TEvObjectPathReadError(TIssues issues, const TMessageTransportMeta& transportMeta) { + IssuesToMessage(issues, Record.MutableIssues()); + Record.MutableTransportMeta()->CopyFrom(transportMeta); + } + }; }; struct TEvPrivate { // Event ids enum EEv : ui32 { - EvBegin = EventSpaceBegin(TEvents::ES_PRIVATE), + EvBegin = TEvRetryQueuePrivate::EvEnd, // Leave space for RetryQueue events EvReadResult = EvBegin, EvDataPart, @@ -162,8 +228,6 @@ struct TEvPrivate { EvNextRecordBatch, EvFileFinished, EvContinue, - EvObjectPathBatch, - EvObjectPathReadError, EvReadResult2, EvEnd @@ -265,21 +329,6 @@ struct TEvPrivate { struct TEvContinue : public NActors::TEventLocal { }; - struct TEvObjectPathBatch : - public NActors::TEventLocal { - std::vector ObjectPaths; - bool NoMoreFiles = false; - 
TEvObjectPathBatch( - std::vector objectPaths, bool noMoreFiles) - : ObjectPaths(std::move(objectPaths)), NoMoreFiles(noMoreFiles) { } - }; - - struct TEvObjectPathReadError : - public NActors::TEventLocal { - TIssues Issues; - TEvObjectPathReadError(TIssues issues) : Issues(std::move(issues)) { } - }; - struct TReadRange { int64_t Offset; int64_t Length; @@ -304,24 +353,33 @@ class TS3FileQueueActor : public TActorBootstrapped { struct TEvPrivatePrivate { enum { - EvGetNextFile = EventSpaceBegin(TEvents::ES_PRIVATE), - EvNextListingChunkReceived, + EvBegin = TEvRetryQueuePrivate::EvEnd, // Leave space for RetryQueue events + + EvNextListingChunkReceived = EvBegin, + EvRoundRobinStageTimeout, + EvTransitToErrorState, + EvEnd }; static_assert( EvEnd <= EventSpaceEnd(TEvents::ES_PRIVATE), "expected EvEnd <= EventSpaceEnd(TEvents::ES_PRIVATE)"); - struct TEvGetNextFile : public TEventLocal { - size_t RequestedAmount = 1; - TEvGetNextFile(size_t requestedAmount) : RequestedAmount(requestedAmount){}; - }; - struct TEvNextListingChunkReceived : - public TEventLocal { + struct TEvNextListingChunkReceived : public TEventLocal { NS3Lister::TListResult ListingResult; TEvNextListingChunkReceived(NS3Lister::TListResult listingResult) : ListingResult(std::move(listingResult)){}; }; + + struct TEvRoundRobinStageTimeout : public TEventLocal { + }; + + struct TEvTransitToErrorState : public TEventLocal { + explicit TEvTransitToErrorState(TIssues&& issues) + : Issues(issues) { + } + TIssues Issues; + }; }; using TBase = TActorBootstrapped; @@ -330,6 +388,10 @@ class TS3FileQueueActor : public TActorBootstrapped { TPathList paths, size_t prefetchSize, ui64 fileSizeLimit, + bool useRuntimeListing, + ui64 consumersCount, + ui64 batchSizeLimit, + ui64 batchObjectCountLimit, IHTTPGateway::TPtr gateway, TString url, TS3Credentials::TAuthInfo authInfo, @@ -340,6 +402,10 @@ class TS3FileQueueActor : public TActorBootstrapped { , PrefetchSize(prefetchSize) , FileSizeLimit(fileSizeLimit) , 
MaybeIssues(Nothing()) + , UseRuntimeListing(useRuntimeListing) + , ConsumersCount(consumersCount) + , BatchSizeLimit(batchSizeLimit) + , BatchObjectCountLimit(batchObjectCountLimit) , Gateway(std::move(gateway)) , Url(std::move(url)) , AuthInfo(std::move(authInfo)) @@ -347,20 +413,28 @@ class TS3FileQueueActor : public TActorBootstrapped { , PatternVariant(patternVariant) , PatternType(patternType) { for (size_t i = 0; i < paths.size(); ++i) { + TObjectPath object; + object.SetPath(paths[i].Path); + object.SetPathIndex(paths[i].PathIndex); if (paths[i].IsDirectory) { - Directories.emplace_back(paths[i].Path, 0, paths[i].PathIndex); + object.SetSize(0); + Directories.emplace_back(std::move(object)); } else { - Objects.emplace_back(paths[i].Path, paths[i].Size, paths[i].PathIndex); + object.SetSize(paths[i].Size); + Objects.emplace_back(std::move(object)); } } } void Bootstrap() { + if (UseRuntimeListing) { + Schedule(PoisonTimeout, new TEvents::TEvPoison()); + } if (Directories.empty()) { - LOG_I("TS3FileQueueActor", "Bootstrap there is no directories to list"); + LOG_I("TS3FileQueueActor", "Bootstrap there is no directories to list, consumersCount=" << ConsumersCount); Become(&TS3FileQueueActor::NoMoreDirectoriesState); } else { - LOG_I("TS3FileQueueActor", "Bootstrap there are directories to list"); + LOG_I("TS3FileQueueActor", "Bootstrap there are directories to list, consumersCount=" << ConsumersCount); TryPreFetch(); Become(&TS3FileQueueActor::ThereAreDirectoriesToListState); } @@ -369,9 +443,12 @@ class TS3FileQueueActor : public TActorBootstrapped { STATEFN(ThereAreDirectoriesToListState) { try { switch (const auto etype = ev->GetTypeRewrite()) { - hFunc(TEvPrivatePrivate::TEvGetNextFile, HandleGetNextFile); + hFunc(TEvS3FileQueue::TEvUpdateConsumersCount, HandleUpdateConsumersCount); + hFunc(TEvS3FileQueue::TEvGetNextBatch, HandleGetNextBatch); hFunc(TEvPrivatePrivate::TEvNextListingChunkReceived, HandleNextListingChunkReceived); - 
cFunc(TEvents::TSystem::Poison, PassAway); + cFunc(TEvPrivatePrivate::EvRoundRobinStageTimeout, HandleRoundRobinStageTimeout); + hFunc(TEvPrivatePrivate::TEvTransitToErrorState, HandleTransitToErrorState); + cFunc(TEvents::TSystem::Poison, HandlePoison); default: MaybeIssues = TIssues{TIssue{TStringBuilder() << "An event with unknown type has been received: '" << etype << "'"}}; TransitToErrorState(); @@ -383,16 +460,14 @@ class TS3FileQueueActor : public TActorBootstrapped { } } - void HandleGetNextFile(TEvPrivatePrivate::TEvGetNextFile::TPtr& ev) { - auto requestAmount = ev->Get()->RequestedAmount; - LOG_D("TS3FileQueueActor", "HandleGetNextFile requestAmount:" << requestAmount); - if (Objects.size() > requestAmount) { - LOG_D("TS3FileQueueActor", "HandleGetNextFile sending right away"); - SendObjects(ev->Sender, requestAmount); + void HandleGetNextBatch(TEvS3FileQueue::TEvGetNextBatch::TPtr& ev) { + if (HasEnoughToSend()) { + LOG_D("TS3FileQueueActor", "HandleGetNextBatch sending right away"); + TrySendObjects(ev->Sender, ev->Get()->Record.GetTransportMeta()); TryPreFetch(); } else { - LOG_D("TS3FileQueueActor", "HandleGetNextFile have not enough objects cached. Start fetching"); - RequestQueue.emplace_back(ev->Sender, requestAmount); + LOG_D("TS3FileQueueActor", "HandleGetNextBatch have not enough objects cached. Start fetching"); + ScheduleRequest(ev->Sender, ev->Get()->Record.GetTransportMeta()); TryFetch(); } } @@ -402,12 +477,12 @@ class TS3FileQueueActor : public TActorBootstrapped { ListingFuture = Nothing(); LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived"); if (SaveRetrievedResults(ev->Get()->ListingResult)) { - AnswerPendingRequests(); - if (RequestQueue.empty()) { - LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived RequestQueue is empty. Trying to prefetch"); + AnswerPendingRequests(true); + if (!HasPendingRequests) { + LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived no pending requests. 
Trying to prefetch"); TryPreFetch(); } else { - LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived RequestQueue is not empty. Fetching more objects"); + LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived there are pending requests. Fetching more objects"); TryFetch(); } } else { @@ -415,6 +490,11 @@ class TS3FileQueueActor : public TActorBootstrapped { } } + void HandleTransitToErrorState(TEvPrivatePrivate::TEvTransitToErrorState::TPtr& ev) { + MaybeIssues = ev->Get()->Issues; + TransitToErrorState(); + } + bool SaveRetrievedResults(const NS3Lister::TListResult& listingResult) { LOG_T("TS3FileQueueActor", "SaveRetrievedResults"); if (std::holds_alternative(listingResult)) { @@ -440,56 +520,28 @@ class TS3FileQueueActor : public TActorBootstrapped { return false; } LOG_T("TS3FileQueueActor", "SaveRetrievedResults adding path: " << object.Path); - Objects.emplace_back(object.Path, object.Size, CurrentDirectoryPathIndex); + TObjectPath objectPath; + objectPath.SetPath(object.Path); + objectPath.SetSize(object.Size); + objectPath.SetPathIndex(CurrentDirectoryPathIndex); + Objects.emplace_back(std::move(objectPath)); + ObjectsTotalSize += object.Size; } return true; } - void AnswerPendingRequests() { - while (!RequestQueue.empty()) { - auto requestToFulfil = std::find_if( - RequestQueue.begin(), - RequestQueue.end(), - [this](auto& val) { return val.second <= Objects.size(); }); - - if (requestToFulfil != RequestQueue.end()) { - auto [actorId, requestedAmount] = *requestToFulfil; - LOG_T( - "TS3FileQueueActor", - "AnswerPendingRequests responding to " - << requestToFulfil->first << " with " << requestToFulfil->second - << " items"); - SendObjects(actorId, requestedAmount); - RequestQueue.erase(requestToFulfil); - } else { - LOG_T( - "TS3FileQueueActor", - "AnswerPendingRequests no more pending requests to fulfil"); - break; - } - } - } - bool FetchingInProgress() const { return ListingFuture.Defined(); } void TransitToNoMoreDirectoriesToListState() { 
LOG_I("TS3FileQueueActor", "TransitToNoMoreDirectoriesToListState no more directories to list"); - for (auto& [requestorId, size]: RequestQueue) { - SendObjects(requestorId, size); - } - RequestQueue.clear(); + AnswerPendingRequests(); Become(&TS3FileQueueActor::NoMoreDirectoriesState); } void TransitToErrorState() { Y_ENSURE(MaybeIssues.Defined()); LOG_I("TS3FileQueueActor", "TransitToErrorState an error occurred sending "); - for (auto& [requestorId, _]: RequestQueue) { - Send( - requestorId, - std::make_unique(*MaybeIssues)); - } - RequestQueue.clear(); + AnswerPendingRequests(); Objects.clear(); Directories.clear(); Become(&TS3FileQueueActor::AnErrorOccurredState); @@ -498,8 +550,10 @@ class TS3FileQueueActor : public TActorBootstrapped { STATEFN(NoMoreDirectoriesState) { try { switch (const auto etype = ev->GetTypeRewrite()) { - hFunc(TEvPrivatePrivate::TEvGetNextFile, HandleGetNextFileForEmptyState); - cFunc(TEvents::TSystem::Poison, PassAway); + hFunc(TEvS3FileQueue::TEvUpdateConsumersCount, HandleUpdateConsumersCount); + hFunc(TEvS3FileQueue::TEvGetNextBatch, HandleGetNextBatchForEmptyState); + cFunc(TEvPrivatePrivate::EvRoundRobinStageTimeout, HandleRoundRobinStageTimeout); + cFunc(TEvents::TSystem::Poison, HandlePoison); default: MaybeIssues = TIssues{TIssue{TStringBuilder() << "An event with unknown type has been received: '" << etype << "'"}}; TransitToErrorState(); @@ -511,16 +565,20 @@ class TS3FileQueueActor : public TActorBootstrapped { } } - void HandleGetNextFileForEmptyState(TEvPrivatePrivate::TEvGetNextFile::TPtr& ev) { - LOG_D("TS3FileQueueActor", "HandleGetNextFileForEmptyState Giving away rest of Objects"); - SendObjects(ev->Sender, ev->Get()->RequestedAmount); + void HandleGetNextBatchForEmptyState(TEvS3FileQueue::TEvGetNextBatch::TPtr& ev) { + LOG_T( + "TS3FileQueueActor", + "HandleGetNextBatchForEmptyState Giving away rest of Objects"); + TrySendObjects(ev->Sender, ev->Get()->Record.GetTransportMeta()); } STATEFN(AnErrorOccurredState) { 
try { switch (const auto etype = ev->GetTypeRewrite()) { - hFunc(TEvPrivatePrivate::TEvGetNextFile, HandleGetNextFileForErrorState); - cFunc(TEvents::TSystem::Poison, PassAway); + hFunc(TEvS3FileQueue::TEvUpdateConsumersCount, HandleUpdateConsumersCount); + hFunc(TEvS3FileQueue::TEvGetNextBatch, HandleGetNextBatchForErrorState); + cFunc(TEvPrivatePrivate::EvRoundRobinStageTimeout, HandleRoundRobinStageTimeout); + cFunc(TEvents::TSystem::Poison, HandlePoison); default: MaybeIssues = TIssues{TIssue{TStringBuilder() << "An event with unknown type has been received: '" << etype << "'"}}; break; @@ -530,66 +588,105 @@ class TS3FileQueueActor : public TActorBootstrapped { } } - void HandleGetNextFileForErrorState(TEvPrivatePrivate::TEvGetNextFile::TPtr& ev) { + void HandleGetNextBatchForErrorState(TEvS3FileQueue::TEvGetNextBatch::TPtr& ev) { LOG_D( "TS3FileQueueActor", - "HandleGetNextFileForErrorState Giving away rest of Objects"); - Send(ev->Sender, std::make_unique(*MaybeIssues)); + "HandleGetNextBatchForErrorState Giving away rest of Objects"); + Send(ev->Sender, new TEvS3FileQueue::TEvObjectPathReadError(*MaybeIssues, ev->Get()->Record.GetTransportMeta())); + TryFinish(ev->Sender, ev->Get()->Record.GetTransportMeta().GetSeqNo()); + } + + void HandleUpdateConsumersCount(TEvS3FileQueue::TEvUpdateConsumersCount::TPtr& ev) { + if (!UpdatedConsumers.contains(ev->Sender)) { + LOG_D( + "TS3FileQueueActor", + "HandleUpdateConsumersCount Reducing ConsumersCount by " << ev->Get()->Record.GetConsumersCountDelta() << ", recieved from " << ev->Sender); + UpdatedConsumers.insert(ev->Sender); + ConsumersCount -= ev->Get()->Record.GetConsumersCountDelta(); + } + Send(ev->Sender, new TEvS3FileQueue::TEvAck(ev->Get()->Record.GetTransportMeta())); + } + + void HandleRoundRobinStageTimeout() { + LOG_T("TS3FileQueueActor","Handle start stage timeout"); + if (!RoundRobinStageFinished) { + RoundRobinStageFinished = true; + AnswerPendingRequests(); + } } - void PassAway() override { - if 
(!MaybeIssues.Defined()) { - for (auto& [requestorId, size]: RequestQueue) { - SendObjects(requestorId, size); - } - } else { - for (auto& [requestorId, _]: RequestQueue) { - Send( - requestorId, - std::make_unique(*MaybeIssues)); - } - } + void HandlePoison() { + AnswerPendingRequests(); + PassAway(); + } - RequestQueue.clear(); - Objects.clear(); - Directories.clear(); + void PassAway() override { + LOG_D("TS3FileQueueActor", "PassAway"); TBase::PassAway(); } private: - void SendObjects(const TActorId& recipient, size_t amount) { + void TrySendObjects(const TActorId& consumer, const NDqProto::TMessageTransportMeta& transportMeta) { + if (CanSendToConsumer(consumer)) { + SendObjects(consumer, transportMeta); + } else { + ScheduleRequest(consumer, transportMeta); + } + } + + void SendObjects(const TActorId& consumer, const NDqProto::TMessageTransportMeta& transportMeta) { Y_ENSURE(!MaybeIssues.Defined()); - size_t correctedAmount = std::min(amount, Objects.size()); std::vector result; - if (correctedAmount != 0) { - result.reserve(correctedAmount); - for (size_t i = 0; i < correctedAmount; ++i) { + if (Objects.size() > 0) { + size_t totalSize = 0; + do { result.push_back(Objects.back()); Objects.pop_back(); + totalSize += result.back().GetSize(); + } while (Objects.size() > 0 && result.size() < BatchObjectCountLimit && totalSize < BatchSizeLimit); + ObjectsTotalSize -= totalSize; + } + + LOG_T("TS3FileQueueActor", "SendObjects Sending " << result.size() << " objects to consumer with id " << consumer); + Send(consumer, new TEvS3FileQueue::TEvObjectPathBatch(std::move(result), HasNoMoreItems(), transportMeta)); + + if (HasNoMoreItems()) { + TryFinish(consumer, transportMeta.GetSeqNo()); + } + + if (!RoundRobinStageFinished) { + if (StartedConsumers.empty()) { + Schedule(RoundRobinStageTimeout, new TEvPrivatePrivate::TEvRoundRobinStageTimeout()); + } + StartedConsumers.insert(consumer); + if ((StartedConsumers.size() == ConsumersCount || HasNoMoreItems()) && 
!IsRoundRobinFinishScheduled) { + IsRoundRobinFinishScheduled = true; + Send(SelfId(), new TEvPrivatePrivate::TEvRoundRobinStageTimeout()); } } + } - LOG_T( - "TS3FileQueueActor", - "SendObjects amount: " << amount << " correctedAmount: " << correctedAmount - << " result size: " << result.size()); + bool HasEnoughToSend() { + return Objects.size() >= BatchObjectCountLimit || ObjectsTotalSize >= BatchSizeLimit; + } - Send( - recipient, - std::make_unique( - std::move(result), HasNoMoreItems())); + bool CanSendToConsumer(const TActorId& consumer) { + return !UseRuntimeListing || RoundRobinStageFinished || + (StartedConsumers.size() < ConsumersCount && !StartedConsumers.contains(consumer)); } + bool HasNoMoreItems() const { return !(MaybeLister.Defined() && (*MaybeLister)->HasNext()) && Directories.empty() && Objects.empty(); } - bool TryPreFetch () { + bool TryPreFetch() { if (Objects.size() < PrefetchSize) { return TryFetch(); } return false; } + bool TryFetch() { if (FetchingInProgress()) { LOG_D("TS3FileQueueActor", "TryFetch fetching already in progress"); @@ -605,9 +702,9 @@ class TS3FileQueueActor : public TActorBootstrapped { if (!Directories.empty()) { LOG_D("TS3FileQueueActor", "TryFetch fetching from new lister"); - auto [path, size, pathIndex] = Directories.back(); + auto object = Directories.back(); Directories.pop_back(); - CurrentDirectoryPathIndex = pathIndex; + CurrentDirectoryPathIndex = object.GetPathIndex(); MaybeLister = NS3Lister::MakeS3Lister( Gateway, NS3Lister::TListingRequest{ @@ -615,9 +712,9 @@ class TS3FileQueueActor : public TActorBootstrapped { AuthInfo, PatternVariant == ES3PatternVariant::PathPattern ? 
Pattern - : TStringBuilder{} << path << Pattern, + : TStringBuilder{} << object.GetPath() << Pattern, PatternType, - path}, + object.GetPath()}, Nothing(), false); Fetch(); @@ -629,6 +726,7 @@ class TS3FileQueueActor : public TActorBootstrapped { TransitToNoMoreDirectoriesToListState(); return false; } + void Fetch() { Y_ENSURE(!ListingFuture.Defined()); Y_ENSURE(MaybeLister.Defined()); @@ -638,12 +736,71 @@ class TS3FileQueueActor : public TActorBootstrapped { ->Next() .Subscribe([actorSystem, selfId = SelfId()]( const NThreading::TFuture& future) { - actorSystem->Send( - selfId, - new TEvPrivatePrivate::TEvNextListingChunkReceived( - future.GetValue())); + try { + actorSystem->Send( + selfId, + new TEvPrivatePrivate::TEvNextListingChunkReceived( + future.GetValue())); + } catch (const std::exception& e) { + actorSystem->Send( + selfId, + new TEvPrivatePrivate::TEvTransitToErrorState( + TIssues{TIssue{TStringBuilder() << "An unknown exception has occurred: '" << e.what() << "'"}})); + } }); } + + void ScheduleRequest(const TActorId& consumer, const TMessageTransportMeta& transportMeta) { + PendingRequests[consumer].push_back(transportMeta); + HasPendingRequests = true; + } + + void AnswerPendingRequests(bool earlyStop = false) { + bool handledRequest = true; + while (HasPendingRequests && handledRequest) { + bool isEmpty = true; + handledRequest = false; + for (auto& [consumer, requests] : PendingRequests) { + if (!CanSendToConsumer(consumer) || (earlyStop && !HasEnoughToSend())) { + if (!requests.empty()) { + isEmpty = false; + } + continue; + } + if (!requests.empty()) { + if (!MaybeIssues.Defined()) { + SendObjects(consumer, requests.front()); + } else { + Send(consumer, new TEvS3FileQueue::TEvObjectPathReadError(*MaybeIssues, requests.front())); + TryFinish(consumer, requests.front().GetSeqNo()); + } + requests.pop_front(); + handledRequest = true; + } + if (!requests.empty()) { + isEmpty = false; + } + } + if (isEmpty) { + HasPendingRequests = false; + } + } 
+ } + + void TryFinish(const TActorId& consumer, ui64 seqNo) { + LOG_T("TS3FileQueueActor", "TryFinish from consumer " << consumer << ", " << FinishedConsumers.size() << " consumers already finished, seqNo=" << seqNo); + if (FinishingConsumerToLastSeqNo.contains(consumer)) { + LOG_T("TS3FileQueueActor", "TryFinish FinishingConsumerToLastSeqNo=" << FinishingConsumerToLastSeqNo[consumer]); + if (FinishingConsumerToLastSeqNo[consumer] < seqNo || SelfId().NodeId() == consumer.NodeId()) { + FinishedConsumers.insert(consumer); + if (FinishedConsumers.size() == ConsumersCount) { + PassAway(); + } + } + } else { + FinishingConsumerToLastSeqNo[consumer] = seqNo; + } + } private: const TTxId TxId; @@ -656,8 +813,20 @@ class TS3FileQueueActor : public TActorBootstrapped { TMaybe MaybeLister = Nothing(); TMaybe> ListingFuture; size_t CurrentDirectoryPathIndex = 0; - std::deque> RequestQueue; + THashMap> PendingRequests; TMaybe MaybeIssues; + bool UseRuntimeListing; + ui64 ConsumersCount; + ui64 BatchSizeLimit; + ui64 BatchObjectCountLimit; + ui64 ObjectsTotalSize = 0; + THashMap FinishingConsumerToLastSeqNo; + THashSet FinishedConsumers; + bool RoundRobinStageFinished = false; + bool IsRoundRobinFinishScheduled = false; + bool HasPendingRequests = false; + THashSet StartedConsumers; + THashSet UpdatedConsumers; const IHTTPGateway::TPtr Gateway; const TString Url; @@ -665,6 +834,9 @@ class TS3FileQueueActor : public TActorBootstrapped { const TString Pattern; const ES3PatternVariant PatternVariant; const ES3PatternType PatternType; + + static constexpr TDuration PoisonTimeout = TDuration::Hours(3); + static constexpr TDuration RoundRobinStageTimeout = TDuration::Seconds(3); }; ui64 SubtractSaturating(ui64 lhs, ui64 rhs) { @@ -691,7 +863,12 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA ::NMonitoring::TDynamicCounterPtr counters, ::NMonitoring::TDynamicCounterPtr taskCounters, ui64 fileSizeLimit, - std::optional rowsLimitHint) + std::optional 
rowsLimitHint, + bool useRuntimeListing, + TActorId fileQueueActor, + ui64 fileQueueBatchSizeLimit, + ui64 fileQueueBatchObjectCountLimit, + ui64 fileQueueConsumersCountDelta) : ReadActorFactoryCfg(readActorFactoryCfg) , Gateway(std::move(gateway)) , HolderFactory(holderFactory) @@ -705,12 +882,17 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA , Pattern(pattern) , PatternVariant(patternVariant) , Paths(std::move(paths)) + , FileQueueActor(fileQueueActor) , AddPathIndex(addPathIndex) , SizeLimit(sizeLimit) , Counters(counters) , TaskCounters(taskCounters) , FileSizeLimit(fileSizeLimit) - , FilesRemained(rowsLimitHint) { + , FilesRemained(rowsLimitHint) + , UseRuntimeListing(useRuntimeListing) + , FileQueueBatchSizeLimit(fileQueueBatchSizeLimit) + , FileQueueBatchObjectCountLimit(fileQueueBatchObjectCountLimit) + , FileQueueConsumersCountDelta(fileQueueConsumersCountDelta) { if (Counters) { QueueDataSize = Counters->GetCounter("QueueDataSize"); QueueDataLimit = Counters->GetCounter("QueueDataLimit"); @@ -726,27 +908,46 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA } void Bootstrap() { - LOG_D("TS3ReadActor", "Bootstrap" << ", InputIndex: " << InputIndex); - FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ - TxId, - std::move(Paths), - ReadActorFactoryCfg.MaxInflight * 2, - FileSizeLimit, - Gateway, - Url, - AuthInfo, - Pattern, - PatternVariant, - ES3PatternType::Wildcard}); - SendPathRequest(); + if (!UseRuntimeListing) { + FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ + TxId, + std::move(Paths), + ReadActorFactoryCfg.MaxInflight * 2, + FileSizeLimit, + false, + 1, + FileQueueBatchSizeLimit, + FileQueueBatchObjectCountLimit, + Gateway, + Url, + AuthInfo, + Pattern, + PatternVariant, + ES3PatternType::Wildcard}); + } + + LOG_D("TS3ReadActor", "Bootstrap" << ", InputIndex: " << InputIndex << ", FileQueue: " << FileQueueActor << (UseRuntimeListing ? 
" (remote)" : " (local")); + + FileQueueEvents.Init(TxId, SelfId(), SelfId()); + FileQueueEvents.OnNewRecipientId(FileQueueActor); + if (UseRuntimeListing && FileQueueConsumersCountDelta > 0) { + FileQueueEvents.Send(new TEvS3FileQueue::TEvUpdateConsumersCount(FileQueueConsumersCountDelta)); + } + SendPathBatchRequest(); + Become(&TS3ReadActor::StateFunc); } bool TryStartDownload() { - if (ObjectPathCache.empty()) { + TrySendPathBatchRequest(); + if (PathBatchQueue.empty()) { // no path is pending return false; } + if (IsCurrentBatchEmpty) { + // waiting for batch to finish + return false; + } if (QueueTotalDataSize > ReadActorFactoryCfg.DataInflight) { // too large data inflight return false; @@ -766,38 +967,42 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA void StartDownload() { DownloadInflight++; - const auto& [path, size, index] = ReadPathFromCache(); - auto url = Url + path; - auto id = index; + const auto& object = ReadPathFromCache(); + auto url = Url + object.GetPath(); + auto id = object.GetPathIndex(); const TString requestId = CreateGuidAsString(); LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]"); Gateway->Download( UrlEscapeRet(url, true), IHTTPGateway::MakeYcHeaders(requestId, AuthInfo.GetToken(), {}, AuthInfo.GetAwsUserPwd(), AuthInfo.GetAwsSigV4()), 0U, - std::min(size, SizeLimit), - std::bind(&TS3ReadActor::OnDownloadFinished, ActorSystem, SelfId(), requestId, std::placeholders::_1, id, path), + std::min(object.GetSize(), SizeLimit), + std::bind(&TS3ReadActor::OnDownloadFinished, ActorSystem, SelfId(), requestId, std::placeholders::_1, id, object.GetPath()), {}, RetryPolicy); } TObjectPath ReadPathFromCache() { - Y_ENSURE(!ObjectPathCache.empty()); - auto object = ObjectPathCache.back(); - ObjectPathCache.pop_back(); - if (ObjectPathCache.empty() && !IsObjectQueueEmpty && !ConsumedEnoughFiles()) { - SendPathRequest(); - } + Y_ENSURE(!PathBatchQueue.empty()); + auto& 
currentBatch = PathBatchQueue.front(); + Y_ENSURE(!currentBatch.empty()); + auto object = currentBatch.back(); + currentBatch.pop_back(); + if (currentBatch.empty()) { + PathBatchQueue.pop_front(); + IsCurrentBatchEmpty = true; + } + TrySendPathBatchRequest(); return object; } - void SendPathRequest() { - Y_ENSURE(!IsWaitingObjectQueueResponse); - const ui64 requestedAmount = std::min(ReadActorFactoryCfg.MaxInflight, FilesRemained.value_or(std::numeric_limits::max())); - Send( - FileQueueActor, - std::make_unique( - requestedAmount)); - IsWaitingObjectQueueResponse = true; + void TrySendPathBatchRequest() { + if (PathBatchQueue.size() < 2 && !IsFileQueueEmpty && !ConsumedEnoughFiles() && !IsWaitingFileQueueResponse) { + SendPathBatchRequest(); + } + } + void SendPathBatchRequest() { + FileQueueEvents.Send(new TEvS3FileQueue::TEvGetNextBatch()); + IsWaitingFileQueueResponse = true; } static constexpr char ActorName[] = "S3_READ_ACTOR"; @@ -823,35 +1028,72 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA return FilesRemained && (*FilesRemained == 0); } - STRICT_STFUNC(StateFunc, + STRICT_STFUNC_EXC(StateFunc, hFunc(TEvPrivate::TEvReadResult, Handle); hFunc(TEvPrivate::TEvReadError, Handle); - hFunc(TEvPrivate::TEvObjectPathBatch, HandleObjectPathBatch); - hFunc(TEvPrivate::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(TEvS3FileQueue::TEvObjectPathBatch, HandleObjectPathBatch); + hFunc(TEvS3FileQueue::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(TEvS3FileQueue::TEvAck, HandleAck); + hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); + , catch (const std::exception& e) { + TIssues issues{TIssue{TStringBuilder() << "An unknown exception has occurred: '" << e.what() << "'"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, 
NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } ) - void HandleObjectPathBatch(TEvPrivate::TEvObjectPathBatch::TPtr& objectPathBatch) { - Y_ENSURE(IsWaitingObjectQueueResponse); - IsWaitingObjectQueueResponse = false; - ListedFiles += objectPathBatch->Get()->ObjectPaths.size(); - IsObjectQueueEmpty = objectPathBatch->Get()->NoMoreFiles; - ObjectPathCache.insert( - ObjectPathCache.end(), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.begin()), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.end())); + void HandleObjectPathBatch(TEvS3FileQueue::TEvObjectPathBatch::TPtr& objectPathBatch) { + if (!FileQueueEvents.OnEventReceived(objectPathBatch)) { + LOG_W("TS3ReadActor", "Duplicated TEvObjectPathBatch (likely resent) from " << FileQueueActor); + return; + } + + Y_ENSURE(IsWaitingFileQueueResponse); + IsWaitingFileQueueResponse = false; + auto& objectBatch = objectPathBatch->Get()->Record; + ListedFiles += objectBatch.GetObjectPaths().size(); + IsFileQueueEmpty = objectBatch.GetNoMoreFiles(); + if (IsFileQueueEmpty && !IsConfirmedFileQueueFinish) { + LOG_D("TS3ReadActor", "Confirm finish to " << FileQueueActor); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + if (!objectBatch.GetObjectPaths().empty()) { + PathBatchQueue.emplace_back( + std::make_move_iterator(objectBatch.MutableObjectPaths()->begin()), + std::make_move_iterator(objectBatch.MutableObjectPaths()->end())); + } while (TryStartDownload()) {} if (LastFileWasProcessed()) { Send(ComputeActorId, new TEvNewAsyncInputDataArrived(InputIndex)); } } - void HandleObjectPathReadError(TEvPrivate::TEvObjectPathReadError::TPtr& result) { - IsObjectQueueEmpty = true; - LOG_E("TS3ReadActor", "Error while object listing, details: TEvObjectPathReadError: " << result->Get()->Issues.ToOneLineString()); - auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", TIssues{result->Get()->Issues}); + void 
HandleObjectPathReadError(TEvS3FileQueue::TEvObjectPathReadError::TPtr& result) { + if (!FileQueueEvents.OnEventReceived(result)) { + LOG_W("TS3ReadActor", "Duplicated TEvObjectPathReadError (likely resent) from " << FileQueueActor); + return; + } + + IsFileQueueEmpty = true; + if (!IsConfirmedFileQueueFinish) { + LOG_D("TS3ReadActor", "Confirm finish (with errors) to " << FileQueueActor); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + TIssues issues; + IssuesFromMessage(result->Get()->Record.GetIssues(), issues); + LOG_E("TS3ReadActor", "Error while object listing, details: TEvObjectPathReadError: " << issues.ToOneLineString()); + issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", std::move(issues)); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); } + void HandleAck(TEvS3FileQueue::TEvAck::TPtr& ev) { + FileQueueEvents.OnEventReceived(ev); + } + static void OnDownloadFinished(TActorSystem* actorSystem, TActorId selfId, const TString& requestId, IHTTPGateway::TResult&& result, size_t pathInd, const TString path) { if (!result.Issues) { actorSystem->Send(new IEventHandle(selfId, TActorId(), new TEvPrivate::TEvReadResult(std::move(result.Content), requestId, pathInd, path))); @@ -892,7 +1134,7 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA } while (!Blocks.empty() && freeSpace > 0LL); } - if (LastFileWasProcessed() || ConsumedEnoughFiles()) { + if ((LastFileWasProcessed() || ConsumedEnoughFiles()) && !FileQueueEvents.RemoveConfirmedEvents()) { finished = true; ContainerCache.Clear(); } @@ -904,7 +1146,7 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA return total; } bool LastFileWasProcessed() const { - return Blocks.empty() && (ListedFiles == CompletedFiles) && IsObjectQueueEmpty; + return Blocks.empty() && (ListedFiles == CompletedFiles) && IsFileQueueEmpty; } void Handle(TEvPrivate::TEvReadResult::TPtr& 
result) { @@ -934,6 +1176,9 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA } Blocks.emplace(std::make_tuple(std::move(result->Get()->Result), id)); DownloadInflight--; + if (IsCurrentBatchEmpty && DownloadInflight == 0) { + IsCurrentBatchEmpty = false; + } if (FilesRemained) { *FilesRemained = SubtractSaturating(*FilesRemained, 1); } @@ -960,6 +1205,28 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while reading file " << path << " with request id [" << requestId << "]", TIssues{result->Get()->Error}); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); } + + void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&) { + FileQueueEvents.Retry(); + } + + void Handle(NActors::TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { + LOG_T("TS3ReadActor", "Handle disconnected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeDisconnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvInterconnect::TEvNodeConnected::TPtr& ev) { + LOG_T("TS3ReadActor", "Handle connected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeConnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + LOG_T("TS3ReadActor", "Handle undelivered FileQueue "); + if (!FileQueueEvents.HandleUndelivered(ev)) { + TIssues issues{TIssue{TStringBuilder() << "FileQueue was lost"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } + } // IActor & IDqComputeActorAsyncInput void PassAway() override { // Is called from Compute Actor @@ -977,7 +1244,7 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA QueueTotalDataSize = 0; ContainerCache.Clear(); - Send(FileQueueActor, new NActors::TEvents::TEvPoison()); + FileQueueEvents.Unsubscribe(); TActorBootstrapped::PassAway(); } @@ 
-1000,9 +1267,6 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA const TString Pattern; const ES3PatternVariant PatternVariant; TPathList Paths; - std::vector ObjectPathCache; - bool IsObjectQueueEmpty = false; - bool IsWaitingObjectQueueResponse = false; size_t ListedFiles = 0; size_t CompletedFiles = 0; NActors::TActorId FileQueueActor; @@ -1026,6 +1290,17 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA ui64 DownloadInflight = 0; const ui64 FileSizeLimit; std::optional FilesRemained; + + bool UseRuntimeListing; + ui64 FileQueueBatchSizeLimit; + ui64 FileQueueBatchObjectCountLimit; + ui64 FileQueueConsumersCountDelta; + bool IsFileQueueEmpty = false; + bool IsCurrentBatchEmpty = false; + bool IsWaitingFileQueueResponse = false; + bool IsConfirmedFileQueueFinish = false; + TRetryEventsQueue FileQueueEvents; + TDeque> PathBatchQueue; }; struct TReadSpec { @@ -1554,7 +1829,7 @@ class TS3ReadCoroImpl : public TActorCoroImpl { if (it != RangeCache.end()) { return it->second; } - RetryStuff->Gateway->Download(Url + Path, RetryStuff->Headers, + RetryStuff->Gateway->Download(RetryStuff->Url, RetryStuff->Headers, range.Offset, range.Length, std::bind(&OnResult, GetActorSystem(), SelfActorId, range, ++RangeCookie, std::placeholders::_1), @@ -2260,7 +2535,12 @@ class TS3StreamReadActor : public TActorBootstrapped, public ::NMonitoring::TDynamicCounterPtr taskCounters, ui64 fileSizeLimit, std::optional rowsLimitHint, - IMemoryQuotaManager::TPtr memoryQuotaManager + IMemoryQuotaManager::TPtr memoryQuotaManager, + bool useRuntimeListing, + TActorId fileQueueActor, + ui64 fileQueueBatchSizeLimit, + ui64 fileQueueBatchObjectCountLimit, + ui64 fileQueueConsumersCountDelta ) : ReadActorFactoryCfg(readActorFactoryCfg) , Gateway(std::move(gateway)) , HolderFactory(holderFactory) @@ -2278,8 +2558,13 @@ class TS3StreamReadActor : public TActorBootstrapped, public , ReadSpec(readSpec) , Counters(std::move(counters)) , 
TaskCounters(std::move(taskCounters)) + , FileQueueActor(fileQueueActor) , FileSizeLimit(fileSizeLimit) - , MemoryQuotaManager(memoryQuotaManager) { + , MemoryQuotaManager(memoryQuotaManager) + , UseRuntimeListing(useRuntimeListing) + , FileQueueBatchSizeLimit(fileQueueBatchSizeLimit) + , FileQueueBatchObjectCountLimit(fileQueueBatchObjectCountLimit) + , FileQueueConsumersCountDelta(fileQueueConsumersCountDelta) { if (Counters) { QueueDataSize = Counters->GetCounter("QueueDataSize"); QueueDataLimit = Counters->GetCounter("QueueDataLimit"); @@ -2326,27 +2611,45 @@ class TS3StreamReadActor : public TActorBootstrapped, public TaskDownloadPaused, TaskChunkDownloadCount, DecodedChunkSizeHist); - FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ - TxId, - std::move(Paths), - ReadActorFactoryCfg.MaxInflight * 2, - FileSizeLimit, - Gateway, - Url, - AuthInfo, - Pattern, - PatternVariant, - ES3PatternType::Wildcard}); - SendPathRequest(); + + if (!UseRuntimeListing) { + FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ + TxId, + std::move(Paths), + ReadActorFactoryCfg.MaxInflight * 2, + FileSizeLimit, + false, + 1, + FileQueueBatchSizeLimit, + FileQueueBatchObjectCountLimit, + Gateway, + Url, + AuthInfo, + Pattern, + PatternVariant, + ES3PatternType::Wildcard}); + } + FileQueueEvents.Init(TxId, SelfId(), SelfId()); + FileQueueEvents.OnNewRecipientId(FileQueueActor); + if (UseRuntimeListing && FileQueueConsumersCountDelta > 0) { + FileQueueEvents.Send(new TEvS3FileQueue::TEvUpdateConsumersCount(FileQueueConsumersCountDelta)); + } + SendPathBatchRequest(); + Become(&TS3StreamReadActor::StateFunc); Bootstrapped = true; } bool TryRegisterCoro() { - if (ObjectPathCache.empty()) { + TrySendPathBatchRequest(); + if (PathBatchQueue.empty()) { // no path is pending return false; } + if (IsCurrentBatchEmpty) { + // waiting for batch to finish + return false; + } if (QueueBufferCounter->IsFull()) { // too large data inflight return false; @@ -2378,24 
+2681,24 @@ class TS3StreamReadActor : public TActorBootstrapped, public if (TaskCounters) { TaskDownloadCount->Inc(); } - const auto& objectPath = ReadPathFromCache(); - DownloadSize += objectPath.Size; + const auto& object = ReadPathFromCache(); + DownloadSize += object.GetSize(); const TString requestId = CreateGuidAsString(); auto stuff = std::make_shared( Gateway, - Url + objectPath.Path, + Url + object.GetPath(), IHTTPGateway::MakeYcHeaders(requestId, AuthInfo.GetToken(), {}, AuthInfo.GetAwsUserPwd(), AuthInfo.GetAwsSigV4()), - objectPath.Size, + object.GetSize(), TxId, requestId, RetryPolicy); - auto pathIndex = objectPath.PathIndex; + auto pathIndex = object.GetPathIndex(); if (TaskCounters) { HttpInflightLimit->Add(Gateway->GetBuffersSizePerStream()); } LOG_D( "TS3StreamReadActor", - "RegisterCoro with path " << objectPath.Path << " with pathIndex " + "RegisterCoro with path " << object.GetPath() << " with pathIndex " << pathIndex); auto impl = MakeHolder( InputIndex, @@ -2404,7 +2707,7 @@ class TS3StreamReadActor : public TActorBootstrapped, public std::move(stuff), ReadSpec, pathIndex, - objectPath.Path, + object.GetPath(), Url, RowsRemained, ReadActorFactoryCfg, @@ -2419,22 +2722,26 @@ class TS3StreamReadActor : public TActorBootstrapped, public } TObjectPath ReadPathFromCache() { - Y_ENSURE(!ObjectPathCache.empty()); - auto object = ObjectPathCache.back(); - ObjectPathCache.pop_back(); - if (ObjectPathCache.empty() && !IsObjectQueueEmpty) { - SendPathRequest(); - } + Y_ENSURE(!PathBatchQueue.empty()); + auto& currentBatch = PathBatchQueue.front(); + Y_ENSURE(!currentBatch.empty()); + auto object = currentBatch.back(); + currentBatch.pop_back(); + if (currentBatch.empty()) { + PathBatchQueue.pop_front(); + IsCurrentBatchEmpty = true; + } + TrySendPathBatchRequest(); return object; } - void SendPathRequest() { - Y_ENSURE(!IsWaitingObjectQueueResponse); - LOG_D("TS3StreamReadActor", "SendPathRequest " << ReadActorFactoryCfg.MaxInflight); - Send( - 
FileQueueActor, - std::make_unique( - ReadActorFactoryCfg.MaxInflight)); - IsWaitingObjectQueueResponse = true; + void TrySendPathBatchRequest() { + if (PathBatchQueue.size() < 2 && !IsFileQueueEmpty && !IsWaitingFileQueueResponse) { + SendPathBatchRequest(); + } + } + void SendPathBatchRequest() { + FileQueueEvents.Send(new TEvS3FileQueue::TEvGetNextBatch()); + IsWaitingFileQueueResponse = true; } static constexpr char ActorName[] = "S3_STREAM_READ_ACTOR"; @@ -2528,7 +2835,7 @@ class TS3StreamReadActor : public TActorBootstrapped, public TryRegisterCoro(); } while (!Blocks.empty() && free > 0LL && GetBlockSize(Blocks.front()) <= size_t(free)); - finished = ConsumedEnoughRows() || LastFileWasProcessed(); + finished = (ConsumedEnoughRows() || LastFileWasProcessed()) && !FileQueueEvents.RemoveConfirmedEvents(); if (finished) { ContainerCache.Clear(); ArrowTupleContainerCache.Clear(); @@ -2560,7 +2867,8 @@ class TS3StreamReadActor : public TActorBootstrapped, public for (const auto actorId : CoroActors) { Send(actorId, new NActors::TEvents::TEvPoison()); } - Send(FileQueueActor, new NActors::TEvents::TEvPoison()); + LOG_T("TS3StreamReadActor", "PassAway FileQueue RemoveConfirmedEvents=" << FileQueueEvents.RemoveConfirmedEvents()); + FileQueueEvents.Unsubscribe(); ContainerCache.Clear(); ArrowTupleContainerCache.Clear(); @@ -2575,36 +2883,69 @@ class TS3StreamReadActor : public TActorBootstrapped, public TActorBootstrapped::PassAway(); } - STRICT_STFUNC(StateFunc, + STRICT_STFUNC_EXC(StateFunc, hFunc(TEvPrivate::TEvRetryEventFunc, HandleRetry); hFunc(TEvPrivate::TEvNextBlock, HandleNextBlock); hFunc(TEvPrivate::TEvNextRecordBatch, HandleNextRecordBatch); hFunc(TEvPrivate::TEvFileFinished, HandleFileFinished); - hFunc(TEvPrivate::TEvObjectPathBatch, HandleObjectPathBatch); - hFunc(TEvPrivate::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(TEvS3FileQueue::TEvAck, Handle); + hFunc(TEvS3FileQueue::TEvObjectPathBatch, HandleObjectPathBatch); + 
hFunc(TEvS3FileQueue::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); + , catch (const std::exception& e) { + TIssues issues{TIssue{TStringBuilder() << "An unknown exception has occurred: '" << e.what() << "'"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } ) - void HandleObjectPathBatch(TEvPrivate::TEvObjectPathBatch::TPtr& objectPathBatch) { - Y_ENSURE(IsWaitingObjectQueueResponse); - IsWaitingObjectQueueResponse = false; - ListedFiles += objectPathBatch->Get()->ObjectPaths.size(); - IsObjectQueueEmpty = objectPathBatch->Get()->NoMoreFiles; + void HandleObjectPathBatch(TEvS3FileQueue::TEvObjectPathBatch::TPtr& objectPathBatch) { + if (!FileQueueEvents.OnEventReceived(objectPathBatch)) { + return; + } - ObjectPathCache.insert( - ObjectPathCache.end(), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.begin()), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.end())); + Y_ENSURE(IsWaitingFileQueueResponse); + IsWaitingFileQueueResponse = false; + auto& objectBatch = objectPathBatch->Get()->Record; + ListedFiles += objectBatch.GetObjectPaths().size(); + IsFileQueueEmpty = objectBatch.GetNoMoreFiles(); + if (IsFileQueueEmpty && !IsConfirmedFileQueueFinish) { + LOG_T("TS3StreamReadActor", "Sending finish confirmation to FileQueue"); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + if (!objectBatch.GetObjectPaths().empty()) { + PathBatchQueue.emplace_back( + std::make_move_iterator(objectBatch.MutableObjectPaths()->begin()), + std::make_move_iterator(objectBatch.MutableObjectPaths()->end())); + } LOG_D( "TS3StreamReadActor", - "HandleObjectPathBatch " << ObjectPathCache.size() << " IsObjectQueueEmpty " - << 
IsObjectQueueEmpty << " MaxInflight " << ReadActorFactoryCfg.MaxInflight); + "HandleObjectPathBatch of size " << objectBatch.GetObjectPaths().size()); while (TryRegisterCoro()) {} + + if (LastFileWasProcessed()) { + Send(ComputeActorId, new TEvNewAsyncInputDataArrived(InputIndex)); + } } - void HandleObjectPathReadError(TEvPrivate::TEvObjectPathReadError::TPtr& result) { - IsObjectQueueEmpty = true; - LOG_W("TS3StreamReadActor", "Error while object listing, details: TEvObjectPathReadError: " << result->Get()->Issues.ToOneLineString()); - auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", TIssues{result->Get()->Issues}); + void HandleObjectPathReadError(TEvS3FileQueue::TEvObjectPathReadError::TPtr& result) { + if (!FileQueueEvents.OnEventReceived(result)) { + return; + } + + IsFileQueueEmpty = true; + if (!IsConfirmedFileQueueFinish) { + LOG_T("TS3StreamReadActor", "Sending finish confirmation to FileQueue"); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + TIssues issues; + IssuesFromMessage(result->Get()->Record.GetIssues(), issues); + LOG_W("TS3StreamReadActor", "Error while object listing, details: TEvObjectPathReadError: " << issues.ToOneLineString()); + issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", std::move(issues)); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); } @@ -2646,6 +2987,9 @@ class TS3StreamReadActor : public TActorBootstrapped, public void HandleFileFinished(TEvPrivate::TEvFileFinished::TPtr& ev) { CoroActors.erase(ev->Sender); + if (IsCurrentBatchEmpty && CoroActors.size() == 0) { + IsCurrentBatchEmpty = false; + } if (ev->Get()->IngressDelta) { IngressStats.Bytes += ev->Get()->IngressDelta; IngressStats.Chunks++; @@ -2677,7 +3021,7 @@ class TS3StreamReadActor : public TActorBootstrapped, public } CompletedFiles++; IngressStats.Splits++; - if (!ObjectPathCache.empty()) { + if 
(!PathBatchQueue.empty()) { TryRegisterCoro(); } else { /* @@ -2690,9 +3034,34 @@ class TS3StreamReadActor : public TActorBootstrapped, public } } } + + void Handle(TEvS3FileQueue::TEvAck::TPtr& ev) { + FileQueueEvents.OnEventReceived(ev); + } + void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&) { + FileQueueEvents.Retry(); + } + + void Handle(NActors::TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { + LOG_T("TS3StreamReadActor", "Handle disconnected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeDisconnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvInterconnect::TEvNodeConnected::TPtr& ev) { + LOG_T("TS3StreamReadActor", "Handle connected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeConnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + LOG_T("TS3StreamReadActor", "Handle undelivered FileQueue "); + if (!FileQueueEvents.HandleUndelivered(ev)) { + TIssues issues{TIssue{TStringBuilder() << "FileQueue was lost"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } + } bool LastFileWasProcessed() const { - return Blocks.empty() && (ListedFiles == CompletedFiles) && IsObjectQueueEmpty; + return Blocks.empty() && (ListedFiles == CompletedFiles) && IsFileQueueEmpty; } void StopLoadsIfEnough(ui64 consumedRows) { @@ -2732,9 +3101,6 @@ class TS3StreamReadActor : public TActorBootstrapped, public const TString Pattern; const ES3PatternVariant PatternVariant; TPathList Paths; - std::vector ObjectPathCache; - bool IsObjectQueueEmpty = false; - bool IsWaitingObjectQueueResponse = false; const bool AddPathIndex; size_t ListedFiles = 0; size_t CompletedFiles = 0; @@ -2768,6 +3134,16 @@ class TS3StreamReadActor : public TActorBootstrapped, public const ui64 FileSizeLimit; bool Bootstrapped = false; IMemoryQuotaManager::TPtr MemoryQuotaManager; + bool UseRuntimeListing; + ui64 FileQueueBatchSizeLimit; + ui64 
FileQueueBatchObjectCountLimit; + ui64 FileQueueConsumersCountDelta; + bool IsCurrentBatchEmpty = false; + bool IsFileQueueEmpty = false; + bool IsWaitingFileQueueResponse = false; + bool IsConfirmedFileQueueFinish = false; + TRetryEventsQueue FileQueueEvents; + TDeque> PathBatchQueue; }; using namespace NKikimr::NMiniKQL; @@ -2910,6 +3286,39 @@ NDB::FormatSettings::TimestampFormat ToTimestampFormat(const TString& formatName using namespace NKikimr::NMiniKQL; +IActor* CreateS3FileQueueActor( + TTxId txId, + TPathList paths, + size_t prefetchSize, + ui64 fileSizeLimit, + bool useRuntimeListing, + ui64 consumersCount, + ui64 batchSizeLimit, + ui64 batchObjectCountLimit, + IHTTPGateway::TPtr gateway, + TString url, + TS3Credentials::TAuthInfo authInfo, + TString pattern, + ES3PatternVariant patternVariant, + ES3PatternType patternType) { + return new TS3FileQueueActor( + txId, + paths, + prefetchSize, + fileSizeLimit, + useRuntimeListing, + consumersCount, + batchSizeLimit, + batchObjectCountLimit, + gateway, + url, + authInfo, + pattern, + patternVariant, + patternType + ); +} + std::pair CreateS3ReadActor( const TTypeEnvironment& typeEnv, const THolderFactory& holderFactory, @@ -2981,6 +3390,29 @@ std::pair CreateS3ReadActor( if (params.GetRowsLimitHint() != 0) { rowsLimitHint = params.GetRowsLimitHint(); } + + TActorId fileQueueActor; + if (auto it = settings.find("fileQueueActor"); it != settings.cend()) { + NActorsProto::TActorId protoId; + TMemoryInput inputStream(it->second); + ParseFromTextFormat(inputStream, protoId); + fileQueueActor = ActorIdFromProto(protoId); + } + + ui64 fileQueueBatchSizeLimit; + if (auto it = settings.find("fileQueueBatchSizeLimit"); it != settings.cend()) { + fileQueueBatchSizeLimit = FromString(it->second); + } + + ui64 fileQueueBatchObjectCountLimit; + if (auto it = settings.find("fileQueueBatchObjectCountLimit"); it != settings.cend()) { + fileQueueBatchObjectCountLimit = FromString(it->second); + } + + ui64 
fileQueueConsumersCountDelta = 0; + if (readRanges.size() > 1) { + fileQueueConsumersCountDelta = readRanges.size() - 1; + } if (params.HasFormat() && params.HasRowType()) { const auto pb = std::make_unique(typeEnv, functionRegistry); @@ -3086,7 +3518,8 @@ std::pair CreateS3ReadActor( #undef SUPPORTED_FLAGS const auto actor = new TS3StreamReadActor(inputIndex, statsLevel, txId, std::move(gateway), holderFactory, params.GetUrl(), authInfo, pathPattern, pathPatternVariant, std::move(paths), addPathIndex, readSpec, computeActorId, retryPolicy, - cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, memoryQuotaManager); + cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, memoryQuotaManager, + params.GetUseRuntimeListing(), fileQueueActor, fileQueueBatchSizeLimit, fileQueueBatchObjectCountLimit, fileQueueConsumersCountDelta); return {actor, actor}; } else { @@ -3096,7 +3529,8 @@ std::pair CreateS3ReadActor( const auto actor = new TS3ReadActor(inputIndex, statsLevel, txId, std::move(gateway), holderFactory, params.GetUrl(), authInfo, pathPattern, pathPatternVariant, std::move(paths), addPathIndex, computeActorId, sizeLimit, retryPolicy, - cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint); + cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, + params.GetUseRuntimeListing(), fileQueueActor, fileQueueBatchSizeLimit, fileQueueBatchObjectCountLimit, fileQueueConsumersCountDelta); return {actor, actor}; } } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h index f4569b0219d5..2b1ca1adeff6 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h @@ -2,8 +2,10 @@ #include #include +#include "ydb/library/yql/providers/s3/object_listers/yql_s3_list.h" #include #include +#include "ydb/library/yql/providers/s3/range_helpers/path_list_reader.h" #include #include @@ -11,6 +13,22 @@ namespace NYql::NDq { 
struct TS3ReadActorFactoryConfig; +NActors::IActor* CreateS3FileQueueActor( + TTxId txId, + NS3Details::TPathList paths, + size_t prefetchSize, + ui64 fileSizeLimit, + bool useRuntimeListing, + ui64 consumersCount, + ui64 batchSizeLimit, + ui64 batchObjectCountLimit, + IHTTPGateway::TPtr gateway, + TString url, + TS3Credentials::TAuthInfo authInfo, + TString pattern, + NYql::NS3Lister::ES3PatternVariant patternVariant, + NS3Lister::ES3PatternType patternType); + std::pair CreateS3ReadActor( const NKikimr::NMiniKQL::TTypeEnvironment& typeEnv, const NKikimr::NMiniKQL::THolderFactory& holderFactory, diff --git a/ydb/library/yql/providers/s3/proto/file_queue.proto b/ydb/library/yql/providers/s3/proto/file_queue.proto new file mode 100644 index 000000000000..75ec283f20f2 --- /dev/null +++ b/ydb/library/yql/providers/s3/proto/file_queue.proto @@ -0,0 +1,40 @@ +syntax = "proto3"; +option cc_enable_arenas = true; + +package NYql.NS3.FileQueue; + +import "ydb/library/yql/dq/actors/protos/dq_events.proto"; +import "ydb/public/api/protos/ydb_issue_message.proto"; + +message TEvUpdateConsumersCount { + uint64 ConsumersCountDelta = 1; + + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvAck { + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvGetNextBatch { + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvObjectPathBatch { + bool NoMoreFiles = 1; + repeated TObjectPath ObjectPaths = 2; + + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvObjectPathReadError { + repeated Ydb.Issue.IssueMessage Issues = 1; + + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TObjectPath { + uint64 PathIndex = 1; + uint64 Size = 2; + string Path = 3; +} diff --git a/ydb/library/yql/providers/s3/proto/source.proto b/ydb/library/yql/providers/s3/proto/source.proto index fdf63c74acae..1ac3fd840d12 100644 --- 
a/ydb/library/yql/providers/s3/proto/source.proto +++ b/ydb/library/yql/providers/s3/proto/source.proto @@ -21,4 +21,5 @@ message TSource { bool RowGroupReordering = 10; uint64 ParallelDownloadCount = 11; uint64 RowsLimitHint = 12; + bool UseRuntimeListing = 13; } diff --git a/ydb/library/yql/providers/s3/proto/ya.make b/ydb/library/yql/providers/s3/proto/ya.make index eb819cddfce5..acb43f749ab3 100644 --- a/ydb/library/yql/providers/s3/proto/ya.make +++ b/ydb/library/yql/providers/s3/proto/ya.make @@ -2,12 +2,18 @@ PROTO_LIBRARY() SRCS( credentials.proto + file_queue.proto range.proto retry_config.proto sink.proto source.proto ) +PEERDIR( + ydb/library/yql/dq/actors/protos + ydb/public/api/protos +) + IF (NOT PY_PROTOS_FOR) EXCLUDE_TAGS(GO_PROTO) ENDIF() diff --git a/ydb/library/yql/providers/s3/provider/ut/ya.make b/ydb/library/yql/providers/s3/provider/ut/ya.make index ffffd526f4cb..50c6132c8c73 100644 --- a/ydb/library/yql/providers/s3/provider/ut/ya.make +++ b/ydb/library/yql/providers/s3/provider/ut/ya.make @@ -4,4 +4,11 @@ SRCS( yql_s3_listing_strategy_ut.cpp ) +PEERDIR( + ydb/library/yql/minikql/dom + ydb/library/yql/parser/pg_wrapper + ydb/library/yql/public/udf + ydb/library/yql/public/udf/service/exception_policy +) + END() diff --git a/ydb/library/yql/providers/s3/provider/ya.make b/ydb/library/yql/providers/s3/provider/ya.make index 4d313be78d64..097c865d1e69 100644 --- a/ydb/library/yql/providers/s3/provider/ya.make +++ b/ydb/library/yql/providers/s3/provider/ya.make @@ -45,6 +45,7 @@ PEERDIR( ydb/library/yql/providers/dq/common ydb/library/yql/providers/dq/expr_nodes ydb/library/yql/providers/result/expr_nodes + ydb/library/yql/providers/s3/actors ydb/library/yql/providers/s3/common ydb/library/yql/providers/s3/expr_nodes ydb/library/yql/providers/s3/object_listers diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp index 56e5a1cd52d8..2601d3971242 100644 --- 
a/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp @@ -29,7 +29,7 @@ void ScanPlanDependencies(const TExprNode::TPtr& input, TExprNode::TListType& ch class TS3DataSinkProvider : public TDataProviderBase { public: - TS3DataSinkProvider(TS3State::TPtr state, IHTTPGateway::TPtr) + TS3DataSinkProvider(TS3State::TPtr state) : State_(state) , TypeAnnotationTransformer_(CreateS3DataSinkTypeAnnotationTransformer(State_)) , ExecutionTransformer_(CreateS3DataSinkExecTransformer(State_)) @@ -137,8 +137,8 @@ class TS3DataSinkProvider : public TDataProviderBase { } -TIntrusivePtr CreateS3DataSink(TS3State::TPtr state, IHTTPGateway::TPtr gateway) { - return new TS3DataSinkProvider(std::move(state), std::move(gateway)); +TIntrusivePtr CreateS3DataSink(TS3State::TPtr state) { + return new TS3DataSinkProvider(std::move(state)); } } // namespace NYql diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp index 4f7eb51def11..3586024ee032 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp @@ -21,9 +21,9 @@ namespace { class TS3DataSourceProvider : public TDataProviderBase { public: - TS3DataSourceProvider(TS3State::TPtr state, IHTTPGateway::TPtr gateway) + TS3DataSourceProvider(TS3State::TPtr state) : State_(std::move(state)) - , IODiscoveryTransformer_(CreateS3IODiscoveryTransformer(State_, std::move(gateway))) + , IODiscoveryTransformer_(CreateS3IODiscoveryTransformer(State_)) , ConfigurationTransformer_(MakeHolder(State_->Configuration, *State_->Types, TString{S3ProviderName})) , CallableExecutionTransformer_(CreateS3SourceCallableExecutionTransformer(State_)) , TypeAnnotationTransformer_(CreateS3DataSourceTypeAnnotationTransformer(State_)) @@ -160,8 +160,8 @@ class TS3DataSourceProvider : public TDataProviderBase { } -TIntrusivePtr 
CreateS3DataSource(TS3State::TPtr state, IHTTPGateway::TPtr gateway) { - return new TS3DataSourceProvider(std::move(state), std::move(gateway)); +TIntrusivePtr CreateS3DataSource(TS3State::TPtr state) { + return new TS3DataSourceProvider(std::move(state)); } } // namespace NYql diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp index cad13c9213b1..dcb4c1dc9933 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -74,6 +76,7 @@ class TS3DqIntegration: public TDqIntegrationBase { ui64 Partition(const TDqSettings&, size_t maxPartitions, const TExprNode& node, TVector& partitions, TString*, TExprContext&, bool) override { std::vector> parts; std::optional mbLimitHint; + bool hasDirectories = false; if (const TMaybeNode source = &node) { const auto settings = source.Cast().Settings().Cast(); mbLimitHint = TryExtractLimitHint(settings); @@ -87,6 +90,9 @@ class TS3DqIntegration: public TDqIntegrationBase { paths); parts.reserve(parts.size() + paths.size()); for (const auto& path : paths) { + if (path.IsDirectory) { + hasDirectories = true; + } parts.emplace_back(1U, path); } } @@ -98,6 +104,25 @@ class TS3DqIntegration: public TDqIntegrationBase { YQL_CLOG(TRACE, ProviderS3) << "limited max partitions to " << maxPartitions; } + auto useRuntimeListing = State_->Configuration->UseRuntimeListing.Get().GetOrElse(false); + + YQL_CLOG(DEBUG, ProviderS3) << " useRuntimeListing=" << useRuntimeListing; + if (useRuntimeListing) { + size_t partitionCount = hasDirectories ? 
maxPartitions : Min(parts.size(), maxPartitions); + partitions.reserve(partitionCount); + for (size_t i = 0; i < partitionCount; ++i) { + NS3::TRange range; + TFileTreeBuilder builder; + builder.Save(&range); + + partitions.emplace_back(); + TStringOutput out(partitions.back()); + range.Save(&out); + } + YQL_CLOG(DEBUG, ProviderS3) << " hasDirectories=" << hasDirectories << ", partitionCount=" << partitionCount << ", maxPartitions=" << maxPartitions; + return 0; + } + if (maxPartitions && parts.size() > maxPartitions) { if (const auto extraParts = parts.size() - maxPartitions; extraParts > maxPartitions) { const auto partsPerTask = (parts.size() - 1ULL) / maxPartitions + 1ULL; @@ -136,6 +161,7 @@ class TS3DqIntegration: public TDqIntegrationBase { range.Save(&out); } + YQL_CLOG(DEBUG, ProviderS3) << " hasDirectories=" << hasDirectories << ", partitionCount=" << partitions.size() << ", maxPartitions=" << maxPartitions;; return 0; } @@ -312,7 +338,7 @@ class TS3DqIntegration: public TDqIntegrationBase { return read; } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t maxPartitions) override { const TDqSource source(&node); if (const auto maySettings = source.Settings().Maybe()) { const auto settings = maySettings.Cast(); @@ -383,6 +409,120 @@ class TS3DqIntegration: public TDqIntegrationBase { srcDesc.MutableSettings()->insert({"addPathIndex", "true"}); } +#if defined(_linux_) || defined(_darwin_) + + auto useRuntimeListing = State_->Configuration->UseRuntimeListing.Get().GetOrElse(false); + srcDesc.SetUseRuntimeListing(useRuntimeListing); + + auto fileQueueBatchSizeLimit = State_->Configuration->FileQueueBatchSizeLimit.Get().GetOrElse(1000000); + srcDesc.MutableSettings()->insert({"fileQueueBatchSizeLimit", ToString(fileQueueBatchSizeLimit)}); + + auto 
fileQueueBatchObjectCountLimit = State_->Configuration->FileQueueBatchObjectCountLimit.Get().GetOrElse(1000); + srcDesc.MutableSettings()->insert({"fileQueueBatchObjectCountLimit", ToString(fileQueueBatchObjectCountLimit)}); + + YQL_CLOG(DEBUG, ProviderS3) << " useRuntimeListing=" << useRuntimeListing; + + if (useRuntimeListing) { + TPathList paths; + for (auto i = 0u; i < settings.Paths().Size(); ++i) { + const auto& packed = settings.Paths().Item(i); + TPathList pathsChunk; + UnpackPathsList( + packed.Data().Literal().Value(), + FromString(packed.IsText().Literal().Value()), + paths); + paths.insert(paths.end(), + std::make_move_iterator(pathsChunk.begin()), + std::make_move_iterator(pathsChunk.end())); + } + + NS3::TRange range; + range.SetStartPathIndex(0); + TFileTreeBuilder builder; + std::for_each(paths.cbegin(), paths.cend(), [&builder](const TPath& f) { + builder.AddPath(f.Path, f.Size, f.IsDirectory); + }); + builder.Save(&range); + + TVector serialized(1); + TStringOutput out(serialized.front()); + range.Save(&out); + + paths.clear(); + ReadPathsList(srcDesc, {}, serialized, paths); + + NDq::TS3ReadActorFactoryConfig readActorConfig; + ui64 fileSizeLimit = readActorConfig.FileSizeLimit; + if (srcDesc.HasFormat()) { + if (auto it = readActorConfig.FormatSizeLimits.find(srcDesc.GetFormat()); it != readActorConfig.FormatSizeLimits.end()) { + fileSizeLimit = it->second; + } + } + if (srcDesc.HasFormat() && srcDesc.HasRowType()) { + if (srcDesc.GetFormat() == "parquet") { + fileSizeLimit = readActorConfig.BlockFileSizeLimit; + } + } + + TString pathPattern = "*"; + auto pathPatternVariant = NS3Lister::ES3PatternVariant::FilePattern; + auto hasDirectories = std::find_if(paths.begin(), paths.end(), [](const TPath& a) { + return a.IsDirectory; + }) != paths.end(); + + if (hasDirectories) { + auto pathPatternValue = srcDesc.GetSettings().find("pathpattern"); + if (pathPatternValue == srcDesc.GetSettings().cend()) { + ythrow yexception() << "'pathpattern' must be 
configured for directory listing"; + } + pathPattern = pathPatternValue->second; + + auto pathPatternVariantValue = srcDesc.GetSettings().find("pathpatternvariant"); + if (pathPatternVariantValue == srcDesc.GetSettings().cend()) { + ythrow yexception() + << "'pathpatternvariant' must be configured for directory listing"; + } + if (!TryFromString(pathPatternVariantValue->second, pathPatternVariant)) { + ythrow yexception() + << "Unknown 'pathpatternvariant': " << pathPatternVariantValue->second; + } + } + auto consumersCount = hasDirectories ? maxPartitions : paths.size(); + + auto fileQueuePrefetchSize = State_->Configuration->FileQueuePrefetchSize.Get() + .GetOrElse(consumersCount * srcDesc.GetParallelDownloadCount() * 3); + + YQL_CLOG(DEBUG, ProviderS3) << " hasDirectories=" << hasDirectories << ", consumersCount=" << consumersCount; + + auto fileQueueActor = NActors::TActivationContext::ActorSystem()->Register( + NDq::CreateS3FileQueueActor( + 0ul, + std::move(paths), + fileQueuePrefetchSize, + fileSizeLimit, + useRuntimeListing, + consumersCount, + fileQueueBatchSizeLimit, + fileQueueBatchObjectCountLimit, + State_->Gateway, + connect.Url, + GetAuthInfo(State_->CredentialsFactory, State_->Configuration->Tokens.at(cluster)), + pathPattern, + pathPatternVariant, + NS3Lister::ES3PatternType::Wildcard + ), + NActors::TMailboxType::HTSwap, + State_->ExecutorPoolId + ); + + NActorsProto::TActorId protoId; + ActorIdToProto(fileQueueActor, &protoId); + TString stringId; + google::protobuf::TextFormat::PrintToString(protoId, &stringId); + + srcDesc.MutableSettings()->insert({"fileQueueActor", stringId}); + } +#endif protoSettings.PackFrom(srcDesc); sourceType = "S3Source"; } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp index f5b85cd31c51..2b8e9b649f38 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp +++ 
b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp @@ -79,7 +79,7 @@ struct TGeneratedColumnsConfig { class TS3IODiscoveryTransformer : public TGraphTransformerBase { public: - TS3IODiscoveryTransformer(TS3State::TPtr state, IHTTPGateway::TPtr gateway) + TS3IODiscoveryTransformer(TS3State::TPtr state) : State_(std::move(state)) , ListerFactory_(NS3Lister::MakeS3ListerFactory( State_->Configuration->MaxInflightListsPerQuery, @@ -87,7 +87,7 @@ class TS3IODiscoveryTransformer : public TGraphTransformerBase { State_->Configuration->ListingCallbackPerThreadQueueSize, State_->Configuration->RegexpCacheSize)) , ListingStrategy_(MakeS3ListingStrategy( - gateway, + State_->Gateway, ListerFactory_, State_->Configuration->MinDesiredDirectoriesOfFilesPerQuery, State_->Configuration->MaxInflightListsPerQuery, @@ -870,8 +870,8 @@ class TS3IODiscoveryTransformer : public TGraphTransformerBase { } -THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state, IHTTPGateway::TPtr gateway) { - return THolder(new TS3IODiscoveryTransformer(std::move(state), std::move(gateway))); +THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state) { + return THolder(new TS3IODiscoveryTransformer(std::move(state))); } } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp index a8de12679332..d72cd9b40535 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp @@ -31,14 +31,14 @@ TDataProviderInitializer GetS3DataProviderInitializer(IHTTPGateway::TPtr gateway if (gatewaysConfig) { state->Configuration->Init(gatewaysConfig->GetS3(), typeCtx); } - state->Configuration->AllowLocalFiles = allowLocalFiles; + state->Gateway = gateway; TDataProviderInfo info; info.Names.insert({TString{S3ProviderName}}); - info.Source = CreateS3DataSource(state, gateway); - info.Sink = CreateS3DataSink(state, gateway); + info.Source = 
CreateS3DataSource(state); + info.Sink = CreateS3DataSink(state); return info; }; diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider.h b/ydb/library/yql/providers/s3/provider/yql_s3_provider.h index d28144257583..dd02c29e0445 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider.h @@ -27,11 +27,13 @@ struct TS3State : public TThrRefBase TS3Configuration::TPtr Configuration = MakeIntrusive(); const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry = nullptr; ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory; + IHTTPGateway::TPtr Gateway; + ui32 ExecutorPoolId = 0; }; TDataProviderInitializer GetS3DataProviderInitializer(IHTTPGateway::TPtr gateway, ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory = nullptr, bool allowLocalFiles = false); -TIntrusivePtr CreateS3DataSource(TS3State::TPtr state, IHTTPGateway::TPtr gateway); -TIntrusivePtr CreateS3DataSink(TS3State::TPtr state, IHTTPGateway::TPtr gateway); +TIntrusivePtr CreateS3DataSource(TS3State::TPtr state); +TIntrusivePtr CreateS3DataSink(TS3State::TPtr state); } // namespace NYql diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h b/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h index e3694a3ba993..b399fd537356 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h @@ -18,7 +18,7 @@ THolder CreateS3DataSinkExecTransformer(TS3State::TPtr sta THolder CreateS3LogicalOptProposalTransformer(TS3State::TPtr state); THolder CreateS3SourceCallableExecutionTransformer(TS3State::TPtr state); -THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state, IHTTPGateway::TPtr gateway); +THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state); THolder CreateS3PhysicalOptProposalTransformer(TS3State::TPtr state); TExprNode::TPtr ExtractFormat(TExprNode::TListType& settings); diff 
--git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp index 411c6f3ada2a..a5c373ff677e 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp @@ -22,6 +22,10 @@ TS3Configuration::TS3Configuration() REGISTER_SETTING(*this, AtomicUploadCommit); REGISTER_SETTING(*this, UseConcurrentDirectoryLister); REGISTER_SETTING(*this, MaxDiscoveryFilesPerDirectory).Lower(1); + REGISTER_SETTING(*this, UseRuntimeListing); + REGISTER_SETTING(*this, FileQueueBatchSizeLimit); + REGISTER_SETTING(*this, FileQueueBatchObjectCountLimit); + REGISTER_SETTING(*this, FileQueuePrefetchSize); } TS3Settings::TConstPtr TS3Configuration::Snapshot() const { diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h index 9b6e2c12e87d..ebf6851a6fd3 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h @@ -24,10 +24,14 @@ struct TS3Settings { NCommon::TConfSetting AtomicUploadCommit; // Commit each file independently, w/o transaction semantic over all files NCommon::TConfSetting UseConcurrentDirectoryLister; NCommon::TConfSetting MaxDiscoveryFilesPerDirectory; + NCommon::TConfSetting UseRuntimeListing; // Enables runtime listing + NCommon::TConfSetting FileQueueBatchSizeLimit; // Limits total size of files in one PathBatch from FileQueue + NCommon::TConfSetting FileQueueBatchObjectCountLimit; // Limits count of files in one PathBatch from FileQueue + NCommon::TConfSetting FileQueuePrefetchSize; }; struct TS3ClusterSettings { - TString Url, Token; + TString Url; }; struct TS3Configuration : public TS3Settings, public NCommon::TSettingDispatcher { diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp 
index 0383ee14662c..bbf8883ec5f2 100644 --- a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp +++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp @@ -94,7 +94,7 @@ class TSolomonDqIntegration: public TDqIntegrationBase { YQL_ENSURE(false, "Unimplemented"); } - void FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString& ) override { + void FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&, size_t) override { YQL_ENSURE(false, "Unimplemented"); } diff --git a/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp b/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp index ab320337bfc1..28d4aebde0f2 100644 --- a/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp +++ b/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp @@ -114,7 +114,7 @@ class TYdbDqIntegration: public TDqIntegrationBase { return read; } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override { const TDqSource source(&node); if (const auto maySettings = source.Settings().Maybe()) { const auto settings = maySettings.Cast(); diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index 3c75331ea690..5b419feb953b 100644 --- a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -1050,7 +1050,7 @@ class TCreateTableNode final: public TAstListNode { Y_ENSURE(resetableParam, "Empty parameter"); Y_ENSURE(resetableParam.IsSet(), "Can't reset " << resetableParam.GetValueReset().Name << " in create mode"); const auto& [id, value] = resetableParam.GetValueSet(); - settings = L(settings, Q(Y(Q(to_lower(id.Name)), value))); + settings = L(settings, Q(Y(Q(id.Name), value))); } if (Params.TableSettings.CompactionPolicy) { settings = 
L(settings, Q(Y(Q("compactionPolicy"), Params.TableSettings.CompactionPolicy))); @@ -1300,9 +1300,9 @@ class TAlterTableNode final: public TAstListNode { Y_ENSURE(resetableParam, "Empty parameter"); if (resetableParam.IsSet()) { const auto& [id, value] = resetableParam.GetValueSet(); - settings = L(settings, Q(Y(Q(to_lower(id.Name)), value))); + settings = L(settings, Q(Y(Q(id.Name), value))); } else { - settings = L(settings, Q(Y(Q(to_lower(resetableParam.GetValueReset().Name))))); + settings = L(settings, Q(Y(Q(resetableParam.GetValueReset().Name)))); } } if (Params.TableSettings.CompactionPolicy) { diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index 089c6c76feec..79cab52b6406 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -6053,7 +6053,7 @@ Y_UNIT_TEST_SUITE(ExternalTable) { TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { if (word == "Write") { UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('dropColumns '('"my_column")#"); - UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('other_prop (String '"42")) '('x (String '"y")))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('Other_Prop (String '"42")) '('x (String '"y")))))#"); UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#"); UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); } diff --git a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json index 20071b53ef3e..55985fcc6192 100644 --- a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json @@ -956,30 +956,30 @@ "test.test[join-count_bans--Results]": [], "test.test[join-grace_join2--Analyze]": [ { - "checksum": "45db7c8306c9626a640bcb81c9c76780", - "size": 4462, - "uri": 
"https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt" + "checksum": "759025fd6317614a253eae816ff5941d", + "size": 5059, + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt" } ], "test.test[join-grace_join2--Debug]": [ { - "checksum": "0684948a27f55b655c998444a9060053", - "size": 1890, - "uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched" + "checksum": "34fdff009f1cfcdc53164eeb5db58dd7", + "size": 2171, + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched" } ], "test.test[join-grace_join2--Plan]": [ { - "checksum": "45db7c8306c9626a640bcb81c9c76780", - "size": 4462, - "uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt" + "checksum": "759025fd6317614a253eae816ff5941d", + "size": 5059, + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt" } ], "test.test[join-grace_join2--Results]": [ { - "checksum": "65a9b307bc9899b17f61962a5d4a49fb", + "checksum": "2ad0b4f3207032d285d5f99430e9abaf", "size": 5737, - "uri": "https://{canondata_backend}/1899731/149477001e0a8762e03fe5262dd2d939b716f0bf/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt" + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt" } ], "test.test[join-inmem_by_uncomparable_structs--Analyze]": [ diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 
0b798cfdc8f5..52447344f9c8 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -7400,9 +7400,9 @@ ], "test_sql2yql.test[join-grace_join2]": [ { - "checksum": "4909542187f7c74060abc053d5707f26", - "size": 1627, - "uri": "https://{canondata_backend}/1942278/d84f6d9ab025b27e11f463124468076d499ed9b3/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql" + "checksum": "dec15765d9200297261bb22775ec5338", + "size": 1782, + "uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql" } ], "test_sql2yql.test[join-group_compact_by]": [ @@ -24879,9 +24879,9 @@ ], "test_sql_format.test[join-grace_join2]": [ { - "checksum": "4946227ff929407fc62f749ef756ef4d", - "size": 185, - "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql" + "checksum": "7656454a9434ff51ab800908ae346c42", + "size": 233, + "uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql" } ], "test_sql_format.test[join-group_compact_by]": [ @@ -25649,9 +25649,9 @@ ], "test_sql_format.test[join-nopushdown_filter_with_depends_on]": [ { - "checksum": "7c0b7c120f321f9b415663ece29a09cd", - "size": 247, - "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql" + "checksum": "956eea7d7ef4126950ed02a322c6c492", + "size": 272, + "uri": "https://{canondata_backend}/212715/1c52a4632d14126361f7585c218d202718c6fa0f/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql" } ], "test_sql_format.test[join-opt_on_opt_side]": [ diff --git a/ydb/library/yql/tests/sql/suites/join/grace_join2.sql 
b/ydb/library/yql/tests/sql/suites/join/grace_join2.sql index ee9866dbf939..1b10d992e347 100644 --- a/ydb/library/yql/tests/sql/suites/join/grace_join2.sql +++ b/ydb/library/yql/tests/sql/suites/join/grace_join2.sql @@ -7,4 +7,4 @@ from plato.customers1 as c1 join plato.customers1 as c2 -on c1.country_id = c2.country_id; +on c1.country_id = c2.country_id order by c1.customer_id, c2.customer_id; diff --git a/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql b/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql index e1dc4b96829c..8e43238765d9 100644 --- a/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql +++ b/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql @@ -1,4 +1,5 @@ /* postgres can not */ +/* hybridfile can not */ /* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 4 */ use plato; diff --git a/ydb/library/yql/tools/dqrun/dqrun.cpp b/ydb/library/yql/tools/dqrun/dqrun.cpp index 873892e7a741..86e98772ad63 100644 --- a/ydb/library/yql/tools/dqrun/dqrun.cpp +++ b/ydb/library/yql/tools/dqrun/dqrun.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,7 @@ #include #include #include +#include #include #ifdef PROFILE_MEMORY_ALLOCATIONS @@ -226,14 +228,20 @@ class TOptPipelineConfigurator : public IPipelineConfigurator { IOutputStream* TracePlan; }; -NDq::IDqAsyncIoFactory::TPtr CreateAsyncIoFactory(const NYdb::TDriver& driver, IHTTPGateway::TPtr httpGateway, NYql::NConnector::IClient::TPtr genericClient, size_t HTTPmaxTimeSeconds, size_t maxRetriesCount) { +NDq::IDqAsyncIoFactory::TPtr CreateAsyncIoFactory( + const NYdb::TDriver& driver, + IHTTPGateway::TPtr httpGateway, + NYql::NConnector::IClient::TPtr genericClient, + ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory, + size_t HTTPmaxTimeSeconds, + size_t maxRetriesCount) { auto factory = MakeIntrusive(); RegisterDqPqReadActorFactory(*factory, 
driver, nullptr); RegisterYdbReadActorFactory(*factory, driver, nullptr); RegisterS3ReadActorFactory(*factory, nullptr, httpGateway, GetHTTPDefaultRetryPolicy(TDuration::Seconds(HTTPmaxTimeSeconds), maxRetriesCount), {}, nullptr); RegisterS3WriteActorFactory(*factory, nullptr, httpGateway); RegisterClickHouseReadActorFactory(*factory, nullptr, httpGateway); - RegisterGenericReadActorFactory(*factory, nullptr, genericClient); + RegisterGenericReadActorFactory(*factory, credentialsFactory, genericClient); RegisterDqPqWriteActorFactory(*factory, driver, nullptr); @@ -267,7 +275,8 @@ struct TActorIds { std::tuple, TActorIds> RunActorSystem( const TGatewaysConfig& gatewaysConfig, IMetricsRegistryPtr& metricsRegistry, - NYql::NLog::ELevel loggingLevel + NYql::NLog::ELevel loggingLevel, + ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory ) { auto actorSystemManager = std::make_unique(metricsRegistry, YqlToActorsLogLevel(loggingLevel)); TActorIds actorIds; @@ -288,7 +297,7 @@ std::tuple, TActorIds> RunActorSystem( auto httpProxy = NHttp::CreateHttpProxy(); actorIds.HttpProxy = actorSystemManager->GetActorSystem()->Register(httpProxy); - auto databaseResolver = NFq::CreateDatabaseResolver(actorIds.HttpProxy, nullptr); + auto databaseResolver = NFq::CreateDatabaseResolver(actorIds.HttpProxy, credentialsFactory); actorIds.DatabaseResolver = actorSystemManager->GetActorSystem()->Register(databaseResolver); } @@ -427,6 +436,7 @@ int RunMain(int argc, const char* argv[]) TString mountConfig; TString mestricsPusherConfig; TString udfResolver; + TString tokenAccessorEndpoint; bool udfResolverFilterSyscalls = false; TString statFile; TString metricsFile; @@ -585,6 +595,10 @@ int RunMain(int argc, const char* argv[]) failureInjections[key] = std::make_pair(ui32(0), FromString(fail)); } }); + opts.AddLongOption("token-accessor-endpoint", "Network address of Token Accessor service in format grpc(s)://host:port") + .Optional() + .RequiredArgument("ENDPOINT") + 
.StoreResult(&tokenAccessorEndpoint); opts.AddHelpOption('h'); opts.SetFreeArgsNum(0); @@ -745,12 +759,21 @@ int RunMain(int argc, const char* argv[]) dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway)); } + ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory; + + if (tokenAccessorEndpoint) { + TVector ss = StringSplitter(tokenAccessorEndpoint).SplitByString("://"); + YQL_ENSURE(ss.size() == 2, "Invalid tokenAccessorEndpoint: " << tokenAccessorEndpoint); + + credentialsFactory = NYql::CreateSecuredServiceAccountCredentialsOverTokenAccessorFactory(ss[1], ss[0] == "grpcs", ""); + } + auto dqCompFactory = NMiniKQL::GetCompositeWithBuiltinFactory(factories); // Actor system starts here and will be automatically destroyed when goes out of the scope. std::unique_ptr actorSystemManager; TActorIds actorIds; - std::tie(actorSystemManager, actorIds) = RunActorSystem(gatewaysConfig, metricsRegistry, loggingLevel); + std::tie(actorSystemManager, actorIds) = RunActorSystem(gatewaysConfig, metricsRegistry, loggingLevel, credentialsFactory); IHTTPGateway::TPtr httpGateway; if (gatewaysConfig.HasClickHouse()) { @@ -781,7 +804,8 @@ int RunMain(int argc, const char* argv[]) } genericClient = NConnector::MakeClientGRPC(gatewaysConfig.GetGeneric().GetConnector()); - dataProvidersInit.push_back(GetGenericDataProviderInitializer(genericClient, dbResolver)); + + dataProvidersInit.push_back(GetGenericDataProviderInitializer(genericClient, dbResolver, credentialsFactory)); } if (gatewaysConfig.HasYdb()) { @@ -847,10 +871,9 @@ int RunMain(int argc, const char* argv[]) size_t requestTimeout = gatewaysConfig.HasHttpGateway() && gatewaysConfig.GetHttpGateway().HasRequestTimeoutSeconds() ? gatewaysConfig.GetHttpGateway().GetRequestTimeoutSeconds() : 100; size_t maxRetries = gatewaysConfig.HasHttpGateway() && gatewaysConfig.GetHttpGateway().HasMaxRetries() ? 
gatewaysConfig.GetHttpGateway().GetMaxRetries() : 2; - bool enableSpilling = res.Has("enable-spilling"); dqGateway = CreateLocalDqGateway(funcRegistry.Get(), dqCompFactory, dqTaskTransformFactory, dqTaskPreprocessorFactories, enableSpilling, - CreateAsyncIoFactory(driver, httpGateway, genericClient, requestTimeout, maxRetries), threads, + CreateAsyncIoFactory(driver, httpGateway, genericClient, credentialsFactory, requestTimeout, maxRetries), threads, metricsRegistry, metricsPusherFactory); } diff --git a/ydb/library/yql/tools/dqrun/ya.make b/ydb/library/yql/tools/dqrun/ya.make index 34114429b797..e2df01c56264 100644 --- a/ydb/library/yql/tools/dqrun/ya.make +++ b/ydb/library/yql/tools/dqrun/ya.make @@ -42,6 +42,7 @@ ENDIF() ydb/library/yql/providers/clickhouse/provider ydb/library/yql/providers/common/comp_nodes ydb/library/yql/providers/common/proto + ydb/library/yql/providers/common/token_accessor/client ydb/library/yql/providers/common/udf_resolve ydb/library/yql/providers/generic/actors ydb/library/yql/providers/generic/provider diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 70b8a8668d11..80c39b86479a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -2,6 +2,8 @@ #if USE_ARROW || USE_PARQUET +#include +#include #include #include #include @@ -394,6 +396,43 @@ namespace NDB } } + static void fillArrowArrayWithDateTime64ColumnData( + const DataTypePtr & type, + ColumnPtr write_column, + const PaddedPODArray * null_bytemap, + const String & format_name, + arrow::ArrayBuilder* array_builder, + size_t start, + size_t end) + { + const auto * datetime64_type = assert_cast(type.get()); + const auto & column = assert_cast 
&>(*write_column); + arrow::TimestampBuilder & builder = assert_cast(*array_builder); + arrow::Status status; + + auto scale = datetime64_type->getScale(); + bool need_rescale = scale % 3; + auto rescale_multiplier = DecimalUtils::scaleMultiplier(3 - scale % 3); + for (size_t value_i = start; value_i < end; ++value_i) + { + if (null_bytemap && (*null_bytemap)[value_i]) + { + status = builder.AppendNull(); + } + else + { + auto value = static_cast(column[value_i].get>().getValue()); + if (need_rescale) + { + if (common::mulOverflow(value, rescale_multiplier, value)) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow"); + } + status = builder.Append(value); + } + checkStatus(status, write_column->getName(), format_name); + } + } + static void fillArrowArray( const String & column_name, ColumnPtr & column, @@ -454,6 +493,10 @@ namespace NDB DataTypePtr array_type = assert_cast(column_type.get())->getNestedType(); fillArrowArrayWithArrayColumnData(column_name, column_array, array_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); } + else if (isDateTime64(column_type)) + { + fillArrowArrayWithDateTime64ColumnData(column_type, column, null_bytemap, format_name, array_builder, start, end); + } else if (isDecimal(column_type)) { auto fill_decimal = [&](const auto & types) -> bool @@ -548,6 +591,18 @@ namespace NDB } } + static arrow::TimeUnit::type getArrowTimeUnit(const DataTypeDateTime64 * type) + { + UInt32 scale = type->getScale(); + if (scale == 0) + return arrow::TimeUnit::SECOND; + if (scale > 0 && scale <= 3) + return arrow::TimeUnit::MILLI; + if (scale > 3 && scale <= 6) + return arrow::TimeUnit::MICRO; + return arrow::TimeUnit::NANO; + } + static std::shared_ptr getArrowType( DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable) { @@ -630,6 +685,12 @@ namespace NDB getArrowType(val_type, columns[1], column_name, format_name, 
out_is_column_nullable)); } + if (isDateTime64(column_type)) + { + const auto * datetime64_type = assert_cast(column_type.get()); + return arrow::timestamp(getArrowTimeUnit(datetime64_type), datetime64_type->getTimeZone().getTimeZone()); + } + const std::string type_name = column_type->getFamilyName(); if (const auto * arrow_type_it = std::find_if( internal_type_to_arrow_type.begin(), diff --git a/ydb/public/sdk/cpp/client/ydb_params/params.h b/ydb/public/sdk/cpp/client/ydb_params/params.h index 6e8b204408e8..5d29822f1805 100644 --- a/ydb/public/sdk/cpp/client/ydb_params/params.h +++ b/ydb/public/sdk/cpp/client/ydb_params/params.h @@ -28,6 +28,7 @@ namespace NExperimental { namespace NQuery { class TExecQueryImpl; + class TQueryClient; } class TParamsBuilder; @@ -40,6 +41,7 @@ class TParams { friend class NScripting::TScriptingClient; friend class NExperimental::TStreamQueryClient; friend class NQuery::TExecQueryImpl; + friend class NQuery::TQueryClient; friend class NYdb::TProtoAccessor; public: bool Empty() const; diff --git a/ydb/public/sdk/cpp/client/ydb_query/client.cpp b/ydb/public/sdk/cpp/client/ydb_query/client.cpp index 22d0389d3846..93926e51df36 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/client.cpp +++ b/ydb/public/sdk/cpp/client/ydb_query/client.cpp @@ -81,7 +81,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public Connections_, DbDriverState_, query, txControl, params, settings, session); } - NThreading::TFuture ExecuteScript(const TString& script, const TExecuteScriptSettings& settings) { + NThreading::TFuture ExecuteScript(const TString& script, const TMaybe& params, const TExecuteScriptSettings& settings) { using namespace Ydb::Query; auto request = MakeOperationRequest(settings); request.set_exec_mode(settings.ExecMode_); @@ -89,6 +89,11 @@ class TQueryClient::TImpl: public TClientImplCommon, public request.mutable_script_content()->set_syntax(settings.Syntax_); request.mutable_script_content()->set_text(script); 
SetDuration(settings.ResultsTtl_, *request.mutable_results_ttl()); + + if (params) { + *request.mutable_parameters() = params->GetProtoMap(); + } + auto promise = NThreading::NewPromise(); auto responseCb = [promise] @@ -536,7 +541,13 @@ TAsyncExecuteQueryIterator TQueryClient::StreamExecuteQuery(const TString& query NThreading::TFuture TQueryClient::ExecuteScript(const TString& script, const TExecuteScriptSettings& settings) { - return Impl_->ExecuteScript(script, settings); + return Impl_->ExecuteScript(script, {}, settings); +} + +NThreading::TFuture TQueryClient::ExecuteScript(const TString& script, + const TParams& params, const TExecuteScriptSettings& settings) +{ + return Impl_->ExecuteScript(script, params, settings); } TAsyncFetchScriptResultsResult TQueryClient::FetchScriptResults(const NKikimr::NOperationId::TOperationId& operationId, int64_t resultSetIndex, diff --git a/ydb/public/sdk/cpp/client/ydb_query/client.h b/ydb/public/sdk/cpp/client/ydb_query/client.h index b77461771729..d6a09ed6b635 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/client.h +++ b/ydb/public/sdk/cpp/client/ydb_query/client.h @@ -90,6 +90,9 @@ class TQueryClient { NThreading::TFuture ExecuteScript(const TString& script, const TExecuteScriptSettings& settings = TExecuteScriptSettings()); + NThreading::TFuture ExecuteScript(const TString& script, + const TParams& params, const TExecuteScriptSettings& settings = TExecuteScriptSettings()); + TAsyncFetchScriptResultsResult FetchScriptResults(const NKikimr::NOperationId::TOperationId& operationId, int64_t resultSetIndex, const TFetchScriptResultsSettings& settings = TFetchScriptResultsSettings()); diff --git a/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp b/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp index ceeaf25ab919..ef5854b983c2 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp +++ b/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp @@ -136,16 +136,21 @@ struct TExecuteQueryBuffer : 
public TThrRefBase, TNonCopyable { Iterator_.ReadNext().Subscribe([self](TAsyncExecuteQueryPart partFuture) mutable { auto part = partFuture.ExtractValue(); + if (const auto& st = part.GetStats()) { + self->Stats_ = st; + } + if (!part.IsSuccess()) { + TMaybe stats; + std::swap(self->Stats_, stats); + if (part.EOS()) { TVector issues; TVector resultProtos; - TMaybe stats; TMaybe tx; std::swap(self->Issues_, issues); std::swap(self->ResultSets_, resultProtos); - std::swap(self->Stats_, stats); std::swap(self->Tx_, tx); TVector resultSets; @@ -160,7 +165,7 @@ struct TExecuteQueryBuffer : public TThrRefBase, TNonCopyable { std::move(tx) )); } else { - self->Promise_.SetValue(TExecuteQueryResult(std::move(part), {}, {}, {})); + self->Promise_.SetValue(TExecuteQueryResult(std::move(part), {}, std::move(stats), {})); } return; @@ -185,10 +190,6 @@ struct TExecuteQueryBuffer : public TThrRefBase, TNonCopyable { resultSet.mutable_rows()->Add(inRsProto.rows().begin(), inRsProto.rows().end()); } - if (const auto& st = part.GetStats()) { - self->Stats_ = st; - } - if (const auto& tx = part.GetTransaction()) { self->Tx_ = tx; } diff --git a/ydb/public/sdk/cpp/client/ydb_query/stats.cpp b/ydb/public/sdk/cpp/client/ydb_query/stats.cpp index f5fbc9d6c02e..c007547d4e84 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/stats.cpp +++ b/ydb/public/sdk/cpp/client/ydb_query/stats.cpp @@ -46,6 +46,16 @@ TMaybe TExecStats::GetPlan() const { return proto.query_plan(); } +TMaybe TExecStats::GetAst() const { + auto proto = Impl_->Proto; + + if (proto.query_ast().empty()) { + return {}; + } + + return proto.query_ast(); +} + TDuration TExecStats::GetTotalDuration() const { return TDuration::MicroSeconds(Impl_->Proto.total_duration_us()); } diff --git a/ydb/public/sdk/cpp/client/ydb_query/stats.h b/ydb/public/sdk/cpp/client/ydb_query/stats.h index 1fed19f6e353..3a62045a72f9 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/stats.h +++ b/ydb/public/sdk/cpp/client/ydb_query/stats.h @@ -28,6 +28,7 
@@ class TExecStats { TString ToString(bool withPlan = false) const; TMaybe GetPlan() const; + TMaybe GetAst() const; TDuration GetTotalDuration() const; TDuration GetTotalCpuTime() const; diff --git a/ydb/public/tools/lib/cmds/__init__.py b/ydb/public/tools/lib/cmds/__init__.py index 81289eb06907..591ed4dfe06b 100644 --- a/ydb/public/tools/lib/cmds/__init__.py +++ b/ydb/public/tools/lib/cmds/__init__.py @@ -8,7 +8,9 @@ import string import typing # noqa: F401 import sys +from six.moves.urllib.parse import urlparse +from ydb.library.yql.providers.common.proto.gateways_config_pb2 import TGenericConnectorConfig from ydb.tests.library.common import yatest_common from ydb.tests.library.harness.kikimr_cluster import kikimr_cluster_factory from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator @@ -255,6 +257,34 @@ def enable_tls(): return os.getenv('YDB_GRPC_ENABLE_TLS') == 'true' +def generic_connector_config(): + endpoint = os.getenv("FQ_CONNECTOR_ENDPOINT") + if not endpoint: + return None + + parsed = urlparse(endpoint) + if not parsed.hostname: + raise ValueError("Invalid host '{}' in FQ_CONNECTOR_ENDPOINT".format(parsed.hostname)) + + if not (1024 <= parsed.port <= 65535): + raise ValueError("Invalid port '{}' in FQ_CONNECTOR_ENDPOINT".format(parsed.port)) + + valid_schemes = ['grpc', 'grpcs'] + if parsed.scheme not in valid_schemes: + raise ValueError("Invalid schema '{}' in FQ_CONNECTOR_ENDPOINT (possible: {})".format(parsed.scheme, valid_schemes)) + + cfg = TGenericConnectorConfig() + cfg.Endpoint.host = parsed.hostname + cfg.Endpoint.port = parsed.port + + if parsed.scheme == 'grpc': + cfg.UseSsl = False + elif parsed.scheme == 'grpcs': + cfg.UseSsl = True + + return cfg + + def grpc_tls_data_path(arguments): default_store = arguments.ydb_working_dir if arguments.ydb_working_dir else None return os.getenv('YDB_GRPC_TLS_DATA_PATH', default_store) @@ -335,6 +365,7 @@ def deploy(arguments): default_users=default_users(), 
extra_feature_flags=enable_feature_flags, extra_grpc_services=arguments.enabled_grpc_services, + generic_connector_config=generic_connector_config(), **optionals ) diff --git a/ydb/public/tools/lib/cmds/ut/test.py b/ydb/public/tools/lib/cmds/ut/test.py new file mode 100644 index 000000000000..e5164d2413d5 --- /dev/null +++ b/ydb/public/tools/lib/cmds/ut/test.py @@ -0,0 +1,26 @@ +import os + +from ydb.public.tools.lib.cmds import generic_connector_config +from ydb.library.yql.providers.common.proto.gateways_config_pb2 import TGenericConnectorConfig + + +def test_kikimr_config_generator_generic_connector_config(): + os.environ["FQ_CONNECTOR_ENDPOINT"] = "grpc://localhost:50051" + + expected = TGenericConnectorConfig() + expected.Endpoint.host = "localhost" + expected.Endpoint.port = 50051 + expected.UseSsl = False + + actual = generic_connector_config() + assert actual == expected + + os.environ["FQ_CONNECTOR_ENDPOINT"] = "grpcs://localhost:50051" + + expected = TGenericConnectorConfig() + expected.Endpoint.host = "localhost" + expected.Endpoint.port = 50051 + expected.UseSsl = True + + actual = generic_connector_config() + assert actual == expected diff --git a/ydb/public/tools/lib/cmds/ut/ya.make b/ydb/public/tools/lib/cmds/ut/ya.make new file mode 100644 index 000000000000..97b269c2ee0b --- /dev/null +++ b/ydb/public/tools/lib/cmds/ut/ya.make @@ -0,0 +1,12 @@ +PY3TEST() + +PEERDIR( + ydb/public/tools/lib/cmds + ydb/library/yql/providers/common/proto +) + +TEST_SRCS( + test.py +) + +END() diff --git a/ydb/public/tools/lib/cmds/ya.make b/ydb/public/tools/lib/cmds/ya.make index d8ef2f061bd9..53ccb215ec56 100644 --- a/ydb/public/tools/lib/cmds/ya.make +++ b/ydb/public/tools/lib/cmds/ya.make @@ -4,8 +4,11 @@ PY_SRCS( ) PEERDIR( - ydb/tests/library + contrib/python/six library/python/testing/recipe + ydb/tests/library ) END() + +RECURSE_FOR_TESTS(ut) diff --git a/ydb/public/tools/local_ydb/__main__.py b/ydb/public/tools/local_ydb/__main__.py index 
0d7affd19c5a..f315b682cf9a 100644 --- a/ydb/public/tools/local_ydb/__main__.py +++ b/ydb/public/tools/local_ydb/__main__.py @@ -9,7 +9,7 @@ \033[94m To deploy the local YDB cluster: - {prog} deploy --ydb-working-dir /absolute/path/to/working/directory --ydb-binary-path /path/to/kikimr/driver + {prog} deploy --ydb-working-dir /absolute/path/to/working/directory --ydb-binary-path /path/to/kikimr/driver To cleanup the deployed YDB cluster (this includes removal of working directory, all configuration files, disks and so on): diff --git a/ydb/public/tools/local_ydb/ya.make b/ydb/public/tools/local_ydb/ya.make index e5a2a4165e30..9fa570b365df 100644 --- a/ydb/public/tools/local_ydb/ya.make +++ b/ydb/public/tools/local_ydb/ya.make @@ -3,6 +3,7 @@ PY3_PROGRAM(local_ydb) PY_SRCS(__main__.py) PEERDIR( + ydb/library/yql/providers/common/proto ydb/public/tools/lib/cmds ) diff --git a/ydb/services/fq/ut_integration/fq_ut.cpp b/ydb/services/fq/ut_integration/fq_ut.cpp index 1b5764fee4cf..c011c80138d7 100644 --- a/ydb/services/fq/ut_integration/fq_ut.cpp +++ b/ydb/services/fq/ut_integration/fq_ut.cpp @@ -197,32 +197,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { } } - Y_UNIT_TEST(Basic_EmptyTable) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - UpsertToExistingTable(driver, location); - NYdb::NFq::TClient client(driver); - const TString folderId = "some_folder_id"; - { - const auto request = ::NFq::TCreateConnectionBuilder() - .SetName("testdbempty") - .CreateYdb("Root", location, "") - .Build(); - const auto result = client - .CreateConnection(request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_C(result.GetStatus() == EStatus::SUCCESS, result.GetIssues().ToString()); - } - - const TString queryId = CreateNewHistoryAndWaitFinish( - folderId, client, - "select count(*) from 
testdbempty.`yq/empty_table`", - FederatedQuery::QueryMeta::COMPLETED); - CheckGetResultData(client, queryId, folderId, 1, 1, 0); - } - Y_UNIT_TEST(Basic_EmptyList) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); ui16 grpc = server.GetPort(); @@ -256,32 +230,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { CreateNewHistoryAndWaitFinish(folderId, client, "select null", expectedStatus); } - SIMPLE_UNIT_FORKED_TEST(Basic_Tagged) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - NYdb::NFq::TClient client(driver); - const TString folderId = "some_folder_id"; - - - { - auto request = ::NFq::TCreateConnectionBuilder{} - .SetName("testdb00") - .CreateYdb("Root", location, "") - .Build(); - - auto result = client.CreateConnection( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - } - - auto expectedStatus = FederatedQuery::QueryMeta::COMPLETED; - CreateNewHistoryAndWaitFinish(folderId, client, "select AsTagged(count(*), \"tag\") from testdb00.`yq/connections`", expectedStatus); - } - Y_UNIT_TEST(Basic_TaggedLiteral) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); ui16 grpc = server.GetPort(); @@ -295,50 +243,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { } // use fork for data test due to ch initialization problem - SIMPLE_UNIT_FORKED_TEST(ExtendedDatabaseId) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - - NYdb::NFq::TClient client(driver); - const TString folderId = "folder_id_" + CreateGuidAsString(); - { - const auto request = ::NFq::TCreateConnectionBuilder() - 
.SetName("testdb01") - .CreateYdb("FakeDatabaseId", "") - .Build(); - const auto result = client - .CreateConnection(request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_C(result.GetStatus() == EStatus::SUCCESS, result.GetIssues().ToString()); - } - - { - const auto request = ::NFq::TCreateConnectionBuilder() - .SetName("testdb02") - .CreateYdb("FakeDatabaseId", "") - .Build(); - const auto result = client - .CreateConnection(request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_C(result.GetStatus() == EStatus::SUCCESS, result.GetIssues().ToString()); - } - - { - const auto queryId = CreateNewHistoryAndWaitFinish(folderId, client, - "select count(*) from testdb01.`yq/connections`", FederatedQuery::QueryMeta::COMPLETED); - CheckGetResultData(client, queryId, folderId, 1, 1, 2); - } - - { - // test connections db with 2 databaseId - const auto queryId = CreateNewHistoryAndWaitFinish(folderId, client, - "select count(*) from testdb02.`yq/connections`", FederatedQuery::QueryMeta::COMPLETED); - CheckGetResultData(client, queryId, folderId, 1, 1, 2); - } - } - Y_UNIT_TEST(DescribeConnection) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); ui16 grpc = server.GetPort(); @@ -855,70 +759,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { } } -Y_UNIT_TEST_SUITE(Yq_2) { - SIMPLE_UNIT_FORKED_TEST(ReadFromYdbOverYq) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - NYdb::NFq::TClient client(driver); - const auto folderId = TString(__func__) + "folder_id"; - - { - auto request = ::NFq::TCreateConnectionBuilder{} - .SetName("testdb00") - .CreateYdb("Root", location, "") - .Build(); - - auto result = client.CreateConnection( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, 
result.GetIssues().ToString()); - } - - TString queryId; - { - auto request = ::NFq::TCreateQueryBuilder{} - .SetText("select count(*) from testdb00.`yq/connections`") - .Build(); - auto result = client.CreateQuery( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - queryId = result.GetResult().query_id(); - } - - { - auto request = ::NFq::TDescribeQueryBuilder{}.SetQueryId(queryId).Build(); - auto result = DoWithRetryOnRetCode([&]() { - auto result = client.DescribeQuery( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - const auto status = result.GetResult().query().meta().status(); - PrintProtoIssues(result.GetResult().query().issue()); - return status == FederatedQuery::QueryMeta::COMPLETED; - }, TRetryOptions(10)); - UNIT_ASSERT_C(result, "the execution of the query did not end within the time limit"); - } - - { - auto request = ::NFq::TGetResultDataBuilder{}.SetQueryId(queryId).Build(); - auto result = client.GetResultData( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - const auto& resultSet = result.GetResult().result_set(); - UNIT_ASSERT_VALUES_EQUAL(resultSet.rows().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(resultSet.columns().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(resultSet.rows(0).items(0).uint64_value(), 1); - } - } -} - Y_UNIT_TEST_SUITE(PrivateApi) { Y_UNIT_TEST(PingTask) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); diff --git a/ydb/tests/fq/s3/canondata/result.json b/ydb/tests/fq/s3/canondata/result.json index e02eb08a50c9..9b05383e5ac5 100644 --- a/ydb/tests/fq/s3/canondata/result.json +++ b/ydb/tests/fq/s3/canondata/result.json @@ -77,6 +77,9 @@ 
"test_format_setting.TestS3.test_timestamp_simple_format_insert[v1-common/simple_format/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_format_insert[v1-common/simple_format/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_format_insert[v1-common/simple_format/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv" }, @@ -86,6 +89,9 @@ "test_format_setting.TestS3.test_timestamp_simple_format_insert[v2-common/simple_format/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_format_insert[v2-common/simple_format/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_format_insert[v2-common/simple_format/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv" }, @@ -95,6 +101,9 @@ "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v1-timestamp/simple_iso/test.json-json_each_row]": { "uri": 
"file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v1-timestamp/simple_iso/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v1-timestamp/simple_iso/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv" }, @@ -104,6 +113,9 @@ "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v2-timestamp/simple_iso/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v2-timestamp/simple_iso/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v2-timestamp/simple_iso/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv" }, @@ -113,6 +125,9 @@ "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v1-common/simple_posix/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v1-common/simple_posix/test.parquet-parquet]": { + "uri": 
"file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v1-common/simple_posix/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv" }, @@ -122,6 +137,9 @@ "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v2-common/simple_posix/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v2-common/simple_posix/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v2-common/simple_posix/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv" }, @@ -143,6 +161,15 @@ "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.json-json_each_row-UNIX_TIME_SECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json" }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MICROSECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet" + }, + 
"test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MILLISECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet" + }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_SECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv" }, @@ -170,6 +197,15 @@ "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.json-json_each_row-UNIX_TIME_SECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json" }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MICROSECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet" + }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MILLISECONDS]": { + "uri": 
"file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet" + }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_SECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv" }, diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv deleted file mode 100644 index d5849fbf9c86..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20",100 -"Apple",2,"2022-10-21",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json deleted file 
mode 100644 index 29b2d985fc93..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-21","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.parquet-parquet_/date_time_format_common_simple_format_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.parquet-parquet_/date_time_format_common_simple_format_test.parquet deleted file mode 100644 index d5de5793afcdb354f09cc8b8464b95437e88a5e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1064 zcmcJPO-sW-5QZn4&ALbrMLS^w3-+K(5nJ?7PhRRlJQS%Sc<~ZqumRh~wAH`w5kL_t`>FvIi@1sbWkc(|czUrYkp1>iOt>hdUA1eDD4 zh_CJ{A%#X>K=S)&Rxvt;>t9uBr2V4d6Yb{G_YIV%5yjJ3s$n%FN3Nl)G!zeE^gOLj zRGJ+Uv078B(169f#~~=O45s1CM+w+Ef}&3VS!!p;L_I)urbg~Ab_SPw+8b}7$;=j8 z!~Dky(q$=o_s_c14dljhoen_nAoi2jb-3yWaoqQlWbxj83zKEEn0JSR)4{1X)&S)T J7Qlr55g#+3Z`S|- diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv deleted file mode 100644 index 2f5ff74076fb..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 100 -Apple 2 2022-10-21 22 diff --git 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv deleted file mode 100644 index e0c73f1170e2..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv +++ /dev/null @@ -1,4 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20T16:40:47Z",100 -"Apple",2,"2022-10-20T13:40:47Z",22 -"Pear",15,"2022-10-20T16:40:47Z",33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json deleted file mode 100644 index 88b515dd7ac4..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json +++ /dev/null @@ -1,3 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20T16:40:47Z","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20T13:40:47Z","Weight":22} -{"Fruit":"Pear","Price":15,"Time":"2022-10-20T16:40:47Z","Weight":33} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.parquet-parquet_/date_time_simple_iso_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.parquet-parquet_/date_time_simple_iso_test.parquet deleted file mode 100644 index 00869d5441c49d6b42fc7ce278280a083772b42b..0000000000000000000000000000000000000000 
GIT binary patch literal 0 HcmV?d00001 literal 1091 zcmcIkQA@)x5Wc2qtWubW7g7v3VTL`-F(0-E@nt^vG;oUGiwI>FS8!YHHU)pZm#tfI z8zMMq!(G0++w$)Q5FxFTLI%}48kS{5i0LGn;&!=5-ER?p!__ANUnoa!Sx-N8KIHqRQY`}D42SN>t9#f$ore5&(u-UI-#{L zsf{Xb<5tpQM?fy=ZXv07h?AFoamT*dAx(=zDf-Ry-nlESuah`qAVMI bl;7iQoQ#I$X1!5wgdGjA{K26&fu4^q+~Rif diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv deleted file mode 100644 index 2c33d3d95966..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv +++ /dev/null @@ -1,4 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20T16:40:47Z 100 -Apple 2 2022-10-20T13:40:47Z 22 -Pear 15 2022-10-20T16:40:47Z 33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv deleted file mode 100644 index 3084851bff2e..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20 16:40:47",100 -"Apple",2,"2022-10-20 16:41:47",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json deleted file mode 100644 index c5ac9fb0f5a8..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20 16:40:47","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20 16:41:47","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv deleted file mode 100644 index 5353438e9ef0..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 16:40:47 100 -Apple 2 2022-10-20 16:41:47 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv deleted file mode 100644 index d5849fbf9c86..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20",100 -"Apple",2,"2022-10-21",22 diff 
--git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json deleted file mode 100644 index 29b2d985fc93..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-21","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv deleted file mode 100644 index 2f5ff74076fb..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 100 -Apple 2 2022-10-21 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet similarity index 51% rename from 
ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet rename to ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet index d70e27676f48e57032feb192592d7a27b41f4702..790de8357026f2de55f04366985f63ce93d80c69 100644 GIT binary patch delta 161 zcmdnXv4mrTC8HGsV-70=gF}@(D}yM5D4QskjwlNdvWc?ENN_;Jfiy^w7=s3iz~m-I qO*yqQ7#=(79eC3Ws{NMfXPU3z|@K{Xkdy?E@sqJ WCsu7Ev&iJLjHf0qWI9ZOp2YwZYarJE diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..790de8357026f2de55f04366985f63ce93d80c69 GIT binary patch literal 1060 zcmb`HO-sW-5QZn4%^IYKs-2L)LVK`FkhauAJ$bMm#6yrGf)_8L1{>%{Oj`wiy|YQ1 zeh~FwNnmGZCXZWKS{HJlDEQm+=Cb!yUsh)Pu51zVq^;4Coge8P%bsinTS4Elu)e z{yhel>xj#B%mIIEM~)oEl*raFgESsS1q@S6{!*Zns)6Tgs_HcuV^Zhlt8QuX9dTSc z_tUDAh;7$V;0cL*g~y-b$%f8ST>e?5lyOJ7*Gw5wbRUrGh|8TU?r-f1G<&0*c#h(? 
z(WZ~KIwI+QtWbi*yq6xhVm=&2lMrpdN)uMo0m!tSHHSJ+cBVwL7M#cy(Ga{&STTUZ zJ?RAp7&3Lh(NKr(BOR1tZvU-5oj$G%$EpGN^(0K+m(ijXDRKEQn<3S#Z~kbolCFnGF)6^x z=|S=(Vf^_aAthr8O>tPOGJ)9UuM@Zy*ubEe(21M;se+SM8a-dL*kUE#FY)e5Q2_Sn zEX7?Ck2AF37>ifk$nhiIMkQWwo8|Wp)j$;uRL|8cbXbAsM}t1-;C1+rJ*B7s>us2< z382q*IUiaAvP(7Ejd3YKe literal 0 HcmV?d00001 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..af3c1e98ac68ab515a9ef257b81b5722f83776cf GIT binary patch literal 1078 zcmb`HOH0E*5XUF!ZdjqXDRKEQn<3S#Z~kbolCFnGF)6^x z=|S=(Vf^_aAthr8O>tPOGJ)9UuM@Zy*ubEe(21M;se+SM8a-dL*kUE#FY)e5Q2_Sn zEX7?Ck2AF37>ifk$nhiIMkQWwo8|Wp)j$;uRL|8cbXbAsM}t1-;C1+rJ*B7s>us2< z382q*IUiaAvP(7Ejd3YKe literal 0 HcmV?d00001 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv deleted file mode 100644 index 3084851bff2e..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20 16:40:47",100 -"Apple",2,"2022-10-20 16:41:47",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json deleted file mode 100644 index c5ac9fb0f5a8..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20 16:40:47","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20 16:41:47","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv deleted file mode 100644 index 5353438e9ef0..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 16:40:47 100 -Apple 2 2022-10-20 16:41:47 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1d17c634854cd93611c5c7c2612b6d73b9175be0 GIT binary patch literal 1046 zcmb`HO-sW-5QZn4O2q8P@*j?n`uj7#qV%%ed*k0vt)|QUg%Ks$NIm4&1|!W4Je@ks^0X zVKRtoeOrLJ3i$$zKPAHzm!rAbS)r6{L$B9d57KcPjGKt$ULFtDb_JGL>E%Ah$=hJn zJ)sXsus2q8P@*j?n`uj7#qV%%ed*k0vt)|QUg%Ks$NIm4&1|!W4Je@ks^0X 
zVKRtoeOrLJ3i$$zKPAHzm!rAbS)r6{L$B9d57KcPjGKt$ULFtDb_JGL>E%Ah$=hJn zJ)sXsusF^HoK0bTS0<4PY_ zJ!mnbODA_DFXq{`3(%HA&+jtezI5)gWwJn9aA65cKWleqn;%d-hBi=qGk_T+suhzL z)JQY>p;_C4kj$u(nRP)x<&1=ar>4+e@eHzL6t8(~ZL2>bRHAk8dW9y3ucvi7C;vSe zYX{HGW$FGn65*y$LUM(yfMgHJq-1n7Q*v|HZkS5ajU>L%QG&knDoSQnB{SQy9YtvSU3O3sxc0uy_XdF)>bOF646ukmy8#}KPbpqLWI-2$H zNCkvLIW`Uj0|@RbKm2#g={@#{-ghbWK65V|!L=nE>ekj-8f72Lc+noEX*S5P&D?W{D86N@v;NiWThALs}}>9=z3qryhz3f)pXtU<0;|X{)c|oB2A< zmbB3{c(Ej#ot?k^m>Dwc-(4cZ9$pWSBRd8-nQ#EO31{>;B=wWzHDV+TqcBB47rnr^ z(&DNIO=k4cS-X)J^X$q6XiK5zcNuVBI=AU6UZO3yu!N?hG{&Gwmi1B)gKv3(K>jwL6f7?8eQ{f zKGo{ir&Yq|W(Mi5eTlK4$vL?~mT|KCc$#B6nkiWbD@jy}`>T@AbeW)aPHVT4#wwYy zn@Z*4p$ScVoqWEQ!x7MZB${#_66eVo7Kk9D0=_R)cdTya0J(uaA5Y;`>V literal 0 HcmV?d00001 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8ff52239979a60afcf8aec50cd52e37bc67e572e GIT binary patch literal 1079 zcmb`H!AiqG5QZn|W{D6GrL$~s(F%6yp)D0`51y68%b}>7BySO;pcjQH0=nqc zO)5QHb)d$KE}blmvY2O=E`_j2f7x5fz!G$F({jA-sV}70LF|>i|n+41w zQ7xIgdWkfnADX>i5Rw^HGP5rTsCXkG=P4<)mpr{R9!4u3Tifc73OmtSc(p{6gPVtb znLhOWWdWStJKK1!QKg&b5#gp#KyrmFhh&fOxL|ZNQ*!gvcA1@|8z#PF1_7MYx^1G_ zl}uzgHCDWlkehf=n8=@^_;plt)upb=wL_*4YbAED+5VsnlFx>tXdI#o=v5(~6+qj> zS)XVUD9-GtmV+ZzCmf9T7mncC5)O@R?JNn?_eC^s j43nf0rs?#v@)4!8csi-Ho2_Qc4-~-a2Ac66{W5+4F^HoK0bTS0<4PY_ zJ!mnbODA_DFXq{`3(%HA&+jtezI5)gWwJn9aA65cKWleqn;%d-hBi=qGk_T+suhzL z)JQY>p;_C4kj$u(nRP)x<&1=ar>4+e@eHzL6t8(~ZL2>bRHAk8dW9y3ucvi7C;vSe zYX{HGW$FGn65*y$LUM(yfMgHJq-1n7Q*v|HZkS5ajU>L%QG&knDoSQnB{SQy9YtvSU3O3sxc0uy_XdF)>bOF646ukmy8#}KPbpqLWI-2$H 
zNCkvLIW`Uj0|@RbKm2#g={@#{-ghbWK65V|!L=nE>ekj-8f72Lc+noEX*S5P&D?W{D86N@v;NiWThALs}}>9=z3qryhz3f)pXtU<0;|X{)c|oB2A< zmbB3{c(Ej#ot?k^m>Dwc-(4cZ9$pWSBRd8-nQ#EO31{>;B=wWzHDV+TqcBB47rnr^ z(&DNIO=k4cS-X)J^X$q6XiK5zcNuVBI=AU6UZO3yu!N?hG{&Gwmi1B)gKv3(K>jwL6f7?8eQ{f zKGo{ir&Yq|W(Mi5eTlK4$vL?~mT|KCc$#B6nkiWbD@jy}`>T@AbeW)aPHVT4#wwYy zn@Z*4p$ScVoqWEQ!x7MZB${#_66eVo7Kk9D0=_R)cdTya0J(uaA5Y;`>V literal 0 HcmV?d00001 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8ff52239979a60afcf8aec50cd52e37bc67e572e GIT binary patch literal 1079 zcmb`H!AiqG5QZn|W{D6GrL$~s(F%6yp)D0`51y68%b}>7BySO;pcjQH0=nqc zO)5QHb)d$KE}blmvY2O=E`_j2f7x5fz!G$F({jA-sV}70LF|>i|n+41w zQ7xIgdWkfnADX>i5Rw^HGP5rTsCXkG=P4<)mpr{R9!4u3Tifc73OmtSc(p{6gPVtb znLhOWWdWStJKK1!QKg&b5#gp#KyrmFhh&fOxL|ZNQ*!gvcA1@|8z#PF1_7MYx^1G_ zl}uzgHCDWlkehf=n8=@^_;plt)upb=wL_*4YbAED+5VsnlFx>tXdI#o=v5(~6+qj> zS)XVUD9-GtmV+ZzCmf9T7mncC5)O@R?JNn?_eC^s j43nf0rs?#v@)4!8csi-Ho2_Qc4-~-a2Ac66{W5+4 2020 order by Fruit; ''' @@ -334,7 +347,9 @@ def test_validation(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -358,7 +373,9 @@ def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) 
client.create_storage_connection("json_bucket", "json_bucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -406,7 +423,9 @@ def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_projection_date(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_date(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -450,7 +469,9 @@ def test_projection_date(self, kikimr, s3, client): }, partitioned_by=["dt"]) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM bindings.my_binding; ''' @@ -520,7 +541,9 @@ def test_projection_validate_columns(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_no_paritioning_columns(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_no_paritioning_columns(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -571,8 +594,11 @@ def test_no_paritioning_columns(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("logs2", "logs2") - sql = R''' - $projection = @@ { + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' + $projection = + @@ { "projection.enabled" : "true", "storage.location.template" : "/${date}", "projection.date.type" : "date", @@ -639,7 +665,9 @@ def test_no_paritioning_columns(self, kikimr, s3, client): ({"folder_id": "my_folder13"}, "year Uint64", False), ({"folder_id": "my_folder14"}, "year Date", False) ], indirect=["client"]) - def test_projection_integer_type_validation(self, kikimr, s3, client, column_type, is_correct): 
+ @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_integer_type_validation(self, kikimr, s3, client, column_type, is_correct, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -663,7 +691,9 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "test_projection_integer_type_validation") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -677,7 +707,7 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ "storage.location.template" : "${year}-03-05" } @@; - ''' + R''' + ''' + f''' SELECT * FROM `fruitbucket`.`/` WITH ( @@ -690,7 +720,7 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ partitioned_by=(year), projection=$projection ) - '''.format(column_type=column_type) + ''' query_id = client.create_query("simple", sql).result.query_id if is_correct: @@ -726,7 +756,9 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ ({"folder_id": "my_folder8"}, "year Utf8", False), ({"folder_id": "my_folder9"}, "year Date", False), ], indirect=["client"]) - def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, column_type, is_correct): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, column_type, is_correct, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -750,7 +782,9 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "test_projection_enum_type_invalid_validation") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' 
$projection = @@ { @@ -762,7 +796,7 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum "storage.location.template" : "${year}-03-05" } @@; - ''' + R''' + ''' + f''' SELECT * FROM `fruitbucket`.`/` WITH ( @@ -775,7 +809,7 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum partitioned_by=(year), projection=$projection ) - '''.format(column_type=column_type) + ''' query_id = client.create_query("simple", sql).result.query_id if is_correct: @@ -813,7 +847,9 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum ({"folder_id": "my_folder15"}, "year Datetime", False), ({"folder_id": "my_folder16"}, "year Datetime NOT NULL", True), ], indirect=["client"]) - def test_projection_date_type_validation(self, kikimr, s3, client, column_type, is_correct): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_date_type_validation(self, kikimr, s3, client, column_type, is_correct, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -837,7 +873,9 @@ def test_projection_date_type_validation(self, kikimr, s3, client, column_type, kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "test_projection_date_type_invalid_validation") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -853,7 +891,7 @@ def test_projection_date_type_validation(self, kikimr, s3, client, column_type, "storage.location.template" : "${year}-03-05" } @@; - ''' + R''' + ''' + f''' SELECT * FROM `fruitbucket`.`/` WITH ( @@ -866,7 +904,7 @@ def test_projection_date_type_validation(self, kikimr, s3, client, column_type, partitioned_by=(year), projection=$projection ) - '''.format(column_type=column_type) + ''' query_id = client.create_query("simple", sql).result.query_id if is_correct: @@ -1068,7 +1106,9 @@ def 
test_binding_projection_date_type_validation(self, kikimr, s3, client, colum @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_raw_format(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_raw_format(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -1094,7 +1134,9 @@ def test_raw_format(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("rawbucket", "raw_bucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { "projection.enabled" : "true", "storage.location.template" : "/${timestamp}", @@ -1124,6 +1166,10 @@ def test_raw_format(self, kikimr, s3, client): ) ''' + # temporary fix for dynamic listing + if yq_version == "v1": + sql = 'pragma dq.MaxTasksPerStage="10"; ' + sql + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) @@ -1145,8 +1191,9 @@ def test_raw_format(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - @pytest.mark.parametrize("blocks", [False, True]) - def test_parquet(self, kikimr, s3, blocks, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_parquet(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -1174,8 +1221,9 @@ def test_parquet(self, kikimr, s3, blocks, client): query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) - sql = 'pragma s3.UseBlocksSource="{}";'.format("true" if blocks else "false") - sql = sql + R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT 
foo, bar, x FROM pb.`part/` WITH ( diff --git a/ydb/tests/fq/s3/test_format_setting.py b/ydb/tests/fq/s3/test_format_setting.py index b51693e442a1..4ffc9ed05d6a 100644 --- a/ydb/tests/fq/s3/test_format_setting.py +++ b/ydb/tests/fq/s3/test_format_setting.py @@ -334,7 +334,8 @@ def test_timestamp_simple_iso(self, kikimr, s3, client, filename, type_format): @pytest.mark.parametrize("filename, type_format", [ ("timestamp/simple_iso/test.csv", "csv_with_names"), ("timestamp/simple_iso/test.tsv", "tsv_with_names"), - ("timestamp/simple_iso/test.json", "json_each_row") + ("timestamp/simple_iso/test.json", "json_each_row"), + ("timestamp/simple_iso/test.parquet", "parquet") ]) def test_timestamp_simple_iso_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -383,7 +384,8 @@ def test_timestamp_simple_posix(self, kikimr, s3, client, filename, type_format) @pytest.mark.parametrize("filename, type_format", [ ("common/simple_posix/test.csv", "csv_with_names"), ("common/simple_posix/test.tsv", "tsv_with_names"), - ("common/simple_posix/test.json", "json_each_row") + ("common/simple_posix/test.json", "json_each_row"), + ("common/simple_posix/test.parquet", "parquet") ]) def test_timestamp_simple_posix_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -432,7 +434,8 @@ def test_date_time_simple_iso(self, kikimr, s3, client, filename, type_format): @pytest.mark.parametrize("filename, type_format", [ ("date_time/simple_iso/test.csv", "csv_with_names"), ("date_time/simple_iso/test.tsv", "tsv_with_names"), - ("date_time/simple_iso/test.json", "json_each_row") + ("date_time/simple_iso/test.json", "json_each_row"), + ("date_time/simple_iso/test.parquet", "parquet") ]) def test_date_time_simple_iso_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -507,7 +510,8 @@ def 
test_date_time_simple_posix_insert(self, kikimr, s3, client, filename, type_ @pytest.mark.parametrize("filename, type_format", [ ("timestamp/unix_time/test.csv", "csv_with_names"), ("timestamp/unix_time/test.tsv", "tsv_with_names"), - ("timestamp/unix_time/test.json", "json_each_row") + ("timestamp/unix_time/test.json", "json_each_row"), + ("timestamp/unix_time/test.parquet", "parquet") ]) def test_timestamp_unix_time_insert(self, kikimr, s3, client, filename, type_format, timestamp_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -531,7 +535,8 @@ def test_timestamp_unix_time_insert(self, kikimr, s3, client, filename, type_for @pytest.mark.parametrize("filename, type_format", [ ("common/simple_format/test.csv", "csv_with_names"), ("common/simple_format/test.tsv", "tsv_with_names"), - ("common/simple_format/test.json", "json_each_row") + ("common/simple_format/test.json", "json_each_row"), + ("common/simple_format/test.parquet", "parquet") ]) def test_timestamp_simple_format_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) diff --git a/ydb/tests/fq/s3/test_insert.py b/ydb/tests/fq/s3/test_insert.py index 420b31046187..edfd2324d900 100644 --- a/ydb/tests/fq/s3/test_insert.py +++ b/ydb/tests/fq/s3/test_insert.py @@ -21,7 +21,7 @@ def create_bucket_and_upload_file(self, filename, s3, kikimr): @yq_all @pytest.mark.parametrize("dataset_name", ["dataset", "dataにちは% set"]) - @pytest.mark.parametrize("format", ["json_list", "json_each_row", "csv_with_names"]) + @pytest.mark.parametrize("format", ["json_list", "json_each_row", "csv_with_names", "parquet"]) @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) def test_insert(self, kikimr, s3, client, format, dataset_name): resource = boto3.resource( diff --git a/ydb/tests/fq/s3/test_s3.py b/ydb/tests/fq/s3/test_s3.py index 091f9efc90af..21fa0382a91e 100644 --- a/ydb/tests/fq/s3/test_s3.py +++ 
b/ydb/tests/fq/s3/test_s3.py @@ -15,7 +15,9 @@ class TestS3(TestYdsBase): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_csv(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_csv(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -42,7 +44,9 @@ def test_csv(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "fbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM fruitbucket.`fruits.csv` WITH (format=csv_with_names, SCHEMA ( @@ -79,7 +83,58 @@ def test_csv(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_raw(self, kikimr, s3, client): + def test_csv_with_hopping(self, kikimr, s3, client): + resource = boto3.resource( + "s3", + endpoint_url=s3.s3_url, + aws_access_key_id="key", + aws_secret_access_key="secret_key" + ) + + bucket = resource.Bucket("fbucket") + bucket.create(ACL='public-read') + bucket.objects.all().delete() + + s3_client = boto3.client( + "s3", + endpoint_url=s3.s3_url, + aws_access_key_id="key", + aws_secret_access_key="secret_key" + ) + + fruits = R'''Time,Fruit,Price +0,Banana,3 +1,Apple,2 +2,Pear,15''' + s3_client.put_object(Body=fruits, Bucket='fbucket', Key='fruits.csv', ContentType='text/plain') + kikimr.control_plane.wait_bootstrap(1) + client.create_storage_connection("fruitbucket", "fbucket") + + sql = R''' + SELECT COUNT(*) as count, + FROM fruitbucket.`fruits.csv` + WITH (format=csv_with_names, SCHEMA ( + Time UInt64 NOT NULL, + Fruit String NOT NULL, + Price Int NOT NULL + )) + GROUP BY HOP(CAST(Time AS Timestamp?), "PT1M", "PT1M", "PT1M") + ''' + + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id + 
client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) + + data = client.get_result_data(query_id) + result_set = data.result.result_set + logging.debug(str(result_set)) + assert len(result_set.columns) == 1 + assert len(result_set.rows) == 1 + assert result_set.rows[0].items[0].uint64_value == 3 + + @yq_all + @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_raw(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -106,7 +161,9 @@ def test_raw(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("rawbucket", "rbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT Data FROM rawbucket.`*` WITH (format=raw, SCHEMA ( @@ -115,6 +172,9 @@ def test_raw(self, kikimr, s3, client): ORDER BY Data DESC ''' + # if yq_version == "v1": + sql = 'pragma dq.MaxTasksPerStage="10"; ' + sql + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) @@ -133,7 +193,8 @@ def test_raw(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) @pytest.mark.parametrize("kikimr", [{"raw": 3, "": 4}], indirect=True) - def test_limit(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_limit(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -158,7 +219,9 @@ def test_limit(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("limbucket", "lbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT Data FROM limbucket.`*` WITH (format=raw, SCHEMA ( @@ -171,7 +234,9 @@ def test_limit(self, kikimr, s3, client): 
client.wait_query_status(query_id, fq.QueryMeta.FAILED) assert "Size of object file1.txt = 5 and exceeds limit = 3 specified for format raw" in str(client.describe_query(query_id).result) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM limbucket.`*` WITH (format=csv_with_names, SCHEMA ( @@ -185,7 +250,8 @@ def test_limit(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_bad_format(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_bad_format(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -210,7 +276,9 @@ def test_bad_format(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("badbucket", "bbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select * from badbucket.`*.*` with (format=json_list, schema (data string)) limit 1; ''' @@ -256,7 +324,7 @@ def put_kv(k, v): client.create_yds_connection(name="yds", database_id="FakeDatabaseId") # Run query - sql = R''' + sql = f''' PRAGMA dq.MaxTasksPerStage="2"; $s3_dict_raw = @@ -279,7 +347,7 @@ def put_kv(k, v): FROM ( SELECT Yson::Parse(Data) AS yson_data - FROM yds.`{input_topic}` WITH SCHEMA (Data String NOT NULL)); + FROM yds.`{self.input_topic}` WITH SCHEMA (Data String NOT NULL)); $joined_seq = SELECT @@ -289,15 +357,11 @@ def put_kv(k, v): INNER JOIN $s3_dict AS s3_dict ON yds_seq.key = s3_dict.key; - INSERT INTO yds.`{output_topic}` + INSERT INTO yds.`{self.output_topic}` SELECT Yson::SerializeText(Yson::From(TableRow())) FROM $joined_seq; - '''\ - .format( - input_topic=self.input_topic, - output_topic=self.output_topic, - ) + ''' query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.STREAMING).result.query_id client.wait_query_status(query_id, fq.QueryMeta.RUNNING) @@ -388,7 +452,7 @@ def 
test_write_result(self, kikimr, s3, client, yq_version): time.sleep(10) # 2 x node info update period - sql = R''' + sql = f''' SELECT Fruit, sum(Price) as Price, sum(Weight) as Weight FROM fruitbucket.`fruits*` WITH (format=csv_with_names, SCHEMA ( @@ -418,7 +482,8 @@ def test_write_result(self, kikimr, s3, client, yq_version): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_precompute(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_precompute(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -445,7 +510,9 @@ def test_precompute(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("prebucket", "pbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select count(*) as Cnt from prebucket.`file1.txt` with (format=raw, schema( Data String NOT NULL )) @@ -476,7 +543,9 @@ def test_precompute(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_failed_precompute(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_failed_precompute(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -490,7 +559,9 @@ def test_failed_precompute(self, kikimr, s3, client): client.create_storage_connection("fp", "fpbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + insert into fp.`path/` with (format=json_each_row) select * from AS_TABLE([<|foo:123, bar:"xxx"u|>,<|foo:456, bar:"yyy"u|>]); ''' @@ -498,7 +569,9 @@ def test_failed_precompute(self, kikimr, s3, client): query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) - sql = R''' + sql = 
f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select count(*) from fp.`path/` with (format=json_each_row, schema( foo Int NOT NULL, bar String NOT NULL @@ -520,7 +593,9 @@ def test_failed_precompute(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_missed(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_missed(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -547,7 +622,9 @@ def test_missed(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "fbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM fruitbucket.`fruits.csv` WITH (format=csv_with_names, SCHEMA ( @@ -564,7 +641,9 @@ def test_missed(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_simple_hits_47(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_simple_hits_47(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -591,7 +670,9 @@ def test_simple_hits_47(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "fbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + $data = SELECT * FROM fruitbucket.`fruits.csv` WITH (format=csv_with_names, SCHEMA ( @@ -624,7 +705,8 @@ def test_simple_hits_47(self, kikimr, s3, client): @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) @pytest.mark.parametrize("raw", [True, False]) @pytest.mark.parametrize("path_pattern", ["exact_file", "directory_scan"]) - def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern): + 
@pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -662,13 +744,16 @@ def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern): else: raise ValueError(f"Unknown path_pattern {path_pattern}") - sql = R''' + format = "raw" if raw else "csv_with_names" + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT count(*) as cnt FROM i18nbucket.`{path}` WITH (format={format}, SCHEMA ( Data String )); - '''.format(path=path, format="raw" if raw else "csv_with_names") + ''' query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) @@ -687,7 +772,9 @@ def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern): @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) @pytest.mark.parametrize("raw", [False, True]) @pytest.mark.parametrize("partitioning", ["hive", "projection"]) - def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_version): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_version, runtime_listing): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -718,33 +805,38 @@ def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_versi kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("i18nbucket", "ibucket") + format = "raw" if raw else "csv_with_names" if partitioning == "projection": - sql = R''' - $projection = @@ { - "projection.enabled" : "true", - "storage.location.template" : "/folder=${folder}", - "projection.folder.type" : "enum", - "projection.folder.values" : "%こん,に ちは,に" - } @@;''' + ''' - SELECT count(*) as cnt - FROM i18nbucket.`dataset` - WITH ( - format={}, - SCHEMA 
( - Data String, - folder String NOT NULL - ), - partitioned_by=(folder), - projection=$projection - ) - WHERE folder = 'に ちは' or folder = '%こん'; - '''.format("raw" if raw else "csv_with_names") + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' + $projection = @@ { + "projection.enabled" : "true", + "storage.location.template" : "/folder=${folder}", + "projection.folder.type" : "enum", + "projection.folder.values" : "%こん,に ちは,に" + } @@;''' + f''' + SELECT count(*) as cnt + FROM i18nbucket.`dataset` + WITH ( + format={format}, + SCHEMA ( + Data String, + folder String NOT NULL + ), + partitioned_by=(folder), + projection=$projection + ) + WHERE folder = 'に ちは' or folder = '%こん'; + ''' elif partitioning == "hive": - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT count(*) as cnt FROM i18nbucket.`dataset` WITH ( - format={}, + format={format}, SCHEMA ( Data String, folder String NOT NULL @@ -752,7 +844,7 @@ def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_versi partitioned_by=(folder) ) WHERE folder = 'に ちは' or folder = '%こん'; - '''.format("raw" if raw else "csv_with_names") + ''' else: raise ValueError(f"Unknown partitioning {partitioning}") @@ -771,7 +863,9 @@ def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_versi @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_huge_source(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_huge_source(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -785,15 +879,20 @@ def test_huge_source(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("hugebucket", "hbucket") - sql = R''' + long_literal = "*" * 1024 + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + insert into hugebucket.`path/` with 
(format=csv_with_names) - select * from AS_TABLE(ListReplicate(<|s:"{}"u|>, 1024 * 10)); - '''.format("*" * 1024) + select * from AS_TABLE(ListReplicate(<|s:"{long_literal}"u|>, 1024 * 10)); + ''' query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select count(*) from hugebucket.`path/` with (format=csv_with_names, schema( s String NOT NULL )) diff --git a/ydb/tests/fq/s3/test_yq_v2.py b/ydb/tests/fq/s3/test_yq_v2.py index 88d6837a5158..baf96ea0db96 100644 --- a/ydb/tests/fq/s3/test_yq_v2.py +++ b/ydb/tests/fq/s3/test_yq_v2.py @@ -17,7 +17,8 @@ class TestS3(TestYdsBase): @yq_v2 @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_yqv2_enabled(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", [False, True]) + def test_yqv2_enabled(self, kikimr, s3, client, runtime_listing): resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -56,7 +57,8 @@ def test_yqv2_enabled(self, kikimr, s3, client): "csv_delimiter": ";" }) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{str(runtime_listing).lower()}"; pragma s3.UseBlocksSource="false"; SELECT * FROM my_binding; -- syntax without bindings. 
supported only in yqv2 diff --git a/ydb/tests/fq/yds/test_select_1.py b/ydb/tests/fq/yds/test_select_1.py index 6c43b72ffae4..116c37dd5ee7 100644 --- a/ydb/tests/fq/yds/test_select_1.py +++ b/ydb/tests/fq/yds/test_select_1.py @@ -120,11 +120,11 @@ def test_compile_error(self, client, yq_version): assert "Failed to parse query" in describe_string, describe_string @yq_all - def test_ast_in_failed_query(self, client): - sql = "SELECT unwrap(1 / 0)" + def test_ast_in_failed_query_runtime(self, client): + sql = "SELECT unwrap(42 / 0) AS error_column" query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.FAILED) - ast = str(client.describe_query(query_id).result.query.ast) - assert ast != "", "Query ast not found" + ast = client.describe_query(query_id).result.query.ast.data + assert "(\'\"error_column\" (Unwrap (/ (Int32 \'\"42\")" in ast, "Invalid query ast" diff --git a/ydb/tests/library/harness/kikimr_config.py b/ydb/tests/library/harness/kikimr_config.py index 5a8c3a680758..17f20055b437 100644 --- a/ydb/tests/library/harness/kikimr_config.py +++ b/ydb/tests/library/harness/kikimr_config.py @@ -159,7 +159,9 @@ def __init__( hive_config=None, datashard_config=None, enforce_user_token_requirement=False, - default_user_sid=None + default_user_sid=None, + pg_compatible_expirement=False, + generic_connector_config=None, # typing.Optional[TGenericConnectorConfig] ): if extra_feature_flags is None: extra_feature_flags = [] @@ -376,6 +378,41 @@ def __init__( if default_user_sid: self.yaml_config["domains_config"]["security_config"]["default_user_sids"] = [default_user_sid] + if pg_compatible_expirement: + self.yaml_config["table_service_config"]["enable_prepared_ddl"] = True + # self.yaml_config["table_service_config"]["enable_ast_cache"] = True + # self.yaml_config["table_service_config"]["enable_pg_consts_to_params"] = True + 
self.yaml_config["table_service_config"]["index_auto_choose_mode"] = 'max_used_prefix' + self.yaml_config["feature_flags"]['enable_temp_tables'] = True + self.yaml_config["feature_flags"]['enable_table_pg_types'] = True + + if generic_connector_config: + if "query_service_config" not in self.yaml_config: + self.yaml_config["query_service_config"] = {} + + self.yaml_config["query_service_config"]["generic"] = { + "connector": { + "endpoint": { + "host": generic_connector_config.Endpoint.host, + "port": generic_connector_config.Endpoint.port, + }, + "use_ssl": generic_connector_config.UseSsl + }, + "default_settings": [ + { + "name": "DateTimeFormat", + "value": "string" + }, + { + "name": "UsePredicatePushdown", + "value": "true" + } + ] + } + + self.yaml_config["feature_flags"]["enable_external_data_sources"] = True + self.yaml_config["feature_flags"]["enable_script_execution_operations"] = True + @property def pdisks_info(self): return self._pdisks_info diff --git a/ydb/tests/library/ut/kikimr_config.py b/ydb/tests/library/ut/kikimr_config.py new file mode 100644 index 000000000000..e32102107b49 --- /dev/null +++ b/ydb/tests/library/ut/kikimr_config.py @@ -0,0 +1,23 @@ +from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator + +from ydb.library.yql.providers.common.proto.gateways_config_pb2 import TGenericConnectorConfig + + +def test_kikimr_config_generator_generic_connector_config(): + generic_connector_config = TGenericConnectorConfig() + generic_connector_config.Endpoint.host = "localhost" + generic_connector_config.Endpoint.port = 50051 + generic_connector_config.UseSsl = False + + cfg_gen = KikimrConfigGenerator(generic_connector_config=generic_connector_config) + yaml_config = cfg_gen.yaml_config + + assert yaml_config["query_service_config"]["generic"]["connector"]["endpoint"]["host"] == generic_connector_config.Endpoint.host + assert yaml_config["query_service_config"]["generic"]["connector"]["endpoint"]["port"] == 
generic_connector_config.Endpoint.port + assert yaml_config["query_service_config"]["generic"]["connector"]["use_ssl"] == generic_connector_config.UseSsl + assert yaml_config["query_service_config"]["generic"]["default_settings"] == [ + {"name": "DateTimeFormat", "value": "string"}, + {"name": "UsePredicatePushdown", "value": "true"}, + ] + assert yaml_config["feature_flags"]["enable_external_data_sources"] is True + assert yaml_config["feature_flags"]["enable_script_execution_operations"] is True diff --git a/ydb/tests/library/ut/ya.make b/ydb/tests/library/ut/ya.make new file mode 100644 index 000000000000..9becd0e76ef8 --- /dev/null +++ b/ydb/tests/library/ut/ya.make @@ -0,0 +1,12 @@ +PY3TEST() + +PEERDIR( + ydb/tests/library + ydb/library/yql/providers/common/proto +) + +TEST_SRCS( + kikimr_config.py +) + +END() diff --git a/ydb/tests/library/ya.make b/ydb/tests/library/ya.make index 81b4cc2aed83..2e043071c2b2 100644 --- a/ydb/tests/library/ya.make +++ b/ydb/tests/library/ya.make @@ -95,6 +95,7 @@ PEERDIR( library/python/svn_version library/python/testing/yatest_common ydb/core/protos + ydb/library/yql/providers/common/proto ydb/public/api/grpc ydb/public/api/grpc/draft ydb/public/api/protos @@ -103,3 +104,5 @@ PEERDIR( ) END() + +RECURSE_FOR_TESTS(ut) diff --git a/ydb/tests/tools/kqprun/.gitignore b/ydb/tests/tools/kqprun/.gitignore index 9aec6451388c..e240c8c1f76b 100644 --- a/ydb/tests/tools/kqprun/.gitignore +++ b/ydb/tests/tools/kqprun/.gitignore @@ -1,3 +1,7 @@ sync_dir +example +udfs *.log +*.json *.sql +*.bin diff --git a/ydb/tests/tools/kqprun/kqprun.cpp b/ydb/tests/tools/kqprun/kqprun.cpp index 5b250f2734da..efb4f6cb5acb 100644 --- a/ydb/tests/tools/kqprun/kqprun.cpp +++ b/ydb/tests/tools/kqprun/kqprun.cpp @@ -1,5 +1,7 @@ #include "src/kqp_runner.h" +#include + #include #include @@ -22,7 +24,7 @@ struct TExecutionOptions { TString ScriptTraceId = "kqprun"; bool HasResults() const { - return ScriptQuery && ScriptQueryAction == 
NKikimrKqp::QUERY_ACTION_EXECUTE && !ClearExecution; + return ScriptQuery && ScriptQueryAction == NKikimrKqp::QUERY_ACTION_EXECUTE; } }; @@ -46,6 +48,10 @@ void RunScript(const TExecutionOptions& executionOptions, const NKqpRun::TRunner if (!runner.ExecuteScript(executionOptions.ScriptQuery, executionOptions.ScriptQueryAction, executionOptions.ScriptTraceId)) { ythrow yexception() << "Script execution failed"; } + Cout << colors.Yellow() << "Fetching script results..." << colors.Default() << Endl; + if (!runner.FetchScriptResults()) { + ythrow yexception() << "Fetch script results failed"; + } } else { if (!runner.ExecuteQuery(executionOptions.ScriptQuery, executionOptions.ScriptQueryAction, executionOptions.ScriptTraceId)) { ythrow yexception() << "Query execution failed"; @@ -54,11 +60,10 @@ void RunScript(const TExecutionOptions& executionOptions, const NKqpRun::TRunner } if (executionOptions.HasResults()) { - Cout << colors.Yellow() << "Writing script results..." << colors.Default() << Endl; - if (!runner.WriteScriptResults()) { - ythrow yexception() << "Writing script results failed"; - } + runner.PrintScriptResults(); } + + Cout << colors.Yellow() << "Finalization of kqp runner..." << colors.Default() << Endl; } @@ -74,6 +79,20 @@ THolder SetupDefaultFileOutput(const TString& filePath, IOutputStre } +TIntrusivePtr CreateFunctionRegistry(const TString& udfsDirectory, TVector udfsPaths) { + if (!udfsDirectory.empty() || !udfsPaths.empty()) { + NColorizer::TColors colors = NColorizer::AutoColors(Cout); + Cout << colors.Yellow() << "Fetching udfs..." 
<< colors.Default() << Endl; + } + + NKikimr::NMiniKQL::FindUdfsInDir(udfsDirectory, &udfsPaths); + auto functionRegistry = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, udfsPaths)->Clone(); + NKikimr::NMiniKQL::FillStaticModules(*functionRegistry); + + return functionRegistry; +} + + void RunMain(int argc, const char* argv[]) { TExecutionOptions executionOptions; NKqpRun::TRunnerOptions runnerOptions; @@ -87,9 +106,11 @@ void RunMain(int argc, const char* argv[]) { TString logFile = "-"; TString appConfigFile = "./configuration/app_config.conf"; + TString traceOptType = "disabled"; TString scriptQueryAction = "execute"; TString planOutputFormat = "pretty"; TString resultOutputFormat = "rows"; + i64 resultsRowsLimit = 1000; TVector udfsPaths; TString udfsDirectory; @@ -103,7 +124,7 @@ void RunMain(int argc, const char* argv[]) { .Optional() .RequiredArgument("FILE") .StoreResult(&schemeQueryFile); - options.AddLongOption("app-config", "File with app config (TAppConfig)") + options.AddLongOption('c', "app-config", "File with app config (TAppConfig)") .Optional() .RequiredArgument("FILE") .DefaultValue(appConfigFile) @@ -135,33 +156,33 @@ void RunMain(int argc, const char* argv[]) { .NoArgument() .DefaultValue(executionOptions.ClearExecution) .SetFlag(&executionOptions.ClearExecution); - options.AddLongOption("trace-opt", "print AST in the begin of each transformation") + options.AddLongOption('T', "trace-opt", "print AST in the begin of each transformation, one of { scheme | script | all }") .Optional() - .NoArgument() - .DefaultValue(runnerOptions.YdbSettings.TraceOpt) - .SetFlag(&runnerOptions.YdbSettings.TraceOpt); - options.AddLongOption("script-action", "Script query execute action, one of { execute | explain }") + .RequiredArgument("STR") + .DefaultValue(traceOptType) + .StoreResult(&traceOptType); + options.AddLongOption('A', "script-action", "Script query execute action, one 
of { execute | explain }") .Optional() .RequiredArgument("STR") .DefaultValue(scriptQueryAction) .StoreResult(&scriptQueryAction); - options.AddLongOption("plan-format", "Script query plan format, one of { pretty | table | json }") + options.AddLongOption('P', "plan-format", "Script query plan format, one of { pretty | table | json }") .Optional() .RequiredArgument("STR") .DefaultValue(planOutputFormat) .StoreResult(&planOutputFormat); - options.AddLongOption("result-format", "Script query result format, one of { rows | full }") + options.AddLongOption('R', "result-format", "Script query result format, one of { rows | full }") .Optional() .RequiredArgument("STR") .DefaultValue(resultOutputFormat) .StoreResult(&resultOutputFormat); - options.AddLongOption("result-rows-limit", "Rows limit for script execution results") + options.AddLongOption('L', "result-rows-limit", "Rows limit for script execution results") .Optional() .RequiredArgument("INT") - .DefaultValue(runnerOptions.ResultsRowsLimit) - .StoreResult(&runnerOptions.ResultsRowsLimit); + .DefaultValue(resultsRowsLimit) + .StoreResult(&resultsRowsLimit); - options.AddLongOption("udf", "Load shared library with UDF by given path") + options.AddLongOption('u', "udf", "Load shared library with UDF by given path") .Optional() .RequiredArgument("FILE") .AppendTo(&udfsPaths); @@ -191,15 +212,19 @@ void RunMain(int argc, const char* argv[]) { // Runner options - if (runnerOptions.ResultsRowsLimit < 0) { - ythrow yexception() << "Results rows limit less than zero"; - } - THolder resultFileHolder = SetupDefaultFileOutput(resultOutputFile, runnerOptions.ResultOutput); THolder schemeQueryAstFileHolder = SetupDefaultFileOutput(schemeQueryAstFile, runnerOptions.SchemeQueryAstOutput); THolder scriptQueryAstFileHolder = SetupDefaultFileOutput(scriptQueryAstFile, runnerOptions.ScriptQueryAstOutput); THolder scriptQueryPlanFileHolder = SetupDefaultFileOutput(scriptQueryPlanFile, runnerOptions.ScriptQueryPlanOutput); + 
runnerOptions.TraceOptType = + (traceOptType == TStringBuf("all")) ? NKqpRun::TRunnerOptions::ETraceOptType::All + : (traceOptType == TStringBuf("scheme")) ? NKqpRun::TRunnerOptions::ETraceOptType::Scheme + : (traceOptType == TStringBuf("script")) ? NKqpRun::TRunnerOptions::ETraceOptType::Script + : (traceOptType == TStringBuf("disabled")) ? NKqpRun::TRunnerOptions::ETraceOptType::Disabled + : NKqpRun::TRunnerOptions::ETraceOptType::All; + runnerOptions.YdbSettings.TraceOptEnabled = runnerOptions.TraceOptType != NKqpRun::TRunnerOptions::ETraceOptType::Disabled; + runnerOptions.ResultOutputFormat = (resultOutputFormat == TStringBuf("rows")) ? NKqpRun::TRunnerOptions::EResultOutputFormat::RowsJson : (resultOutputFormat == TStringBuf("full")) ? NKqpRun::TRunnerOptions::EResultOutputFormat::FullJson @@ -215,20 +240,22 @@ void RunMain(int argc, const char* argv[]) { if (logFile != "-") { runnerOptions.YdbSettings.LogOutputFile = logFile; + std::remove(logFile.c_str()); } runnerOptions.YdbSettings.YqlToken = GetEnv("YQL_TOKEN"); - - NKikimr::NMiniKQL::FindUdfsInDir(udfsDirectory, &udfsPaths); - auto functionRegistry = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, udfsPaths)->Clone(); - NKikimr::NMiniKQL::FillStaticModules(*functionRegistry); - runnerOptions.YdbSettings.FunctionRegistry = functionRegistry.Get(); + runnerOptions.YdbSettings.FunctionRegistry = CreateFunctionRegistry(udfsDirectory, udfsPaths).Get(); TString appConfigData = TFileInput(appConfigFile).ReadAll(); if (!google::protobuf::TextFormat::ParseFromString(appConfigData, &runnerOptions.YdbSettings.AppConfig)) { ythrow yexception() << "Bad format of app configuration"; } + if (resultsRowsLimit < 0) { + ythrow yexception() << "Results rows limit less than zero"; + } + runnerOptions.YdbSettings.AppConfig.MutableQueryServiceConfig()->SetScriptResultRowsLimit(resultsRowsLimit); + RunScript(executionOptions, runnerOptions); } 
diff --git a/ydb/tests/tools/kqprun/src/actors.cpp b/ydb/tests/tools/kqprun/src/actors.cpp index c8c31e99151f..48f73e4cabfc 100644 --- a/ydb/tests/tools/kqprun/src/actors.cpp +++ b/ydb/tests/tools/kqprun/src/actors.cpp @@ -9,12 +9,19 @@ namespace { class TRunScriptActorMock : public NActors::TActorBootstrapped { public: - TRunScriptActorMock(THolder request, NThreading::TPromise promise, ui64 resultSizeLimit) + TRunScriptActorMock(THolder request, + NThreading::TPromise promise, + ui64 resultRowsLimit, ui64 resultSizeLimit, std::vector& resultSets) : Request_(std::move(request)) , Promise_(promise) + , ResultRowsLimit_(std::numeric_limits::max()) , ResultSizeLimit_(std::numeric_limits::max()) + , ResultSets_(resultSets) { - if (resultSizeLimit && resultSizeLimit < std::numeric_limits::max()) { + if (resultRowsLimit) { + ResultRowsLimit_ = resultRowsLimit; + } + if (resultSizeLimit) { ResultSizeLimit_ = resultSizeLimit; } } @@ -36,6 +43,28 @@ class TRunScriptActorMock : public NActors::TActorBootstrappedRecord.SetSeqNo(ev->Get()->Record.GetSeqNo()); response->Record.SetFreeSpace(ResultSizeLimit_); + auto resultSetIndex = ev->Get()->Record.GetQueryResultIndex(); + if (resultSetIndex >= ResultSets_.size()) { + ResultSets_.resize(resultSetIndex + 1); + } + + if (!ResultSets_[resultSetIndex].truncated()) { + for (auto& row : *ev->Get()->Record.MutableResultSet()->mutable_rows()) { + if (static_cast(ResultSets_[resultSetIndex].rows_size()) >= ResultRowsLimit_) { + ResultSets_[resultSetIndex].set_truncated(true); + break; + } + + if (ResultSets_[resultSetIndex].ByteSizeLong() + row.ByteSizeLong() > ResultSizeLimit_) { + ResultSets_[resultSetIndex].set_truncated(true); + break; + } + + *ResultSets_[resultSetIndex].add_rows() = std::move(row); + } + *ResultSets_[resultSetIndex].mutable_columns() = ev->Get()->Record.GetResultSet().columns(); + } + Send(ev->Sender, response.Release()); } @@ -47,13 +76,17 @@ class TRunScriptActorMock : public NActors::TActorBootstrapped 
Request_; NThreading::TPromise Promise_; - i64 ResultSizeLimit_; + ui64 ResultRowsLimit_; + ui64 ResultSizeLimit_; + std::vector& ResultSets_; }; } // anonymous namespace -NActors::IActor* CreateRunScriptActorMock(THolder request, NThreading::TPromise promise, ui64 resultSizeLimit) { - return new TRunScriptActorMock(std::move(request), promise, resultSizeLimit); +NActors::IActor* CreateRunScriptActorMock(THolder request, + NThreading::TPromise promise, + ui64 resultRowsLimit, ui64 resultSizeLimit, std::vector& resultSets) { + return new TRunScriptActorMock(std::move(request), promise, resultRowsLimit, resultSizeLimit, resultSets); } } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/actors.h b/ydb/tests/tools/kqprun/src/actors.h index f6cbf8b43bef..9e7a251d14ff 100644 --- a/ydb/tests/tools/kqprun/src/actors.h +++ b/ydb/tests/tools/kqprun/src/actors.h @@ -4,6 +4,8 @@ namespace NKqpRun { -NActors::IActor* CreateRunScriptActorMock(THolder request, NThreading::TPromise promise, ui64 resultSizeLimit); +NActors::IActor* CreateRunScriptActorMock(THolder request, + NThreading::TPromise promise, + ui64 resultRowsLimit, ui64 resultSizeLimit, std::vector& resultSets); } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/common.h b/ydb/tests/tools/kqprun/src/common.h index ef4b490f8bd1..1d57272b2dd9 100644 --- a/ydb/tests/tools/kqprun/src/common.h +++ b/ydb/tests/tools/kqprun/src/common.h @@ -12,23 +12,28 @@ namespace NKqpRun { struct TYdbSetupSettings { TString DomainName = "Root"; - bool TraceOpt = false; + bool TraceOptEnabled = false; TMaybe LogOutputFile; TString YqlToken; - NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry = nullptr; + TIntrusivePtr FunctionRegistry = nullptr; NKikimrConfig::TAppConfig AppConfig; }; struct TRunnerOptions { + enum class ETraceOptType { + Disabled, + Scheme, + Script, + All, + }; + enum class EResultOutputFormat { RowsJson, // Rows in json format FullJson, // Columns, rows and types in json format }; - i64 
ResultsRowsLimit = 1000; - IOutputStream* ResultOutput = &Cout; IOutputStream* SchemeQueryAstOutput = nullptr; IOutputStream* ScriptQueryAstOutput = nullptr; @@ -36,6 +41,7 @@ struct TRunnerOptions { EResultOutputFormat ResultOutputFormat = EResultOutputFormat::RowsJson; NYdb::NConsoleClient::EOutputFormat PlanOutputFormat = NYdb::NConsoleClient::EOutputFormat::Default; + ETraceOptType TraceOptType = ETraceOptType::Disabled; TYdbSetupSettings YdbSettings; }; diff --git a/ydb/tests/tools/kqprun/src/kqp_runner.cpp b/ydb/tests/tools/kqprun/src/kqp_runner.cpp index d72d461c9c58..b4eacc1895fe 100644 --- a/ydb/tests/tools/kqprun/src/kqp_runner.cpp +++ b/ydb/tests/tools/kqprun/src/kqp_runner.cpp @@ -21,8 +21,11 @@ class TKqpRunner::TImpl { {} bool ExecuteSchemeQuery(const TString& query) const { + StartSchemeTraceOpt(); + TSchemeMeta meta; TRequestResult status = YdbSetup_.SchemeQueryRequest(query, meta); + TYdbSetup::StopTraceOpt(); PrintSchemeQueryAst(meta.Ast); @@ -35,6 +38,8 @@ class TKqpRunner::TImpl { } bool ExecuteScript(const TString& script, NKikimrKqp::EQueryAction action, const TString& traceId) { + StartScriptTraceOpt(); + TRequestResult status = YdbSetup_.ScriptRequest(script, action, traceId, ExecutionOperation_); if (!status.IsSuccess()) { @@ -45,9 +50,12 @@ class TKqpRunner::TImpl { return WaitScriptExecutionOperation(); } - bool ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) const { + bool ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) { + StartScriptTraceOpt(); + TQueryMeta meta; - TRequestResult status = YdbSetup_.QueryRequest(query, action, traceId, meta); + TRequestResult status = YdbSetup_.QueryRequest(query, action, traceId, meta, ResultSets_); + TYdbSetup::StopTraceOpt(); PrintScriptAst(meta.Ast); @@ -61,22 +69,32 @@ class TKqpRunner::TImpl { return true; } - bool WriteScriptResults() const { + bool FetchScriptResults() { + TYdbSetup::StopTraceOpt(); + + 
ResultSets_.resize(ExecutionMeta_.ResultSetsCount); for (i32 resultSetId = 0; resultSetId < ExecutionMeta_.ResultSetsCount; ++resultSetId) { - Ydb::ResultSet resultSet; - TRequestResult status = YdbSetup_.FetchScriptExecutionResultsRequest(ExecutionOperation_, resultSetId, Options_.ResultsRowsLimit, resultSet); + TRequestResult status = YdbSetup_.FetchScriptExecutionResultsRequest(ExecutionOperation_, resultSetId, ResultSets_[resultSetId]); if (!status.IsSuccess()) { Cerr << CerrColors_.Red() << "Failed to fetch result set with id " << resultSetId << ", reason:" << CerrColors_.Default() << Endl << status.ToString() << Endl; return false; } - - PrintScriptResult(resultSet); } return true; } + void PrintScriptResults() const { + Cout << CoutColors_.Cyan() << "Writing script query results" << CoutColors_.Default() << Endl; + for (size_t i = 0; i < ResultSets_.size(); ++i) { + if (ResultSets_.size() > 1) { + *Options_.ResultOutput << CoutColors_.Cyan() << "Result set " << i + 1 << ":" << CoutColors_.Default() << Endl; + } + PrintScriptResult(ResultSets_[i]); + } + } + private: bool WaitScriptExecutionOperation() { TRequestResult status; @@ -107,6 +125,18 @@ class TKqpRunner::TImpl { return true; } + void StartSchemeTraceOpt() const { + if (Options_.TraceOptType == TRunnerOptions::ETraceOptType::All || Options_.TraceOptType == TRunnerOptions::ETraceOptType::Scheme) { + YdbSetup_.StartTraceOpt(); + } + } + + void StartScriptTraceOpt() const { + if (Options_.TraceOptType == TRunnerOptions::ETraceOptType::All || Options_.TraceOptType == TRunnerOptions::ETraceOptType::Script) { + YdbSetup_.StartTraceOpt(); + } + } + void PrintSchemeQueryAst(const TString& ast) const { if (Options_.SchemeQueryAstOutput) { Cout << CoutColors_.Cyan() << "Writing scheme query ast" << CoutColors_.Default() << Endl; @@ -122,7 +152,7 @@ class TKqpRunner::TImpl { } void PrintScriptPlan(const TString& plan) const { - if (Options_.ScriptQueryAstOutput) { + if (Options_.ScriptQueryPlanOutput) { Cout 
<< CoutColors_.Cyan() << "Writing script query plan" << CoutColors_.Default() << Endl; NYdb::NConsoleClient::TQueryPlanPrinter printer(Options_.PlanOutputFormat, true, *Options_.ScriptQueryPlanOutput); @@ -132,9 +162,17 @@ class TKqpRunner::TImpl { void PrintScriptResult(const Ydb::ResultSet& resultSet) const { switch (Options_.ResultOutputFormat) { - case TRunnerOptions::EResultOutputFormat::RowsJson: - Options_.ResultOutput->Write(NYdb::FormatResultSetJson(resultSet, NYdb::EBinaryStringEncoding::Unicode)); + case TRunnerOptions::EResultOutputFormat::RowsJson: { + NYdb::TResultSet result(resultSet); + NYdb::TResultSetParser parser(result); + while (parser.TryNextRow()) { + NJsonWriter::TBuf writer(NJsonWriter::HEM_UNSAFE, Options_.ResultOutput); + writer.SetWriteNanAsString(true); + NYdb::FormatResultRowJson(parser, result.GetColumnsMeta(), writer, NYdb::EBinaryStringEncoding::Unicode); + *Options_.ResultOutput << Endl; + } break; + } case TRunnerOptions::EResultOutputFormat::FullJson: resultSet.PrintJSON(*Options_.ResultOutput); @@ -151,6 +189,7 @@ class TKqpRunner::TImpl { TString ExecutionOperation_; TExecutionMeta ExecutionMeta_; + std::vector ResultSets_; }; @@ -172,8 +211,12 @@ bool TKqpRunner::ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction act return Impl_->ExecuteQuery(query, action, traceId); } -bool TKqpRunner::WriteScriptResults() const { - return Impl_->WriteScriptResults(); +bool TKqpRunner::FetchScriptResults() { + return Impl_->FetchScriptResults(); +} + +void TKqpRunner::PrintScriptResults() const { + Impl_->PrintScriptResults(); } } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/kqp_runner.h b/ydb/tests/tools/kqprun/src/kqp_runner.h index f2eef77bb076..de01588dce74 100644 --- a/ydb/tests/tools/kqprun/src/kqp_runner.h +++ b/ydb/tests/tools/kqprun/src/kqp_runner.h @@ -15,7 +15,9 @@ class TKqpRunner { bool ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) const; - bool 
WriteScriptResults() const; + bool FetchScriptResults(); + + void PrintScriptResults() const; private: class TImpl; diff --git a/ydb/tests/tools/kqprun/src/ydb_setup.cpp b/ydb/tests/tools/kqprun/src/ydb_setup.cpp index 7340de37bbe2..031522a744f0 100644 --- a/ydb/tests/tools/kqprun/src/ydb_setup.cpp +++ b/ydb/tests/tools/kqprun/src/ydb_setup.cpp @@ -104,7 +104,7 @@ class TYdbSetup::TImpl { } auto functionRegistryFactory = [this](const NKikimr::NScheme::TTypeRegistry&) { - return Settings_.FunctionRegistry; + return Settings_.FunctionRegistry.Get(); }; serverSettings.SetFrFactory(functionRegistryFactory); @@ -140,7 +140,7 @@ class TYdbSetup::TImpl { } void InitializeYqlLogger() { - if (!Settings_.TraceOpt) { + if (!Settings_.TraceOptEnabled) { return; } @@ -159,7 +159,7 @@ class TYdbSetup::TImpl { entry->SetLevel(NActors::NLog::PRI_TRACE); } - NYql::NLog::InitLogger(CreateLogBackend()); + NYql::NLog::InitLogger(NActors::CreateNullBackend()); } public: @@ -184,12 +184,14 @@ class TYdbSetup::TImpl { return RunKqpProxyRequest(std::move(event)); } - NKikimr::NKqp::TEvKqp::TEvQueryResponse::TPtr QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) const { + NKikimr::NKqp::TEvKqp::TEvQueryResponse::TPtr QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, std::vector& resultSets) const { auto event = MakeHolder(); FillScriptRequest(query, action, traceId, event->Record); auto promise = NThreading::NewPromise(); - GetRuntime()->Register(CreateRunScriptActorMock(std::move(event), promise, Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultSizeLimit())); + auto rowsLimit = Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultRowsLimit(); + auto sizeLimit = Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultSizeLimit(); + GetRuntime()->Register(CreateRunScriptActorMock(std::move(event), promise, rowsLimit, sizeLimit, resultSets)); return promise.GetFuture().GetValueSync(); } @@ 
-201,17 +203,30 @@ class TYdbSetup::TImpl { return RunKqpProxyRequest(std::move(event)); } - NKikimr::NKqp::TEvKqp::TEvFetchScriptResultsResponse::TPtr FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, i64 limit) const { + NKikimr::NKqp::TEvKqp::TEvFetchScriptResultsResponse::TPtr FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId) const { TString executionId = *NKikimr::NKqp::ScriptExecutionIdFromOperation(operation); NActors::TActorId edgeActor = GetRuntime()->AllocateEdgeActor(); - NActors::IActor* fetchActor = NKikimr::NKqp::CreateGetScriptExecutionResultActor(edgeActor, Settings_.DomainName, executionId, resultSetId, 0, limit); + auto rowsLimit = Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultRowsLimit(); + NActors::IActor* fetchActor = NKikimr::NKqp::CreateGetScriptExecutionResultActor(edgeActor, Settings_.DomainName, executionId, resultSetId, 0, rowsLimit ? rowsLimit : std::numeric_limits::max()); GetRuntime()->Register(fetchActor); return GetRuntime()->GrabEdgeEvent(edgeActor); } + void StartTraceOpt() const { + if (!Settings_.TraceOptEnabled) { + ythrow yexception() << "Trace opt was disabled"; + } + + NYql::NLog::YqlLogger().ResetBackend(CreateLogBackend()); + } + + static void StopTraceOpt() { + NYql::NLog::YqlLogger().ResetBackend(NActors::CreateNullBackend()); + } + private: NActors::TTestActorRuntime* GetRuntime() const { return Server_->GetRuntime(); @@ -308,8 +323,8 @@ TRequestResult TYdbSetup::ScriptRequest(const TString& script, NKikimrKqp::EQuer return TRequestResult(scriptExecutionOperation->Get()->Status, scriptExecutionOperation->Get()->Issues); } -TRequestResult TYdbSetup::QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta) const { - auto queryOperationResponse = Impl_->QueryRequest(query, action, traceId)->Get()->Record.GetRef(); +TRequestResult TYdbSetup::QueryRequest(const TString& query, NKikimrKqp::EQueryAction 
action, const TString& traceId, TQueryMeta& meta, std::vector& resultSets) const { + auto queryOperationResponse = Impl_->QueryRequest(query, action, traceId, resultSets)->Get()->Record.GetRef(); meta.Ast = queryOperationResponse.GetResponse().GetQueryAst(); meta.Plan = queryOperationResponse.GetResponse().GetQueryPlan(); @@ -339,8 +354,8 @@ TRequestResult TYdbSetup::GetScriptExecutionOperationRequest(const TString& oper return TRequestResult(scriptExecutionOperation->Get()->Status, scriptExecutionOperation->Get()->Issues); } -TRequestResult TYdbSetup::FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, i64 limit, Ydb::ResultSet& resultSet) const { - auto scriptExecutionResults = Impl_->FetchScriptExecutionResultsRequest(operation, resultSetId, limit)->Get()->Record; +TRequestResult TYdbSetup::FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, Ydb::ResultSet& resultSet) const { + auto scriptExecutionResults = Impl_->FetchScriptExecutionResultsRequest(operation, resultSetId)->Get()->Record; resultSet = scriptExecutionResults.GetResultSet(); @@ -350,4 +365,12 @@ TRequestResult TYdbSetup::FetchScriptExecutionResultsRequest(const TString& oper return TRequestResult(scriptExecutionResults.GetStatus(), issues); } +void TYdbSetup::StartTraceOpt() const { + Impl_->StartTraceOpt(); +} + +void TYdbSetup::StopTraceOpt() { + TYdbSetup::TImpl::StopTraceOpt(); +} + } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/ydb_setup.h b/ydb/tests/tools/kqprun/src/ydb_setup.h index c375d4f3326a..68a00058a3fd 100644 --- a/ydb/tests/tools/kqprun/src/ydb_setup.h +++ b/ydb/tests/tools/kqprun/src/ydb_setup.h @@ -51,11 +51,15 @@ class TYdbSetup { TRequestResult ScriptRequest(const TString& script, NKikimrKqp::EQueryAction action, const TString& traceId, TString& operation) const; - TRequestResult QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta) const; + TRequestResult 
QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta, std::vector& resultSets) const; TRequestResult GetScriptExecutionOperationRequest(const TString& operation, TExecutionMeta& meta) const; - TRequestResult FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, i64 limit, Ydb::ResultSet& resultSet) const; + TRequestResult FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, Ydb::ResultSet& resultSet) const; + + void StartTraceOpt() const; + + static void StopTraceOpt(); private: class TImpl;