diff --git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index 6d4ded2774d2..3448c93ee690 100644 --- a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -18,8 +18,11 @@ ydb/core/quoter/ut QuoterWithKesusTest.PrefetchCoefficient ydb/core/kafka_proxy/ut KafkaProtocol.CreatePartitionsScenario ydb/core/kafka_proxy/ut KafkaProtocol.ProduceScenario ydb/core/kqp/provider/ut KikimrIcGateway.TestLoadBasicSecretValueFromExternalDataSourceMetadata -ydb/core/kqp/ut/federated_query/generic * -ydb/core/kqp/ut/olap * +ydb/core/kqp/ut/olap KqpOlap.IndexesActualization +ydb/core/kqp/ut/olap KqpOlap.BlobsSharing* +ydb/core/kqp/ut/olap KqpOlap.ScanQueryOltpAndOlap +ydb/core/kqp/ut/olap KqpOlap.StatsUsageWithTTL +ydb/core/kqp/ut/olap KqpOlap.YqlScriptOltpAndOlap ydb/core/kqp/ut/pg KqpPg.CreateIndex ydb/core/kqp/ut/query KqpLimits.QueryReplySize ydb/core/kqp/ut/query KqpQuery.QueryTimeout @@ -29,7 +32,6 @@ ydb/core/kqp/ut/scheme KqpScheme.QueryWithAlter ydb/core/kqp/ut/scheme [44/50]* ydb/core/kqp/ut/service KqpQueryService.ExecuteQueryPgTableSelect ydb/core/kqp/ut/service KqpQueryService.QueryOnClosedSession -ydb/core/kqp/ut/service KqpQueryServiceScripts.ForgetScriptExecutionRace ydb/core/kqp/ut/service KqpService.CloseSessionsWithLoad ydb/core/kqp/ut/service [38/50]* ydb/core/tx/columnshard/ut_schema TColumnShardTestSchema.ForgetAfterFail diff --git a/ydb/core/base/events.h b/ydb/core/base/events.h index 97f89dc978a2..93c28aa34edf 100644 --- a/ydb/core/base/events.h +++ b/ydb/core/base/events.h @@ -173,6 +173,7 @@ struct TKikimrEvents : TEvents { ES_GRAPH, ES_REPLICATION_SERVICE, ES_CHANGE_EXCHANGE, + ES_S3_FILE_QUEUE, }; }; diff --git a/ydb/core/driver_lib/run/config.h b/ydb/core/driver_lib/run/config.h index 2cd944329a68..ae040faa9e2c 100644 --- a/ydb/core/driver_lib/run/config.h +++ b/ydb/core/driver_lib/run/config.h @@ -125,6 +125,7 @@ union TBasicKikimrServicesMask { TBasicKikimrServicesMask() { EnableAll(); + EnableDatabaseMetadataCache = false; } 
}; diff --git a/ydb/core/external_sources/object_storage.cpp b/ydb/core/external_sources/object_storage.cpp index e93f9603a2e0..b84712c95927 100644 --- a/ydb/core/external_sources/object_storage.cpp +++ b/ydb/core/external_sources/object_storage.cpp @@ -28,19 +28,20 @@ struct TObjectStorageExternalSource : public IExternalSource { const NKikimrExternalSources::TGeneral& general) const override { NKikimrExternalSources::TObjectStorage objectStorage; for (const auto& [key, value]: general.attributes()) { - if (key == "format") { + auto lowerKey = to_lower(key); + if (lowerKey == "format") { objectStorage.set_format(value); - } else if (key == "compression") { + } else if (lowerKey == "compression") { objectStorage.set_compression(value); } else if (key.StartsWith("projection.") || key == "storage.location.template") { objectStorage.mutable_projection()->insert({key, value}); - } else if (key == "partitioned_by") { + } else if (lowerKey == "partitioned_by") { auto json = NSc::TValue::FromJsonThrow(value); for (const auto& column: json.GetArray()) { *objectStorage.add_partitioned_by() = column; } - } else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "csv_delimiter"sv}, key)) { - objectStorage.mutable_format_setting()->insert({key, value}); + } else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "csv_delimiter"sv}, lowerKey)) { + objectStorage.mutable_format_setting()->insert({lowerKey, value}); } else { ythrow TExternalSourceException() << "Unknown attribute " << key; } diff --git a/ydb/core/fq/libs/actors/clusters_from_connections.cpp b/ydb/core/fq/libs/actors/clusters_from_connections.cpp index 8405473860f0..066668c574a9 100644 --- a/ydb/core/fq/libs/actors/clusters_from_connections.cpp +++ 
b/ydb/core/fq/libs/actors/clusters_from_connections.cpp @@ -216,17 +216,14 @@ void AddClustersFromConnections( switch (conn.content().setting().connection_case()) { case FederatedQuery::ConnectionSetting::kYdbDatabase: { const auto& db = conn.content().setting().ydb_database(); - auto* clusterCfg = gatewaysConfig.MutableYdb()->AddClusterMapping(); + auto* clusterCfg = gatewaysConfig.MutableGeneric()->AddClusterMapping(); + clusterCfg->SetKind(NYql::NConnector::NApi::EDataSourceKind::YDB); + clusterCfg->SetProtocol(NYql::NConnector::NApi::EProtocol::NATIVE); clusterCfg->SetName(connectionName); - clusterCfg->SetId(db.database_id()); - if (db.database()) - clusterCfg->SetDatabase(db.database()); - if (db.endpoint()) - clusterCfg->SetEndpoint(db.endpoint()); - clusterCfg->SetSecure(db.secure()); - clusterCfg->SetAddBearerToToken(common.GetUseBearerForYdb()); + clusterCfg->SetDatabaseId(db.database_id()); + clusterCfg->SetUseSsl(!common.GetDisableSslForGenericDataSources()); FillClusterAuth(*clusterCfg, db.auth(), authToken, accountIdSignatures); - clusters.emplace(connectionName, YdbProviderName); + clusters.emplace(connectionName, GenericProviderName); break; } case FederatedQuery::ConnectionSetting::kClickhouseCluster: { diff --git a/ydb/core/fq/libs/actors/database_resolver.cpp b/ydb/core/fq/libs/actors/database_resolver.cpp index b0858f65d46e..fe36ef9cf4d3 100644 --- a/ydb/core/fq/libs/actors/database_resolver.cpp +++ b/ydb/core/fq/libs/actors/database_resolver.cpp @@ -1,5 +1,6 @@ #include "database_resolver.h" +#include #include #include #include @@ -98,8 +99,6 @@ class TResponseProcessor : public TActorBootstrapped } void DieOnTtl() { - Success = false; - auto errorMsg = TStringBuilder() << "Could not resolve database ids: "; bool firstUnresolvedDbId = true; for (const auto& [_, params]: Requests) { @@ -112,46 +111,41 @@ class TResponseProcessor : public TActorBootstrapped } errorMsg << " in " << ResolvingTtl << " seconds."; LOG_E("ResponseProcessor::DieOnTtl: 
errorMsg=" << errorMsg); - - SendResolvedEndpointsAndDie(errorMsg); + Issues.AddIssue(errorMsg); + SendResolvedEndpointsAndDie(); } - void SendResolvedEndpointsAndDie(const TString& errorMsg) { - NYql::TIssues issues; - if (errorMsg) { - issues.AddIssue(errorMsg); - } - + void SendResolvedEndpointsAndDie() { Send(Sender, new TEvents::TEvEndpointResponse( - NYql::TDatabaseResolverResponse(std::move(DatabaseId2Description), Success, issues))); + NYql::TDatabaseResolverResponse(std::move(DatabaseId2Description), Issues.Empty(), Issues))); PassAway(); LOG_D("ResponseProcessor::SendResolvedEndpointsAndDie: passed away"); } void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev) { - TString errorMessage; TMaybe result; const auto requestIter = Requests.find(ev->Get()->Request); HandledIds++; - LOG_T("ResponseProcessor::Handle(HttpIncomingResponse): got MDB API response: code=" << ev->Get()->Response->Status); + LOG_T("ResponseProcessor::Handle(HttpIncomingResponse): got API response: code=" << ev->Get()->Response->Status); try { - HandleResponse(ev, requestIter, errorMessage, result); + HandleResponse(ev, requestIter, result); } catch (...) { const TString msg = TStringBuilder() << "error while response processing, params " << ((requestIter != Requests.end()) ? 
requestIter->second.ToDebugString() : TString{"unknown"}) << ", details: " << CurrentExceptionMessage(); LOG_E("ResponseProccessor::Handle(TEvHttpIncomingResponse): " << msg); + Issues.AddIssue(msg); } LOG_T("ResponseProcessor::Handle(HttpIncomingResponse): progress: " << DatabaseId2Description.size() << " of " << Requests.size() << " requests are done"); if (HandledIds == Requests.size()) { - SendResolvedEndpointsAndDie(errorMessage); + SendResolvedEndpointsAndDie(); } } @@ -160,18 +154,25 @@ class TResponseProcessor : public TActorBootstrapped void HandleResponse( NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev, const TRequestMap::const_iterator& requestIter, - TString& errorMessage, TMaybe& result) - { - if (ev->Get()->Error.empty() && (ev->Get()->Response && ev->Get()->Response->Status == "200")) { - errorMessage = HandleSuccessfulResponse(ev, requestIter, result); + { + TString errorMessage; + + if (requestIter == Requests.end()) { + // Requests are guaranteed to be kept in within TResponseProcessor until the response arrives. + // If there is no appropriate request, it's a fatal error. 
+ errorMessage = "Invariant violation: unknown request"; } else { - errorMessage = HandleFailedResponse(ev, requestIter); + if (ev->Get()->Error.empty() && (ev->Get()->Response && ev->Get()->Response->Status == "200")) { + errorMessage = HandleSuccessfulResponse(ev, *requestIter, result); + } else { + errorMessage = HandleFailedResponse(ev, *requestIter); + } } if (errorMessage) { + Issues.AddIssue(errorMessage); LOG_E("ResponseProcessor::Handle(HttpIncomingResponse): error=" << errorMessage); - Success = false; } else { const auto& params = requestIter->second; auto key = std::make_tuple(params.Id, params.DatabaseType, params.DatabaseAuth); @@ -191,17 +192,13 @@ class TResponseProcessor : public TActorBootstrapped TString HandleSuccessfulResponse( NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev, - const TRequestMap::const_iterator& requestIter, + const TRequestMap::value_type& requestWithParams, TMaybe& result ) { - if (requestIter == Requests.end()) { - return "unknown request"; - } - NJson::TJsonReaderConfig jsonConfig; NJson::TJsonValue databaseInfo; - const auto& params = requestIter->second; + const auto& params = requestWithParams.second; const bool parseJsonOk = NJson::ReadJsonTree(ev->Get()->Response->Body, &jsonConfig, &databaseInfo); TParsers::const_iterator parserIt; if (parseJsonOk && (parserIt = Parsers.find(params.DatabaseType)) != Parsers.end()) { @@ -232,37 +229,37 @@ class TResponseProcessor : public TActorBootstrapped TString HandleFailedResponse( NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev, - const TRequestMap::const_iterator& requestIter + const TRequestMap::value_type& requestWithParams ) const { - if (requestIter == Requests.end()) { - return "unknown request"; - } + auto sb = TStringBuilder() + << "Error while trying to resolve managed " << ToString(requestWithParams.second.DatabaseType) + << " database with id " << requestWithParams.second.Id << " via HTTP request to" + << ": endpoint '" << requestWithParams.first->Host << 
"'" + << ", url '" << requestWithParams.first->URL << "'" + << ": "; + + // Handle network error (when the response is empty) + if (!ev->Get()->Response) { + return sb << ev->Get()->Error; + } + // Handle unauthenticated error const auto& status = ev->Get()->Response->Status; - if (status == "403") { - return TStringBuilder() << "You have no permission to resolve database id into database endpoint. " + DetailedPermissionsError(requestIter->second); + return sb << "you have no permission to resolve database id into database endpoint." + DetailedPermissionsError(requestWithParams.second); } - auto errorMessage = ev->Get()->Error; - - const TString error = TStringBuilder() - << "Cannot resolve database id (status = " << status << "). " - << "Response body from " << ev->Get()->Request->URL << ": " << (ev->Get()->Response ? ev->Get()->Response->Body : "empty"); - if (!errorMessage.empty()) { - errorMessage += '\n'; - } - errorMessage += error; - - return errorMessage; + // Unexpected error. Add response body for debug + return sb << Endl + << "Status: " << status << Endl + << "Response body: " << ev->Get()->Response->Body; } TString DetailedPermissionsError(const TResolveParams& params) const { - if (params.DatabaseType == EDatabaseType::ClickHouse || params.DatabaseType == EDatabaseType::PostgreSQL) { auto mdbTypeStr = NYql::DatabaseTypeLowercase(params.DatabaseType); - return TStringBuilder() << "Please check that your service account has role " << + return TStringBuilder() << " Please check that your service account has role " << "`managed-" << mdbTypeStr << ".viewer`."; } return {}; @@ -275,7 +272,7 @@ class TResponseProcessor : public TActorBootstrapped const NYql::IMdbEndpointGenerator::TPtr MdbEndpointGenerator; TDatabaseResolverResponse::TDatabaseDescriptionMap DatabaseId2Description; size_t HandledIds = 0; - bool Success = true; + NYql::TIssues Issues; const TParsers& Parsers; TDuration ResolvingTtl = TDuration::Seconds(30); //TODO: Use cfg }; @@ -312,7 +309,12 
@@ class TDatabaseResolver: public TActor } Y_ENSURE(endpoint); - return TDatabaseDescription{endpoint, "", 0, database, secure}; + + TVector split = StringSplitter(endpoint).Split(':'); + + Y_ENSURE(split.size() == 2); + + return TDatabaseDescription{endpoint, split[0], FromString(split[1]), database, secure}; }; Parsers[NYql::EDatabaseType::Ydb] = ydbParser; Parsers[NYql::EDatabaseType::DataStreams] = [ydbParser]( @@ -327,9 +329,11 @@ class TDatabaseResolver: public TActor if (!isDedicatedDb && ret.Endpoint.StartsWith("ydb.")) { // Replace "ydb." -> "yds." ret.Endpoint[2] = 's'; + ret.Host[2] = 's'; } if (isDedicatedDb) { ret.Endpoint = "u-" + ret.Endpoint; + ret.Host = "u-" + ret.Host; } return ret; }; @@ -486,6 +490,7 @@ class TDatabaseResolver: public TActor try { TString url; if (IsIn({NYql::EDatabaseType::Ydb, NYql::EDatabaseType::DataStreams }, databaseType)) { + YQL_ENSURE(ev->Get()->YdbMvpEndpoint.Size() > 0, "empty YDB MVP Endpoint"); url = TUrlBuilder(ev->Get()->YdbMvpEndpoint + "/database") .AddUrlParam("databaseId", databaseId) .Build(); @@ -497,7 +502,6 @@ class TDatabaseResolver: public TActor .AddPathComponent("hosts") .Build(); } - LOG_D("ResponseProccessor::Handle(EndpointRequest): start GET request: " << url); NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet(url); @@ -507,6 +511,8 @@ class TDatabaseResolver: public TActor httpRequest->Set("Authorization", token); } + LOG_D("ResponseProccessor::Handle(EndpointRequest): start GET request: " << "url: " << httpRequest->URL); + requests[httpRequest] = TResolveParams{databaseId, databaseType, databaseAuth}; } catch (const std::exception& e) { const TString msg = TStringBuilder() << "error while preparing to resolve database id: " << databaseId diff --git a/ydb/core/fq/libs/actors/proxy_private.h b/ydb/core/fq/libs/actors/proxy_private.h index 4be79ef3277f..9a55b8a513ba 100644 --- a/ydb/core/fq/libs/actors/proxy_private.h +++ 
b/ydb/core/fq/libs/actors/proxy_private.h @@ -22,8 +22,6 @@ namespace NKikimr { namespace NFq { -NActors::TActorId MakeYqPrivateProxyId(); - NActors::IActor* CreateYqlAnalyticsPrivateProxy( const NConfig::TPrivateProxyConfig& privateProxyConfig, TIntrusivePtr timeProvider, diff --git a/ydb/core/fq/libs/actors/run_actor.cpp b/ydb/core/fq/libs/actors/run_actor.cpp index 4ead981b5004..80790ccb9f7e 100644 --- a/ydb/core/fq/libs/actors/run_actor.cpp +++ b/ydb/core/fq/libs/actors/run_actor.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1940,11 +1939,7 @@ class TRunActor : public NActors::TActorBootstrapped { } { - dataProvidersInit.push_back(GetYdbDataProviderInitializer(Params.YqSharedResources->UserSpaceYdbDriver, Params.CredentialsFactory, dbResolver)); - } - - { - dataProvidersInit.push_back(GetGenericDataProviderInitializer(Params.ConnectorClient, dbResolver)); + dataProvidersInit.push_back(GetGenericDataProviderInitializer(Params.ConnectorClient, dbResolver, Params.CredentialsFactory)); } { diff --git a/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp b/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp index 4055825c0c9b..50cff1b212a5 100644 --- a/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp +++ b/ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp @@ -14,7 +14,22 @@ namespace { using namespace NKikimr; using namespace NFq; -TString NoPermissionStr = "You have no permission to resolve database id into database endpoint. 
"; +TString MakeErrorPrefix( + const TString& host, + const TString& url, + const TString& databaseId, + const NYql::EDatabaseType& databaseType) { + TStringBuilder ss; + + return TStringBuilder() + << "Error while trying to resolve managed " << ToString(databaseType) + << " database with id " << databaseId << " via HTTP request to" + << ": endpoint '" << host << "'" + << ", url '" << url << "'" + << ": "; +} + +TString NoPermissionStr = "you have no permission to resolve database id into database endpoint."; struct TTestBootstrap : public TTestActorRuntime { NConfig::TCheckpointCoordinatorConfig Settings; @@ -113,7 +128,9 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { const TString& status, const TString& responseBody, const NYql::TDatabaseResolverResponse::TDatabaseDescription& description, - const NYql::TIssues& issues) + const NYql::TIssues& issues, + const TString& error = "" + ) { TTestBootstrap bootstrap; @@ -122,16 +139,16 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { databaseAuth.Protocol = protocol; TString databaseId{"etn021us5r9rhld1vgbh"}; - auto requestIdAnddatabaseType = std::make_pair(databaseId, databaseType); + auto requestIdAndDatabaseType = std::make_pair(databaseId, databaseType); bootstrap.Send(new IEventHandle( bootstrap.DatabaseResolver, bootstrap.AsyncResolver, new NFq::TEvents::TEvEndpointRequest( NYql::IDatabaseAsyncResolver::TDatabaseAuthMap( - {std::make_pair(requestIdAnddatabaseType, databaseAuth)}), + {std::make_pair(requestIdAndDatabaseType, databaseAuth)}), TString("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod"), - TString("mdbGateway"), + TString("https://mdb.api.cloud.yandex.net:443"), TString("traceId"), NFq::MakeMdbEndpointGeneratorGeneric(true)))); @@ -144,18 +161,21 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { bootstrap.WaitForBootstrap(); - auto response = std::make_unique(nullptr); - response->Status = status; - response->Body = responseBody; + std::unique_ptr httpIncomingResponse; + if (!error) { + 
httpIncomingResponse = std::make_unique(nullptr); + httpIncomingResponse->Status = status; + httpIncomingResponse->Body = responseBody; + } bootstrap.Send(new IEventHandle( processorActorId, bootstrap.HttpProxy, - new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest->Request, response.release(), ""))); + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest->Request, httpIncomingResponse.release(), error))); NYql::TDatabaseResolverResponse::TDatabaseDescriptionMap result; if (status == "200") { - result[requestIdAnddatabaseType] = description; + result[requestIdAndDatabaseType] = description; } bootstrap.ExpectEvent(bootstrap.AsyncResolver, NFq::TEvents::TEvEndpointResponse( @@ -174,8 +194,8 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"ydb.serverless.yandexcloud.net:2135"}, - TString{""}, - 0, + TString{"ydb.serverless.yandexcloud.net"}, + 2135, TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), true }, @@ -183,6 +203,36 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { ); } + Y_UNIT_TEST(Ydb_Serverless_Timeout) { + NYql::TIssues issues{ + NYql::TIssue( + TStringBuilder{} << MakeErrorPrefix( + "ydbc.ydb.cloud.yandex.net:8789", + "/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::Ydb + ) << "Connection timeout" + ) + }; + + Test( + NYql::EDatabaseType::Ydb, + NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED, + "https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", + "", + "", + NYql::TDatabaseResolverResponse::TDatabaseDescription{ + TString{"ydb.serverless.yandexcloud.net:2135"}, + TString{"ydb.serverless.yandexcloud.net"}, + 2135, + TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), + true + }, + issues, + "Connection timeout" + ); + } + Y_UNIT_TEST(DataStreams_Serverless) { Test( NYql::EDatabaseType::DataStreams, @@ -195,12 
+245,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"yds.serverless.yandexcloud.net:2135"}, - TString{""}, - 0, + TString{"yds.serverless.yandexcloud.net"}, + 2135, TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), true - }, - {} + }, + {} ); } @@ -217,8 +267,8 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { })", NYql::TDatabaseResolverResponse::TDatabaseDescription{ TString{"u-lb.etn021us5r9rhld1vgbh.ydb.mdb.yandexcloud.net:2135"}, - TString{""}, - 0, + TString{"u-lb.etn021us5r9rhld1vgbh.ydb.mdb.yandexcloud.net"}, + 2135, TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), true }, @@ -297,7 +347,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(ClickHouse_PermissionDenied) { NYql::TIssues issues{ NYql::TIssue( - TStringBuilder{} << NoPermissionStr << "Please check that your service account has role `managed-clickhouse.viewer`." + TStringBuilder{} << MakeErrorPrefix( + "mdb.api.cloud.yandex.net:443", + "/managed-clickhouse/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::ClickHouse + ) << NoPermissionStr << " Please check that your service account has role `managed-clickhouse.viewer`." ) }; @@ -365,7 +420,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(PostgreSQL_PermissionDenied) { NYql::TIssues issues{ NYql::TIssue( - TStringBuilder{} << NoPermissionStr << "Please check that your service account has role `managed-postgresql.viewer`." + TStringBuilder{} << MakeErrorPrefix( + "mdb.api.cloud.yandex.net:443", + "/managed-postgresql/v1/clusters/etn021us5r9rhld1vgbh/hosts", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::PostgreSQL + ) << NoPermissionStr << " Please check that your service account has role `managed-postgresql.viewer`." 
) }; @@ -395,7 +455,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { Y_UNIT_TEST(DataStreams_PermissionDenied) { NYql::TIssues issues{ NYql::TIssue( - NoPermissionStr + TStringBuilder{} << MakeErrorPrefix( + "ydbc.ydb.cloud.yandex.net:8789", + "/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh", + "etn021us5r9rhld1vgbh", + NYql::EDatabaseType::DataStreams + ) << NoPermissionStr ) }; Test( @@ -412,6 +477,87 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) { issues ); } + + Y_UNIT_TEST(ResolveTwoDataStreamsFirstError) { + TTestBootstrap bootstrap; + + NYql::TDatabaseAuth databaseAuth; + databaseAuth.UseTls = true; + databaseAuth.Protocol = NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED; + + TString databaseId1{"etn021us5r9rhld1vgb1"}; + TString databaseId2{"etn021us5r9rhld1vgb2"}; + auto requestIdAndDatabaseType1 = std::make_pair(databaseId1, NYql::EDatabaseType::DataStreams); + auto requestIdAndDatabaseType2 = std::make_pair(databaseId2, NYql::EDatabaseType::DataStreams); + + bootstrap.Send(new IEventHandle( + bootstrap.DatabaseResolver, + bootstrap.AsyncResolver, + new NFq::TEvents::TEvEndpointRequest( + NYql::IDatabaseAsyncResolver::TDatabaseAuthMap({ + std::make_pair(requestIdAndDatabaseType1, databaseAuth), + std::make_pair(requestIdAndDatabaseType2, databaseAuth)}), + TString("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod"), + TString("https://mdb.api.cloud.yandex.net:443"), + TString("traceId"), + NFq::MakeMdbEndpointGeneratorGeneric(true)))); + + auto httpRequest1 = NHttp::THttpOutgoingRequest::CreateRequestGet("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb1"); + auto httpRequest2 = NHttp::THttpOutgoingRequest::CreateRequestGet("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb2"); + + NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr httpOutgoingRequestHolder1 = bootstrap.GrabEdgeEvent(bootstrap.HttpProxy, TDuration::Seconds(10)); + 
NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr httpOutgoingRequestHolder2 = bootstrap.GrabEdgeEvent(bootstrap.HttpProxy, TDuration::Seconds(10)); + NHttp::TEvHttpProxy::TEvHttpOutgoingRequest* httpOutgoingRequest1 = httpOutgoingRequestHolder1.Get()->Get(); + NHttp::TEvHttpProxy::TEvHttpOutgoingRequest* httpOutgoingRequest2 = httpOutgoingRequestHolder2.Get()->Get(); + if (httpOutgoingRequest1->Request->URL != httpRequest1->URL) { + std::swap(httpOutgoingRequest1, httpOutgoingRequest2); + } + + NActors::TActorId processorActorId = httpOutgoingRequestHolder1->Sender; + bootstrap.WaitForBootstrap(); + + auto response1 = std::make_unique(nullptr); + response1->Status = "404"; + response1->Body = R"({"message":"Database not found"})"; + + bootstrap.Send(new IEventHandle( + processorActorId, + bootstrap.HttpProxy, + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest1->Request, response1.release(), ""))); + + auto response2 = std::make_unique(nullptr); + response2->Status = "200"; + response2->Body = R"({"endpoint":"grpcs://ydb.serverless.yandexcloud.net:2135/?database=/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"})"; + + bootstrap.Send(new IEventHandle( + processorActorId, + bootstrap.HttpProxy, + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest2->Request, response2.release(), ""))); + + NYql::TDatabaseResolverResponse::TDatabaseDescriptionMap result; + result[requestIdAndDatabaseType2] = NYql::TDatabaseResolverResponse::TDatabaseDescription{ + TString{"yds.serverless.yandexcloud.net:2135"}, + TString{"yds.serverless.yandexcloud.net"}, + 2135, + TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"), + true + }; + + NYql::TIssues issues{ + NYql::TIssue( + TStringBuilder() << MakeErrorPrefix( + "ydbc.ydb.cloud.yandex.net:8789", + "/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb1", + "etn021us5r9rhld1vgb1", + NYql::EDatabaseType::DataStreams)<< "\nStatus: 404\nResponse body: {\"message\":\"Database not 
found\"}" + ) + }; + + bootstrap.ExpectEvent(bootstrap.AsyncResolver, + NFq::TEvents::TEvEndpointResponse( + NYql::TDatabaseResolverResponse(std::move(result), false, issues))); + } + } } // namespace NFq diff --git a/ydb/core/fq/libs/actors/ya.make b/ydb/core/fq/libs/actors/ya.make index bcf906c56a51..42d277fbc98c 100644 --- a/ydb/core/fq/libs/actors/ya.make +++ b/ydb/core/fq/libs/actors/ya.make @@ -80,7 +80,6 @@ PEERDIR( ydb/library/yql/providers/pq/provider ydb/library/yql/providers/pq/task_meta ydb/library/yql/providers/s3/provider - ydb/library/yql/providers/ydb/provider ydb/library/yql/public/issue ydb/library/yql/public/issue/protos ydb/library/yql/sql/settings diff --git a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp index 5186054fdbbc..0bc2d773828f 100644 --- a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp +++ b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.cpp @@ -571,6 +571,16 @@ void TCheckpointCoordinator::Handle(NActors::TEvInterconnect::TEvNodeConnected:: } } +void TCheckpointCoordinator::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + CC_LOG_D("Handle undelivered"); + + if (const auto actorIt = AllActors.find(ev->Sender); actorIt != AllActors.end()) { + actorIt->second->EventsQueue.HandleUndelivered(ev); + } + + NYql::TTaskControllerImpl::OnUndelivered(ev); +} + void TCheckpointCoordinator::Handle(NActors::TEvents::TEvPoison::TPtr& ev) { CC_LOG_D("Got TEvPoison"); Send(ev->Sender, new NActors::TEvents::TEvPoisonTaken(), 0, ev->Cookie); diff --git a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h index a778a707a954..0aa477f295fa 100644 --- a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h +++ b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h @@ -60,6 +60,7 @@ class TCheckpointCoordinator : public NYql::TTaskControllerImpl::OnUndelivered) hFunc(NActors::TEvents::TEvWakeup, 
NYql::TTaskControllerImpl::OnWakeup) hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle) - hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle), + hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle) + hFunc(NActors::TEvents::TEvUndelivered, Handle) - ExceptionFunc(std::exception, HandleException) + , ExceptionFunc(std::exception, HandleException) ) static constexpr char ActorName[] = "YQ_CHECKPOINT_COORDINATOR"; diff --git a/ydb/core/fq/libs/compute/common/config.h b/ydb/core/fq/libs/compute/common/config.h index a869f893ba96..b038f8d815a7 100644 --- a/ydb/core/fq/libs/compute/common/config.h +++ b/ydb/core/fq/libs/compute/common/config.h @@ -164,8 +164,8 @@ class TComputeConfig { case FederatedQuery::ConnectionSetting::kObjectStorage: case FederatedQuery::ConnectionSetting::kClickhouseCluster: case FederatedQuery::ConnectionSetting::kPostgresqlCluster: - return true; case FederatedQuery::ConnectionSetting::kYdbDatabase: + return true; case FederatedQuery::ConnectionSetting::kDataStreams: case FederatedQuery::ConnectionSetting::kMonitoring: case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: diff --git a/ydb/core/fq/libs/compute/common/ut/ya.make b/ydb/core/fq/libs/compute/common/ut/ya.make index 536d94ff54ae..ae170608a8eb 100644 --- a/ydb/core/fq/libs/compute/common/ut/ya.make +++ b/ydb/core/fq/libs/compute/common/ut/ya.make @@ -11,6 +11,10 @@ SRCS( utils_ut.cpp ) +PEERDIR( + ydb/library/yql/public/udf/service/stub +) + YQL_LAST_ABI_VERSION() END() diff --git a/ydb/core/fq/libs/compute/common/utils.cpp b/ydb/core/fq/libs/compute/common/utils.cpp index 127bd26f1ecc..b790d8d8a8b4 100644 --- a/ydb/core/fq/libs/compute/common/utils.cpp +++ b/ydb/core/fq/libs/compute/common/utils.cpp @@ -1,8 +1,11 @@ #include "utils.h" #include +#include #include +#include + namespace NFq { using TAggregates = std::map>; @@ -621,7 +624,7 @@ void EnumeratePlansV2(NYson::TYsonWriter& writer, NJson::TJsonValue& value, ui32 } } -TString 
GetV1StatFromV2PlanV2(const TString& plan) { +TString GetV1StatFromV2PlanV2(const TString& plan, double* cpuUsage) { TStringStream out; NYson::TYsonWriter writer(&out); writer.OnBeginMap(); @@ -655,6 +658,9 @@ TString GetV1StatFromV2PlanV2(const TString& plan) { if (totals.CpuTimeUs.Sum) { writer.OnKeyedItem("cpu"); writer.OnStringScalar(FormatDurationUs(totals.CpuTimeUs.Sum)); + if (cpuUsage) { + *cpuUsage = totals.CpuTimeUs.Sum / 1000000.0; + } } if (totals.SourceCpuTimeUs.Sum) { writer.OnKeyedItem("scpu"); @@ -750,4 +756,140 @@ TPublicStat GetPublicStat(const TString& statistics) { return counters; } +struct TNoneStatProcessor : IPlanStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_NONE; + } + + TString ConvertPlan(TString& plan) override { + return plan; + } + + TString GetQueryStat(TString&, double& cpuUsage) override { + cpuUsage = 0.0; + return ""; + } + + TPublicStat GetPublicStat(TString&) override { + return TPublicStat{}; + } +}; + +struct TBasicStatProcessor : TNoneStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_BASIC; + } +}; + +struct TFullStatProcessor : IPlanStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_FULL; + } + + TString ConvertPlan(TString& plan) override { + return plan; + } + + TString GetQueryStat(TString& plan, double& cpuUsage) override { + return GetV1StatFromV2Plan(plan, &cpuUsage); + } + + TPublicStat GetPublicStat(TString& stat) override { + return NFq::GetPublicStat(stat); + } +}; + +struct TProfileStatProcessor : TFullStatProcessor { + Ydb::Query::StatsMode GetStatsMode() override { + return Ydb::Query::StatsMode::STATS_MODE_PROFILE; + } +}; + +struct TProdStatProcessor : TFullStatProcessor { + TString GetQueryStat(TString& plan, double& cpuUsage) override { + return GetPrettyStatistics(GetV1StatFromV2Plan(plan, &cpuUsage)); + } +}; + +std::unique_ptr 
CreateStatProcessor(const TString& statViewName) { + // disallow none and basic stat since they do not support metering + // if (statViewName == "stat_none") return std::make_unique(); + // if (statViewName == "stat_basc") return std::make_unique(); + if (statViewName == "stat_full") return std::make_unique(); + if (statViewName == "stat_prof") return std::make_unique(); + if (statViewName == "stat_prod") return std::make_unique(); + return std::make_unique(); +} + +PingTaskRequestBuilder::PingTaskRequestBuilder(const NConfig::TCommonConfig& commonConfig, std::unique_ptr&& processor) + : Compressor(commonConfig.GetQueryArtifactsCompressionMethod(), commonConfig.GetQueryArtifactsCompressionMinSize()) + , Processor(std::move(processor)) +{} + +Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build( + const Ydb::TableStats::QueryStats& queryStats, + const NYql::TIssues& issues, + std::optional computeStatus, + std::optional pendingStatusCode +) { + Fq::Private::PingTaskRequest pingTaskRequest = Build(queryStats); + + if (issues) { + NYql::IssuesToMessage(issues, pingTaskRequest.mutable_issues()); + } + + if (computeStatus) { + pingTaskRequest.set_status(*computeStatus); + } + + if (pendingStatusCode) { + pingTaskRequest.set_pending_status_code(*pendingStatusCode); + } + + return pingTaskRequest; +} + + +Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(const Ydb::TableStats::QueryStats& queryStats) { + return Build(queryStats.query_plan(), queryStats.query_ast()); +} + +Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(const TString& queryPlan, const TString& queryAst) { + Fq::Private::PingTaskRequest pingTaskRequest; + + Issues.Clear(); + + auto plan = queryPlan; + try { + plan = Processor->ConvertPlan(plan); + } catch(const NJson::TJsonException& ex) { + Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Error plan conversion: " << ex.what())); + } + + if (Compressor.IsEnabled()) { + auto [astCompressionMethod, astCompressed] = 
Compressor.Compress(queryAst); + pingTaskRequest.mutable_ast_compressed()->set_method(astCompressionMethod); + pingTaskRequest.mutable_ast_compressed()->set_data(astCompressed); + + auto [planCompressionMethod, planCompressed] = Compressor.Compress(plan); + pingTaskRequest.mutable_plan_compressed()->set_method(planCompressionMethod); + pingTaskRequest.mutable_plan_compressed()->set_data(planCompressed); + } else { + pingTaskRequest.set_ast(queryAst); + pingTaskRequest.set_plan(plan); + } + + CpuUsage = 0.0; + try { + auto stat = Processor->GetQueryStat(plan, CpuUsage); + pingTaskRequest.set_statistics(stat); + pingTaskRequest.set_dump_raw_statistics(true); + PublicStat = Processor->GetPublicStat(stat); + } catch(const NJson::TJsonException& ex) { + Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Error stat conversion: " << ex.what())); + } + + return pingTaskRequest; +} + } // namespace NFq diff --git a/ydb/core/fq/libs/compute/common/utils.h b/ydb/core/fq/libs/compute/common/utils.h index 4a61a45bf61a..47387490162d 100644 --- a/ydb/core/fq/libs/compute/common/utils.h +++ b/ydb/core/fq/libs/compute/common/utils.h @@ -1,8 +1,12 @@ #pragma once +#include + +#include #include #include #include + #include namespace NFq { @@ -43,4 +47,33 @@ struct TPublicStat { TPublicStat GetPublicStat(const TString& statistics); +struct IPlanStatProcessor { + virtual ~IPlanStatProcessor() = default; + virtual Ydb::Query::StatsMode GetStatsMode() = 0; + virtual TString ConvertPlan(TString& plan) = 0; + virtual TString GetQueryStat(TString& plan, double& cpuUsage) = 0; + virtual TPublicStat GetPublicStat(TString& stat) = 0; +}; + +std::unique_ptr CreateStatProcessor(const TString& statViewName); + +class PingTaskRequestBuilder { +public: + PingTaskRequestBuilder(const NConfig::TCommonConfig& commonConfig, std::unique_ptr&& processor); + Fq::Private::PingTaskRequest Build( + const Ydb::TableStats::QueryStats& queryStats, + const NYql::TIssues& issues, + std::optional computeStatus = 
std::nullopt, + std::optional pendingStatusCode = std::nullopt + ); + Fq::Private::PingTaskRequest Build(const Ydb::TableStats::QueryStats& queryStats); + Fq::Private::PingTaskRequest Build(const TString& queryPlan, const TString& queryAst); + NYql::TIssues Issues; + double CpuUsage = 0.0; + TPublicStat PublicStat; +private: + const TCompressor Compressor; + std::unique_ptr Processor; +}; + } // namespace NFq diff --git a/ydb/core/fq/libs/compute/common/ya.make b/ydb/core/fq/libs/compute/common/ya.make index d4a0c6ab2dae..f2fc3e00a5a5 100644 --- a/ydb/core/fq/libs/compute/common/ya.make +++ b/ydb/core/fq/libs/compute/common/ya.make @@ -9,6 +9,7 @@ SRCS( PEERDIR( library/cpp/json/yson ydb/core/fq/libs/config/protos + ydb/core/fq/libs/control_plane_storage/internal ydb/core/fq/libs/db_id_async_resolver_impl ydb/core/fq/libs/grpc ydb/core/fq/libs/shared_resources diff --git a/ydb/core/fq/libs/compute/ydb/actors_factory.cpp b/ydb/core/fq/libs/compute/ydb/actors_factory.cpp index 9db333a97da9..aa7d38d00fcb 100644 --- a/ydb/core/fq/libs/compute/ydb/actors_factory.cpp +++ b/ydb/core/fq/libs/compute/ydb/actors_factory.cpp @@ -9,6 +9,7 @@ #include "ydb_connector_actor.h" #include +#include namespace NFq { @@ -16,6 +17,7 @@ struct TActorFactory : public IActorFactory { TActorFactory(const NFq::TRunActorParams& params, const ::NYql::NCommon::TServiceCounters& counters) : Params(params) , Counters(counters) + , StatViewName(GetStatViewName()) {} std::unique_ptr CreatePinger(const NActors::TActorId& parent) const override { @@ -46,21 +48,22 @@ struct TActorFactory : public IActorFactory { std::unique_ptr CreateExecuter(const NActors::TActorId &parent, const NActors::TActorId &connector, const NActors::TActorId &pinger) const override { - return CreateExecuterActor(Params, parent, connector, pinger, Counters); + return CreateExecuterActor(Params, CreateStatProcessor()->GetStatsMode(), parent, connector, pinger, Counters); } std::unique_ptr CreateStatusTracker(const 
NActors::TActorId &parent, const NActors::TActorId &connector, const NActors::TActorId &pinger, const NYdb::TOperation::TOperationId& operationId) const override { - return CreateStatusTrackerActor(Params, parent, connector, pinger, operationId, Counters); + return CreateStatusTrackerActor(Params, parent, connector, pinger, operationId, CreateStatProcessor(), Counters); } std::unique_ptr CreateResultWriter(const NActors::TActorId& parent, const NActors::TActorId& connector, const NActors::TActorId& pinger, - const NKikimr::NOperationId::TOperationId& operationId) const override { - return CreateResultWriterActor(Params, parent, connector, pinger, operationId, Counters); + const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected) const override { + return CreateResultWriterActor(Params, parent, connector, pinger, operationId, operationEntryExpected, Counters); } std::unique_ptr CreateResourcesCleaner(const NActors::TActorId& parent, @@ -79,13 +82,50 @@ struct TActorFactory : public IActorFactory { std::unique_ptr CreateStopper(const NActors::TActorId& parent, const NActors::TActorId& connector, + const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId) const override { - return CreateStopperActor(Params, parent, connector, operationId, Counters); + return CreateStopperActor(Params, parent, connector, pinger, operationId, CreateStatProcessor(), Counters); + } + + std::unique_ptr CreateStatProcessor() const { + return NFq::CreateStatProcessor(StatViewName); + } + + TString GetStatViewName() { + auto p = Params.Sql.find("--fq_dev_hint_"); + if (p != Params.Sql.npos) { + p += 14; + auto p1 = Params.Sql.find("\n", p); + TString mode = Params.Sql.substr(p, p1 == Params.Sql.npos ? 
Params.Sql.npos : p1 - p); + if (mode) { + return mode; + } + } + + if (!Params.Config.GetControlPlaneStorage().GetDumpRawStatistics()) { + return "stat_prod"; + } + + switch (Params.Config.GetControlPlaneStorage().GetStatsMode()) { + case Ydb::Query::StatsMode::STATS_MODE_UNSPECIFIED: + return "stat_full"; + case Ydb::Query::StatsMode::STATS_MODE_NONE: + return "stat_none"; + case Ydb::Query::StatsMode::STATS_MODE_BASIC: + return "stat_basc"; + case Ydb::Query::StatsMode::STATS_MODE_FULL: + return "stat_full"; + case Ydb::Query::StatsMode::STATS_MODE_PROFILE: + return "stat_prof"; + default: + return "stat_full"; + } } private: NFq::TRunActorParams Params; ::NYql::NCommon::TServiceCounters Counters; + TString StatViewName; }; IActorFactory::TPtr CreateActorFactory(const NFq::TRunActorParams& params, const ::NYql::NCommon::TServiceCounters& counters) { diff --git a/ydb/core/fq/libs/compute/ydb/actors_factory.h b/ydb/core/fq/libs/compute/ydb/actors_factory.h index ae85da060f7a..4abaed2178ab 100644 --- a/ydb/core/fq/libs/compute/ydb/actors_factory.h +++ b/ydb/core/fq/libs/compute/ydb/actors_factory.h @@ -28,7 +28,8 @@ struct IActorFactory : public TThrRefBase { virtual std::unique_ptr CreateResultWriter(const NActors::TActorId& parent, const NActors::TActorId& connector, const NActors::TActorId& pinger, - const NKikimr::NOperationId::TOperationId& operationId) const = 0; + const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected) const = 0; virtual std::unique_ptr CreateResourcesCleaner(const NActors::TActorId& parent, const NActors::TActorId& connector, const NYdb::TOperation::TOperationId& operationId) const = 0; @@ -39,6 +40,7 @@ struct IActorFactory : public TThrRefBase { FederatedQuery::QueryMeta::ComputeStatus status) const = 0; virtual std::unique_ptr CreateStopper(const NActors::TActorId& parent, const NActors::TActorId& connector, + const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId) const = 0; }; 
diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp index 08382442f6db..2eadd15e1ea6 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp +++ b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.cpp @@ -343,7 +343,14 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap void CreateSingleClientActors(const NConfig::TYdbComputeControlPlane::TSingle& singleConfig) { auto globalLoadConfig = Config.GetYdb().GetLoadControlConfig(); if (globalLoadConfig.GetEnable()) { - auto clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(singleConfig.GetConnection()), CredentialsProviderFactory(GetYdbCredentialSettings(singleConfig.GetConnection()))->CreateProvider()).release()); + TActorId clientActor; + auto monitoringEndpoint = globalLoadConfig.GetMonitoringEndpoint(); + auto credentialsProvider = CredentialsProviderFactory(GetYdbCredentialSettings(singleConfig.GetConnection()))->CreateProvider(); + if (monitoringEndpoint) { + clientActor = Register(CreateMonitoringRestClientActor(monitoringEndpoint, singleConfig.GetConnection().GetDatabase(), credentialsProvider).release()); + } else { + clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(singleConfig.GetConnection()), credentialsProvider).release()); + } MonitoringActorId = Register(CreateDatabaseMonitoringActor(clientActor, globalLoadConfig, Counters).release()); } } @@ -352,15 +359,23 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap const auto& mapping = cmsConfig.GetDatabaseMapping(); auto globalLoadConfig = Config.GetYdb().GetLoadControlConfig(); for (const auto& config: mapping.GetCommon()) { + auto databaseCounters = Counters->GetSubgroup("database", 
config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateCmsGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); TActorId databaseMonitoringActor; const NConfig::TLoadControlConfig& loadConfig = config.GetLoadControlConfig().GetEnable() - ? Config.GetYdb().GetLoadControlConfig() + ? config.GetLoadControlConfig() : globalLoadConfig; if (loadConfig.GetEnable()) { - auto clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, Counters).release()); + TActorId clientActor; + auto monitoringEndpoint = loadConfig.GetMonitoringEndpoint(); + auto credentialsProvider = CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider(); + if (monitoringEndpoint) { + clientActor = Register(CreateMonitoringRestClientActor(monitoringEndpoint, config.GetControlPlaneConnection().GetDatabase(), credentialsProvider).release()); + } else { + clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), credentialsProvider).release()); + } + databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, databaseCounters).release()); } Clients->CommonDatabaseClients.push_back({clientActor, config, cacheActor, databaseMonitoringActor}); } @@ -368,15 +383,23 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap 
Y_ABORT_UNLESS(Clients->CommonDatabaseClients); for (const auto& [scope, config]: mapping.GetScopeToComputeDatabase()) { + auto databaseCounters = Counters->GetSubgroup("database", config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateCmsGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); TActorId databaseMonitoringActor; const NConfig::TLoadControlConfig& loadConfig = config.GetLoadControlConfig().GetEnable() - ? Config.GetYdb().GetLoadControlConfig() + ? config.GetLoadControlConfig() : globalLoadConfig; if (loadConfig.GetEnable()) { - auto clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, Counters).release()); + TActorId clientActor; + auto monitoringEndpoint = loadConfig.GetMonitoringEndpoint(); + auto credentialsProvider = CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider(); + if (monitoringEndpoint) { + clientActor = Register(CreateMonitoringRestClientActor(monitoringEndpoint, config.GetControlPlaneConnection().GetDatabase(), credentialsProvider).release()); + } else { + clientActor = Register(CreateMonitoringGrpcClientActor(CreateGrpcClientSettings(config), credentialsProvider).release()); + } + databaseMonitoringActor = Register(CreateDatabaseMonitoringActor(clientActor, loadConfig, databaseCounters).release()); } 
Clients->ScopeToDatabaseClient[scope] = {clientActor, config, cacheActor, databaseMonitoringActor}; } @@ -385,16 +408,18 @@ class TComputeDatabaseControlPlaneServiceActor : public NActors::TActorBootstrap void CreateControlPlaneClientActors(const NConfig::TYdbComputeControlPlane::TYdbcp& controlPlaneConfig, const TString& databasesCacheReloadPeriod) { const auto& mapping = controlPlaneConfig.GetDatabaseMapping(); for (const auto& config: mapping.GetCommon()) { + auto databaseCounters = Counters->GetSubgroup("database", config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateYdbcpGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); Clients->CommonDatabaseClients.push_back({clientActor, config, cacheActor, {}}); } Y_ABORT_UNLESS(Clients->CommonDatabaseClients); for (const auto& [scope, config]: mapping.GetScopeToComputeDatabase()) { + auto databaseCounters = Counters->GetSubgroup("database", config.GetControlPlaneConnection().GetDatabase()); const auto clientActor = Register(CreateYdbcpGrpcClientActor(CreateGrpcClientSettings(config), CredentialsProviderFactory(GetYdbCredentialSettings(config.GetControlPlaneConnection()))->CreateProvider()).release()); - const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, Counters).release()); + const auto cacheActor = Register(CreateComputeDatabasesCacheActor(clientActor, databasesCacheReloadPeriod, databaseCounters).release()); Clients->ScopeToDatabaseClient[scope] = {clientActor, config, cacheActor, {}}; } } diff --git 
a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h index 509d72ada94b..07ef06c3d20e 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h +++ b/ydb/core/fq/libs/compute/ydb/control_plane/compute_database_control_plane_service.h @@ -29,6 +29,7 @@ std::unique_ptr CreateCmsGrpcClientActor(const NCloud::TGrpcCli std::unique_ptr CreateComputeDatabasesCacheActor(const NActors::TActorId& databaseClientActorId, const TString& databasesCacheReloadPeriod, const ::NMonitoring::TDynamicCounterPtr& counters); std::unique_ptr CreateMonitoringGrpcClientActor(const NCloud::TGrpcClientSettings& settings, const NYdb::TCredentialsProviderPtr& credentialsProvider); +std::unique_ptr CreateMonitoringRestClientActor(const TString& endpoint, const TString& database, const NYdb::TCredentialsProviderPtr& credentialsProvider); std::unique_ptr CreateDatabaseMonitoringActor(const NActors::TActorId& monitoringClientActorId, NFq::NConfig::TLoadControlConfig config, const ::NMonitoring::TDynamicCounterPtr& counters); diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp index 19963aa9fc28..57d7e5da3b92 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp +++ b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp @@ -34,6 +34,8 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrappedGetCounter("InstantLoadPercentage", false); AverageLoadPercentage = subComponent->GetCounter("AverageLoadPercentage", false); QuotedLoadPercentage = subComponent->GetCounter("QuotedLoadPercentage", false); + AvailableLoadPercentage = subComponent->GetCounter("AvailableLoadPercentage", false); + TargetLoadPercentage = subComponent->GetCounter("TargetLoadPercentage", false); PendingQueueSize = 
subComponent->GetCounter("PendingQueueSize", false); PendingQueueOverload = subComponent->GetCounter("PendingQueueOverload", true); } @@ -78,7 +82,10 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped(MaxClusterLoad * 100); + } static constexpr char ActorName[] = "FQ_COMPUTE_DATABASE_MONITORING_ACTOR"; @@ -113,6 +120,10 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped= AverageLoadInterval) { @@ -252,7 +263,7 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped PendingQueue; }; diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp new file mode 100644 index 000000000000..c7085478fda9 --- /dev/null +++ b/ydb/core/fq/libs/compute/ydb/control_plane/monitoring_rest_client_actor.cpp @@ -0,0 +1,154 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#define LOG_E(stream) LOG_ERROR_S(*TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_W(stream) LOG_WARN_S( *TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_I(stream) LOG_INFO_S( *TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_D(stream) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) +#define LOG_T(stream) LOG_TRACE_S(*TlsActivationContext, NKikimrServices::FQ_RUN_ACTOR, "[ydb] [MonitoringRestClient]: " << stream) + +namespace NFq { + +using namespace NActors; + +class TMonitoringRestServiceActor : public NActors::TActor { +public: + using TBase = NActors::TActor; + + TMonitoringRestServiceActor(const TString& endpoint, const TString& database, const NYdb::TCredentialsProviderPtr& credentialsProvider) + : 
TBase(&TMonitoringRestServiceActor::StateFunc) + , Endpoint(endpoint) + , Database(database) + , CredentialsProvider(credentialsProvider) + {} + + STRICT_STFUNC(StateFunc, + hFunc(TEvYdbCompute::TEvCpuLoadRequest, Handle); + hFunc(NYql::NDq::TEvHttpBase::TEvSendResult, Handle); + ) + + void Handle(TEvYdbCompute::TEvCpuLoadRequest::TPtr& ev) { + if (Y_UNLIKELY(!HttpProxyId)) { + HttpProxyId = Register(NHttp::CreateHttpProxy(NMonitoring::TMetricRegistry::SharedInstance())); + } + + auto httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet( + NYql::TUrlBuilder(Endpoint) + .AddPathComponent("viewer") + .AddPathComponent("json") + .AddPathComponent("tenantinfo") + .AddUrlParam("path", Database) + .Build() + ); + auto ticket = CredentialsProvider->GetAuthInfo(); + LOG_D(httpRequest->GetRawData() << " using ticket " << NKikimr::MaskTicket(ticket)); + httpRequest->Set("Authorization", ticket); + + auto httpSenderId = Register(NYql::NDq::CreateHttpSenderActor(SelfId(), HttpProxyId, NYql::NDq::THttpSenderRetryPolicy::GetNoRetryPolicy())); + Send(httpSenderId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest), 0, Cookie); + Requests[Cookie++] = ev; + } + + void Handle(NYql::NDq::TEvHttpBase::TEvSendResult::TPtr& ev) { + auto it = Requests.find(ev->Cookie); + if (it == Requests.end()) { + LOG_E("Request doesn't exist (TEvSendResult). 
Need to fix this bug urgently"); + return; + } + auto request = it->second; + Requests.erase(it); + + const auto& result = *ev->Get(); + const auto& response = *result.HttpIncomingResponse->Get(); + + auto forwardResponse = std::make_unique(); + + const TString& error = response.GetError(); + if (!error.empty()) { + forwardResponse->Issues.AddIssue(error); + Send(request->Sender, forwardResponse.release(), 0, request->Cookie); + return; + } + + try { + NJson::TJsonReaderConfig jsonConfig; + NJson::TJsonValue info; + if (NJson::ReadJsonTree(response.Response->Body, &jsonConfig, &info)) { + bool usageFound = false; + if (auto* tenantNode = info.GetValueByPath("TenantInfo")) { + if (tenantNode->GetType() == NJson::JSON_ARRAY) { + for (auto tenantItem : tenantNode->GetArray()) { + if (auto* nameNode = tenantItem.GetValueByPath("Name")) { + if (nameNode->GetStringSafe() != Database) { + continue; + } + } + if (auto* poolNode = tenantItem.GetValueByPath("PoolStats")) { + if (poolNode->GetType() == NJson::JSON_ARRAY) { + for (auto poolItem : poolNode->GetArray()) { + if (auto* nameNode = poolItem.GetValueByPath("Name")) { + if (nameNode->GetStringSafe() == "User") { + if (auto* usageNode = poolItem.GetValueByPath("Usage")) { + forwardResponse->InstantLoad = usageNode->GetDoubleSafe(); + usageFound = true; + break; + } + if (auto* threadsNode = poolItem.GetValueByPath("Threads")) { + forwardResponse->CpuNumber = threadsNode->GetIntegerSafe(); + } + } + } + } + } + } + if (usageFound) { + break; + } + } + } + } + if (!usageFound) { + forwardResponse->Issues.AddIssue(TStringBuilder() << "MISSED User pool node load for database \"" << Database << '"'); + } + } else { + forwardResponse->Issues.AddIssue("Malformed JSON"); + } + } catch(const std::exception& e) { + forwardResponse->Issues.AddIssue(TStringBuilder() << "Error on JSON parsing: '" << e.what() << "'"); + } + + if (forwardResponse->Issues) { + LOG_E(response.Response->Body); + } + Send(request->Sender, 
forwardResponse.release(), 0, request->Cookie); + } + +private: + TString Endpoint; + TString Database; + TMap Requests; + NYdb::TCredentialsProviderPtr CredentialsProvider; + int64_t Cookie = 0; + TActorId HttpProxyId; +}; + +std::unique_ptr CreateMonitoringRestClientActor(const TString& endpoint, const TString& database, const NYdb::TCredentialsProviderPtr& credentialsProvider) { + return std::make_unique(endpoint, database, credentialsProvider); +} + +} diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make index daedf40cdffe..523a26c2bfe1 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make +++ b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make @@ -6,10 +6,12 @@ SRCS( compute_databases_cache.cpp database_monitoring.cpp monitoring_grpc_client_actor.cpp + monitoring_rest_client_actor.cpp ydbcp_grpc_client_actor.cpp ) PEERDIR( + library/cpp/json ydb/library/actors/core ydb/library/actors/protos ydb/core/fq/libs/compute/ydb/synchronization_service @@ -18,6 +20,8 @@ PEERDIR( ydb/core/protos ydb/library/db_pool/protos ydb/library/yql/public/issue + ydb/library/yql/utils + ydb/library/yql/utils/actors ydb/public/api/grpc ydb/public/api/grpc/draft ydb/public/lib/operation_id/protos diff --git a/ydb/core/fq/libs/compute/ydb/events/events.h b/ydb/core/fq/libs/compute/ydb/events/events.h index 6f5961b75462..3f19becf8203 100644 --- a/ydb/core/fq/libs/compute/ydb/events/events.h +++ b/ydb/core/fq/libs/compute/ydb/events/events.h @@ -71,13 +71,14 @@ struct TEvYdbCompute { // Events struct TEvExecuteScriptRequest : public NActors::TEventLocal { - TEvExecuteScriptRequest(TString sql, TString idempotencyKey, const TDuration& resultTtl, const TDuration& operationTimeout, Ydb::Query::Syntax syntax, Ydb::Query::ExecMode execMode, const TString& traceId) + TEvExecuteScriptRequest(TString sql, TString idempotencyKey, const TDuration& resultTtl, const TDuration& operationTimeout, Ydb::Query::Syntax syntax, 
Ydb::Query::ExecMode execMode, Ydb::Query::StatsMode statsMode, const TString& traceId) : Sql(std::move(sql)) , IdempotencyKey(std::move(idempotencyKey)) , ResultTtl(resultTtl) , OperationTimeout(operationTimeout) , Syntax(syntax) , ExecMode(execMode) + , StatsMode(statsMode) , TraceId(traceId) {} @@ -87,6 +88,7 @@ struct TEvYdbCompute { TDuration OperationTimeout; Ydb::Query::Syntax Syntax = Ydb::Query::SYNTAX_YQL_V1; Ydb::Query::ExecMode ExecMode = Ydb::Query::EXEC_MODE_EXECUTE; + Ydb::Query::StatsMode StatsMode = Ydb::Query::StatsMode::STATS_MODE_FULL; TString TraceId; }; @@ -117,18 +119,20 @@ struct TEvYdbCompute { }; struct TEvGetOperationResponse : public NActors::TEventLocal { - TEvGetOperationResponse(NYql::TIssues issues, NYdb::EStatus status) + TEvGetOperationResponse(NYql::TIssues issues, NYdb::EStatus status, bool ready) : Issues(std::move(issues)) , Status(status) + , Ready(ready) {} - TEvGetOperationResponse(NYdb::NQuery::EExecStatus execStatus, Ydb::StatusIds::StatusCode statusCode, const TVector& resultSetsMeta, const Ydb::TableStats::QueryStats& queryStats, NYql::TIssues issues) + TEvGetOperationResponse(NYdb::NQuery::EExecStatus execStatus, Ydb::StatusIds::StatusCode statusCode, const TVector& resultSetsMeta, const Ydb::TableStats::QueryStats& queryStats, NYql::TIssues issues, bool ready = true) : ExecStatus(execStatus) , StatusCode(statusCode) , ResultSetsMeta(resultSetsMeta) , QueryStats(queryStats) , Issues(std::move(issues)) , Status(NYdb::EStatus::SUCCESS) + , Ready(ready) {} NYdb::NQuery::EExecStatus ExecStatus = NYdb::NQuery::EExecStatus::Unspecified; @@ -137,6 +141,7 @@ struct TEvYdbCompute { Ydb::TableStats::QueryStats QueryStats; NYql::TIssues Issues; NYdb::EStatus Status; + bool Ready; }; struct TEvFetchScriptResultRequest : public NActors::TEventLocal { @@ -454,16 +459,17 @@ struct TEvYdbCompute { }; struct TEvCpuLoadResponse : public NActors::TEventLocal { - TEvCpuLoadResponse(double instantLoad = 0.0, double averageLoad = 0.0) - : 
InstantLoad(instantLoad), AverageLoad(averageLoad) + TEvCpuLoadResponse(double instantLoad = 0.0, double averageLoad = 0.0, ui32 cpuNumber = 0) + : InstantLoad(instantLoad), AverageLoad(averageLoad), CpuNumber(cpuNumber) {} TEvCpuLoadResponse(NYql::TIssues issues) - : InstantLoad(0.0), AverageLoad(0.0), Issues(std::move(issues)) + : InstantLoad(0.0), AverageLoad(0.0), CpuNumber(0), Issues(std::move(issues)) {} double InstantLoad; double AverageLoad; + ui32 CpuNumber; NYql::TIssues Issues; }; diff --git a/ydb/core/fq/libs/compute/ydb/executer_actor.cpp b/ydb/core/fq/libs/compute/ydb/executer_actor.cpp index 73a90ba51c6f..177fe00ded33 100644 --- a/ydb/core/fq/libs/compute/ydb/executer_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/executer_actor.cpp @@ -59,9 +59,10 @@ class TExecuterActor : public TBaseComputeActor { } }; - TExecuterActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const ::NYql::NCommon::TServiceCounters& queryCounters) + TExecuterActor(const TRunActorParams& params, Ydb::Query::StatsMode statsMode, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const ::NYql::NCommon::TServiceCounters& queryCounters) : TBaseComputeActor(queryCounters, "Executer") , Params(params) + , StatsMode(statsMode) , Parent(parent) , Connector(connector) , Pinger(pinger) @@ -114,7 +115,7 @@ class TExecuterActor : public TBaseComputeActor { } void SendExecuteScript() { - Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_EXECUTE_SCRIPT), SelfId(), Connector, Params.Sql, Params.JobId, Params.ResultTtl, Params.ExecutionTtl, GetSyntax(), GetExecuteMode(), Params.JobId + "_" + ToString(Params.RestartCount))); + Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_EXECUTE_SCRIPT), SelfId(), Connector, Params.Sql, Params.JobId, Params.ResultTtl, Params.ExecutionTtl, GetSyntax(), GetExecuteMode(), StatsMode, Params.JobId + "_" + ToString(Params.RestartCount))); } 
Ydb::Query::Syntax GetSyntax() const { @@ -162,6 +163,7 @@ class TExecuterActor : public TBaseComputeActor { private: TRunActorParams Params; + Ydb::Query::StatsMode StatsMode; TActorId Parent; TActorId Connector; TActorId Pinger; @@ -172,11 +174,12 @@ class TExecuterActor : public TBaseComputeActor { }; std::unique_ptr CreateExecuterActor(const TRunActorParams& params, + Ydb::Query::StatsMode statsMode, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, pinger, queryCounters); + return std::make_unique(params, statsMode, parent, connector, pinger, queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/executer_actor.h b/ydb/core/fq/libs/compute/ydb/executer_actor.h index 763501092489..c1a6c1d6478a 100644 --- a/ydb/core/fq/libs/compute/ydb/executer_actor.h +++ b/ydb/core/fq/libs/compute/ydb/executer_actor.h @@ -9,6 +9,7 @@ namespace NFq { std::unique_ptr CreateExecuterActor(const TRunActorParams& params, + Ydb::Query::StatsMode statsMode, const NActors::TActorId& parent, const NActors::TActorId& connector, const NActors::TActorId& pinger, diff --git a/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp b/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp index d2eeb18035b9..1053ef8e005f 100644 --- a/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/resources_cleaner_actor.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -65,14 +66,19 @@ class TResourcesCleanerActor : public TBaseComputeActor , Connector(connector) , OperationId(operationId) , Counters(GetStepCountersSubgroup()) + , BackoffTimer(20, 1000) {} static constexpr char ActorName[] = "FQ_RESOURCES_CLEANER_ACTOR"; + void SendForgetOperation(const TDuration& delay = TDuration::Zero()) { + Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_FORGET_OPERATION), delay, SelfId(), 
Connector, OperationId)); + } + void Start() { LOG_I("Start resources cleaner actor. Compute state: " << FederatedQuery::QueryMeta::ComputeStatus_Name(Params.Status)); Become(&TResourcesCleanerActor::StateFunc); - Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_FORGET_OPERATION), SelfId(), Connector, OperationId)); + SendForgetOperation(); } STRICT_STFUNC(StateFunc, @@ -81,6 +87,10 @@ class TResourcesCleanerActor : public TBaseComputeActor void Handle(const TEvYdbCompute::TEvForgetOperationResponse::TPtr& ev) { const auto& response = *ev.Get()->Get(); + if (response.Status == NYdb::EStatus::TIMEOUT || response.Status == NYdb::EStatus::CLIENT_DEADLINE_EXCEEDED) { + SendForgetOperation(TDuration::MilliSeconds(BackoffTimer.NextBackoffMs())); + return; + } if (response.Status != NYdb::EStatus::SUCCESS && response.Status != NYdb::EStatus::NOT_FOUND) { LOG_E("Can't forget operation: " << ev->Get()->Issues.ToOneLineString()); Send(Parent, new TEvYdbCompute::TEvResourcesCleanerResponse(ev->Get()->Issues, ev->Get()->Status)); @@ -98,6 +108,7 @@ class TResourcesCleanerActor : public TBaseComputeActor TActorId Connector; NYdb::TOperation::TOperationId OperationId; TCounters Counters; + NKikimr::TBackoffTimer BackoffTimer; }; std::unique_ptr CreateResourcesCleanerActor(const TRunActorParams& params, diff --git a/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp b/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp index ef6da5653b4a..b6f0ff8efc05 100644 --- a/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/result_writer_actor.cpp @@ -202,13 +202,14 @@ class TResultWriterActor : public TBaseComputeActor { } }; - TResultWriterActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, const ::NYql::NCommon::TServiceCounters& queryCounters) + TResultWriterActor(const TRunActorParams& params, const TActorId& parent, 
const TActorId& connector, const TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, bool operationEntryExpected, const ::NYql::NCommon::TServiceCounters& queryCounters) : TBaseComputeActor(queryCounters, "ResultWriter") , Params(params) , Parent(parent) , Connector(connector) , Pinger(pinger) , OperationId(operationId) + , OperationEntryExpected(operationEntryExpected) , Counters(GetStepCountersSubgroup()) {} @@ -246,6 +247,13 @@ class TResultWriterActor : public TBaseComputeActor { void Handle(const TEvYdbCompute::TEvGetOperationResponse::TPtr& ev) { const auto& response = *ev.Get()->Get(); + if (!OperationEntryExpected && response.Status == NYdb::EStatus::NOT_FOUND) { + LOG_I("Operation has been already removed"); + Send(Parent, new TEvYdbCompute::TEvResultWriterResponse({}, NYdb::EStatus::SUCCESS)); + CompleteAndPassAway(); + return; + } + if (response.Status != NYdb::EStatus::SUCCESS) { LOG_E("Can't get operation: " << ev->Get()->Issues.ToOneLineString()); Send(Parent, new TEvYdbCompute::TEvResultWriterResponse(ev->Get()->Issues, ev->Get()->Status)); @@ -314,6 +322,7 @@ class TResultWriterActor : public TBaseComputeActor { TActorId Connector; TActorId Pinger; NKikimr::NOperationId::TOperationId OperationId; + const bool OperationEntryExpected; TCounters Counters; TInstant StartTime; TString FetchToken; @@ -325,8 +334,9 @@ std::unique_ptr CreateResultWriterActor(const TRunActorParams& const TActorId& connector, const TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, pinger, operationId, queryCounters); + return std::make_unique(params, parent, connector, pinger, operationId, operationEntryExpected, queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/result_writer_actor.h b/ydb/core/fq/libs/compute/ydb/result_writer_actor.h index ee24d14772b1..ca6c1454d42b 100644 --- 
a/ydb/core/fq/libs/compute/ydb/result_writer_actor.h +++ b/ydb/core/fq/libs/compute/ydb/result_writer_actor.h @@ -13,6 +13,7 @@ std::unique_ptr CreateResultWriterActor(const TRunActorParams& const NActors::TActorId& connector, const NActors::TActorId& pinger, const NKikimr::NOperationId::TOperationId& operationId, + bool operationEntryExpected, const ::NYql::NCommon::TServiceCounters& queryCounters); } diff --git a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp index d6ef6600f05a..c27323eb5748 100644 --- a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp @@ -1,11 +1,10 @@ #include "base_compute_actor.h" +#include "status_tracker_actor.h" -#include #include #include #include #include -#include #include #include #include @@ -14,7 +13,6 @@ #include #include -#include #include #include @@ -41,6 +39,8 @@ class TStatusTrackerActor : public TBaseComputeActor { public: using IRetryPolicy = IRetryPolicy; + using TBase = TBaseComputeActor; + enum ERequestType { RT_GET_OPERATION, RT_PING, @@ -69,16 +69,16 @@ class TStatusTrackerActor : public TBaseComputeActor { } }; - TStatusTrackerActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, const ::NYql::NCommon::TServiceCounters& queryCounters) - : TBaseComputeActor(queryCounters, "StatusTracker") + TStatusTrackerActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) + : TBase(queryCounters, "StatusTracker") , Params(params) , Parent(parent) , Connector(connector) , Pinger(pinger) , OperationId(operationId) + , Builder(params.Config.GetCommon(), std::move(processor)) , Counters(GetStepCountersSubgroup()) , 
BackoffTimer(20, 1000) - , Compressor(params.Config.GetCommon().GetQueryArtifactsCompressionMethod(), params.Config.GetCommon().GetQueryArtifactsCompressionMinSize()) {} static constexpr char ActorName[] = "FQ_STATUS_TRACKER"; @@ -97,19 +97,23 @@ class TStatusTrackerActor : public TBaseComputeActor { void Handle(const TEvents::TEvForwardPingResponse::TPtr& ev) { auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); pingCounters->InFly->Dec(); + pingCounters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); + + if (ev.Get()->Get()->Success) { + pingCounters->Ok->Inc(); + } else { + pingCounters->Error->Inc(); + } if (ev->Cookie) { return; } - pingCounters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); if (ev.Get()->Get()->Success) { - pingCounters->Ok->Inc(); LOG_I("Information about the status of operation is stored"); Send(Parent, new TEvYdbCompute::TEvStatusTrackerResponse(Issues, Status, ExecStatus, ComputeStatus)); CompleteAndPassAway(); } else { - pingCounters->Error->Inc(); LOG_E("Error saving information about the status of operation"); Send(Parent, new TEvYdbCompute::TEvStatusTrackerResponse(NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Error saving information about the status of operation: " << ProtoToString(OperationId)}}, NYdb::EStatus::INTERNAL_ERROR, ExecStatus, ComputeStatus)); FailedAndPassAway(); @@ -133,8 +137,6 @@ class TStatusTrackerActor : public TBaseComputeActor { return; } - ReportPublicCounters(response.QueryStats); - StartTime = TInstant::Now(); LOG_D("Execution status: " << static_cast(response.ExecStatus)); switch (response.ExecStatus) { case NYdb::NQuery::EExecStatus::Unspecified: @@ -163,47 +165,42 @@ class TStatusTrackerActor : public TBaseComputeActor { } } - void ReportPublicCounters(const Ydb::TableStats::QueryStats& stats) { - try { - auto stat = GetPublicStat(GetV1StatFromV2Plan(stats.query_plan())); - auto publicCounters = GetPublicCounters(); + void ReportPublicCounters(const 
TPublicStat& stat) { + auto publicCounters = GetPublicCounters(); - if (stat.MemoryUsageBytes) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.memory_usage_bytes"); - counter = *stat.MemoryUsageBytes; - } + if (stat.MemoryUsageBytes) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.memory_usage_bytes"); + counter = *stat.MemoryUsageBytes; + } - if (stat.CpuUsageUs) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.cpu_usage_us", true); - counter = *stat.CpuUsageUs; - } + if (stat.CpuUsageUs) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.cpu_usage_us", true); + counter = *stat.CpuUsageUs; + } - if (stat.InputBytes) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.input_bytes", true); - counter = *stat.InputBytes; - } + if (stat.InputBytes) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.input_bytes", true); + counter = *stat.InputBytes; + } - if (stat.OutputBytes) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.output_bytes", true); - counter = *stat.OutputBytes; - } + if (stat.OutputBytes) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.output_bytes", true); + counter = *stat.OutputBytes; + } - if (stat.SourceInputRecords) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.source_input_records", true); - counter = *stat.SourceInputRecords; - } + if (stat.SourceInputRecords) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.source_input_records", true); + counter = *stat.SourceInputRecords; + } - if (stat.SinkOutputRecords) { - auto& counter = *publicCounters->GetNamedCounter("name", "query.sink_output_records", true); - counter = *stat.SinkOutputRecords; - } + if (stat.SinkOutputRecords) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.sink_output_records", true); + counter = *stat.SinkOutputRecords; + } - if (stat.RunningTasks) { - auto& counter = 
*publicCounters->GetNamedCounter("name", "query.running_tasks"); - counter = *stat.RunningTasks; - } - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + if (stat.RunningTasks) { + auto& counter = *publicCounters->GetNamedCounter("name", "query.running_tasks"); + counter = *stat.RunningTasks; } } @@ -211,75 +208,57 @@ class TStatusTrackerActor : public TBaseComputeActor { Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_GET_OPERATION), delay, SelfId(), Connector, OperationId)); } - void UpdateProgress() { + void OnPingRequestStart() { + StartTime = TInstant::Now(); auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); pingCounters->InFly->Inc(); - Fq::Private::PingTaskRequest pingTaskRequest; - PrepareAstAndPlan(pingTaskRequest, QueryStats.query_plan(), QueryStats.query_ast()); - try { - pingTaskRequest.set_statistics(GetV1StatFromV2Plan(QueryStats.query_plan())); - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + } + + void UpdateProgress() { + OnPingRequestStart(); + + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); } + ReportPublicCounters(Builder.PublicStat); Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest), 0, 1); } + void UpdateCpuQuota(double cpuUsage) { + TDuration duration = TDuration::MicroSeconds(QueryStats.total_duration_us()); + if (cpuUsage && duration) { + Send(NFq::ComputeDatabaseControlPlaneServiceActorId(), new TEvYdbCompute::TEvCpuQuotaAdjust(Params.Scope.ToString(), duration, cpuUsage)); + } + } + void Failed() { LOG_I("Execution status: Failed, Status: " << Status << ", StatusCode: " << NYql::NDqProto::StatusIds::StatusCode_Name(StatusCode) << " Issues: " << Issues.ToOneLineString()); - auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); - pingCounters->InFly->Inc(); - 
Fq::Private::PingTaskRequest pingTaskRequest; - NYql::IssuesToMessage(Issues, pingTaskRequest.mutable_issues()); - pingTaskRequest.set_pending_status_code(StatusCode); - PrepareAstAndPlan(pingTaskRequest, QueryStats.query_plan(), QueryStats.query_ast()); - try { - TDuration duration = TDuration::MicroSeconds(QueryStats.total_duration_us()); - double cpuUsage = 0.0; - pingTaskRequest.set_statistics(GetV1StatFromV2Plan(QueryStats.query_plan(), &cpuUsage)); - if (duration && cpuUsage) { - Send(NFq::ComputeDatabaseControlPlaneServiceActorId(), new TEvYdbCompute::TEvCpuQuotaAdjust(Params.Scope.ToString(), duration, cpuUsage)); - } - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + OnPingRequestStart(); + + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues, std::nullopt, StatusCode); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); } + ReportPublicCounters(Builder.PublicStat); + UpdateCpuQuota(Builder.CpuUsage); + Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); } void Complete() { LOG_I("Execution status: Complete " << Status << ", StatusCode: " << NYql::NDqProto::StatusIds::StatusCode_Name(StatusCode) << " Issues: " << Issues.ToOneLineString()); - auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); - pingCounters->InFly->Inc(); - Fq::Private::PingTaskRequest pingTaskRequest; - NYql::IssuesToMessage(Issues, pingTaskRequest.mutable_issues()); + OnPingRequestStart(); + ComputeStatus = ::FederatedQuery::QueryMeta::COMPLETING; - pingTaskRequest.set_status(ComputeStatus); - PrepareAstAndPlan(pingTaskRequest, QueryStats.query_plan(), QueryStats.query_ast()); - try { - TDuration duration = TDuration::MicroSeconds(QueryStats.total_duration_us()); - double cpuUsage = 0.0; - pingTaskRequest.set_statistics(GetV1StatFromV2Plan(QueryStats.query_plan(), &cpuUsage)); - if (duration && cpuUsage) { - Send(NFq::ComputeDatabaseControlPlaneServiceActorId(), new 
TEvYdbCompute::TEvCpuQuotaAdjust(Params.Scope.ToString(), duration, cpuUsage)); - } - } catch(const NJson::TJsonException& ex) { - LOG_E("Error statistics conversion: " << ex.what()); + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues, ComputeStatus, std::nullopt); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); } - Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); - } + ReportPublicCounters(Builder.PublicStat); + UpdateCpuQuota(Builder.CpuUsage); - void PrepareAstAndPlan(Fq::Private::PingTaskRequest& request, const TString& plan, const TString& expr) const { - if (Compressor.IsEnabled()) { - auto [astCompressionMethod, astCompressed] = Compressor.Compress(expr); - request.mutable_ast_compressed()->set_method(astCompressionMethod); - request.mutable_ast_compressed()->set_data(astCompressed); - - auto [planCompressionMethod, planCompressed] = Compressor.Compress(plan); - request.mutable_plan_compressed()->set_method(planCompressionMethod); - request.mutable_plan_compressed()->set_data(planCompressed); - } else { - request.set_ast(expr); - request.set_plan(plan); - } + Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); } private: @@ -288,16 +267,16 @@ class TStatusTrackerActor : public TBaseComputeActor { TActorId Connector; TActorId Pinger; NYdb::TOperation::TOperationId OperationId; + PingTaskRequestBuilder Builder; TCounters Counters; - TInstant StartTime; NYql::TIssues Issues; NYdb::EStatus Status = NYdb::EStatus::SUCCESS; NYdb::NQuery::EExecStatus ExecStatus = NYdb::NQuery::EExecStatus::Unspecified; NYql::NDqProto::StatusIds::StatusCode StatusCode = NYql::NDqProto::StatusIds::StatusCode::StatusIds_StatusCode_UNSPECIFIED; Ydb::TableStats::QueryStats QueryStats; NKikimr::TBackoffTimer BackoffTimer; - const TCompressor Compressor; FederatedQuery::QueryMeta::ComputeStatus ComputeStatus = FederatedQuery::QueryMeta::RUNNING; + TInstant StartTime; }; std::unique_ptr 
CreateStatusTrackerActor(const TRunActorParams& params, @@ -305,8 +284,9 @@ std::unique_ptr CreateStatusTrackerActor(const TRunActorParams& const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, pinger, operationId, queryCounters); + return std::make_unique(params, parent, connector, pinger, operationId, std::move(processor), queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h index a453e2d4d341..f9fc469202c0 100644 --- a/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h +++ b/ydb/core/fq/libs/compute/ydb/status_tracker_actor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -13,6 +14,7 @@ std::unique_ptr CreateStatusTrackerActor(const TRunActorParams& const NActors::TActorId& connector, const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters); } diff --git a/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp b/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp index de66c3c1c167..c876bcd4422d 100644 --- a/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/stopper_actor.cpp @@ -1,14 +1,18 @@ #include "base_compute_actor.h" -#include "resources_cleaner_actor.h" +#include "stopper_actor.h" +#include #include #include #include #include +#include #include #include #include +#include + #include #include @@ -32,14 +36,21 @@ using namespace NFq; class TStopperActor : public TBaseComputeActor { public: + + using TBase = TBaseComputeActor; + enum ERequestType { RT_CANCEL_OPERATION, + RT_GET_OPERATION, + RT_PING, RT_MAX }; class TCounters: public virtual TThrRefBase { std::array Requests = CreateArray({ - { MakeIntrusive("CancelOperation") } + { 
MakeIntrusive("CancelOperation") }, + { MakeIntrusive("GetOperation") }, + { MakeIntrusive("Ping") } }); ::NMonitoring::TDynamicCounterPtr Counters; @@ -58,12 +69,14 @@ class TStopperActor : public TBaseComputeActor { } }; - TStopperActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const NYdb::TOperation::TOperationId& operationId, const ::NYql::NCommon::TServiceCounters& queryCounters) - : TBaseComputeActor(queryCounters, "Stopper") + TStopperActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) + : TBase(queryCounters, "Stopper") , Params(params) , Parent(parent) , Connector(connector) + , Pinger(pinger) , OperationId(operationId) + , Builder(params.Config.GetCommon(), std::move(processor)) , Counters(GetStepCountersSubgroup()) {} @@ -77,17 +90,77 @@ class TStopperActor : public TBaseComputeActor { STRICT_STFUNC(StateFunc, hFunc(TEvYdbCompute::TEvCancelOperationResponse, Handle); + hFunc(TEvYdbCompute::TEvGetOperationResponse, Handle); + hFunc(TEvents::TEvForwardPingResponse, Handle); ) void Handle(const TEvYdbCompute::TEvCancelOperationResponse::TPtr& ev) { const auto& response = *ev.Get()->Get(); if (response.Status != NYdb::EStatus::SUCCESS && response.Status != NYdb::EStatus::NOT_FOUND && response.Status != NYdb::EStatus::PRECONDITION_FAILED) { - LOG_E("Can't cancel operation: " << ev->Get()->Issues.ToOneLineString()); - Send(Parent, new TEvYdbCompute::TEvStopperResponse(response.Issues, response.Status)); - FailedAndPassAway(); + LOG_E("Can't cancel operation: " << response.Issues.ToOneLineString()); + Failed(response.Status, response.Issues); return; } + + if (response.Status == NYdb::EStatus::NOT_FOUND) { + LOG_I("Operation successfully canceled and already removed"); + Complete(); + return; + } + LOG_I("Operation successfully 
canceled: " << response.Status); + Register(new TRetryActor(Counters.GetCounters(ERequestType::RT_GET_OPERATION), SelfId(), Connector, OperationId)); + } + + void Handle(const TEvYdbCompute::TEvGetOperationResponse::TPtr& ev) { + const auto& response = *ev.Get()->Get(); + if (response.Status != NYdb::EStatus::SUCCESS && response.Status != NYdb::EStatus::NOT_FOUND) { + LOG_E("Can't get operation: " << response.Issues.ToOneLineString()); + Failed(response.Status, response.Issues); + return; + } + + if (response.Status == NYdb::EStatus::NOT_FOUND) { + LOG_I("Operation has been already removed"); + Complete(); + return; + } + + auto statusCode = NYql::NDq::YdbStatusToDqStatus(response.StatusCode); + LOG_I("Operation successfully fetched, Status: " << response.Status << ", StatusCode: " << NYql::NDqProto::StatusIds::StatusCode_Name(statusCode) << " Issues: " << response.Issues.ToOneLineString()); + + StartTime = TInstant::Now(); + auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); + pingCounters->InFly->Inc(); + + Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(response.QueryStats, response.Issues, FederatedQuery::QueryMeta::ABORTING_BY_USER, statusCode); + if (Builder.Issues) { + LOG_W(Builder.Issues.ToOneLineString()); + } + Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest)); + } + + void Handle(const TEvents::TEvForwardPingResponse::TPtr& ev) { + auto pingCounters = Counters.GetCounters(ERequestType::RT_PING); + pingCounters->InFly->Dec(); + pingCounters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); + + if (ev.Get()->Get()->Success) { + pingCounters->Ok->Inc(); + LOG_I("Information about the status of operation is updated"); + } else { + pingCounters->Error->Inc(); + LOG_E("Error updating information about the status of operation"); + } + Complete(); + } + + void Failed(NYdb::EStatus status, NYql::TIssues issues) { + Send(Parent, new TEvYdbCompute::TEvStopperResponse(issues, status)); + 
FailedAndPassAway(); + } + + void Complete() { Send(Parent, new TEvYdbCompute::TEvStopperResponse({}, NYdb::EStatus::SUCCESS)); CompleteAndPassAway(); } @@ -96,16 +169,21 @@ class TStopperActor : public TBaseComputeActor { TRunActorParams Params; TActorId Parent; TActorId Connector; + TActorId Pinger; NYdb::TOperation::TOperationId OperationId; + PingTaskRequestBuilder Builder; TCounters Counters; + TInstant StartTime; }; std::unique_ptr CreateStopperActor(const TRunActorParams& params, const TActorId& parent, const TActorId& connector, + const TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters) { - return std::make_unique(params, parent, connector, operationId, queryCounters); + return std::make_unique(params, parent, connector, pinger, operationId, std::move(processor), queryCounters); } } diff --git a/ydb/core/fq/libs/compute/ydb/stopper_actor.h b/ydb/core/fq/libs/compute/ydb/stopper_actor.h index e4046dc176f6..f078664566c2 100644 --- a/ydb/core/fq/libs/compute/ydb/stopper_actor.h +++ b/ydb/core/fq/libs/compute/ydb/stopper_actor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -11,7 +12,9 @@ namespace NFq { std::unique_ptr CreateStopperActor(const TRunActorParams& params, const NActors::TActorId& parent, const NActors::TActorId& connector, + const NActors::TActorId& pinger, const NYdb::TOperation::TOperationId& operationId, + std::unique_ptr&& processor, const ::NYql::NCommon::TServiceCounters& queryCounters); } diff --git a/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp b/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp index db8319ec5a1d..a576908d779f 100644 --- a/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/ydb_connector_actor.cpp @@ -24,12 +24,7 @@ class TYdbConnectorActor : public NActors::TActorBootstrapped(ComputeConnection, CredentialsProviderFactory); @@ -55,7 +50,7 @@ class 
TYdbConnectorActor : public NActors::TActorBootstrappedExecuteScript(event.Sql, settings) @@ -68,16 +63,18 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.Status().GetIssues(), - response.Status().GetStatus(), database), + response.Status().GetStatus()), 0, cookie); } } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); @@ -90,21 +87,34 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend(recipient, new TEvYdbCompute::TEvGetOperationResponse(response.Metadata().ExecStatus, static_cast(response.Status().GetStatus()), response.Metadata().ResultSetsMeta, response.Metadata().ExecStats, RemoveDatabaseFromIssues(response.Status().GetIssues(), database)), 0, cookie); + actorSystem->Send( + recipient, + new TEvYdbCompute::TEvGetOperationResponse( + response.Metadata().ExecStatus, + static_cast(response.Status().GetStatus()), + response.Metadata().ResultSetsMeta, + response.Metadata().ExecStats, + RemoveDatabaseFromIssues(response.Status().GetIssues(), database), + response.Ready()), + 0, cookie); } else { actorSystem->Send( recipient, MakeResponse( + database, response.Status().GetIssues(), - response.Status().GetStatus(), database), + response.Status().GetStatus(), + true), 0, cookie); } } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR, + true), 0, cookie); } }); @@ -124,16 +134,18 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.GetIssues(), - response.GetStatus(), database), + response.GetStatus()), 0, cookie); } } catch (...) 
{ actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); @@ -148,15 +160,17 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.GetIssues(), - response.GetStatus(), database), + response.GetStatus()), 0, cookie); } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); @@ -171,28 +185,30 @@ class TYdbConnectorActor : public NActors::TActorBootstrappedSend( recipient, MakeResponse( + database, response.GetIssues(), - response.GetStatus(), database), + response.GetStatus()), 0, cookie); } catch (...) { actorSystem->Send( recipient, MakeResponse( + database, CurrentExceptionMessage(), - NYdb::EStatus::GENERIC_ERROR, database), + NYdb::EStatus::GENERIC_ERROR), 0, cookie); } }); } - template - static TResponse* MakeResponse(TString msg, NYdb::EStatus status, TString databasePath) { - return new TResponse(NYql::TIssues{NYql::TIssue{RemoveDatabaseFromStr(msg, databasePath)}}, status); + template + static TResponse* MakeResponse(TString databasePath, TString msg, TArgs&&... args) { + return new TResponse(NYql::TIssues{NYql::TIssue{RemoveDatabaseFromStr(msg, databasePath)}}, std::forward(args)...); } - template - static TResponse* MakeResponse(const NYql::TIssues& issues, NYdb::EStatus status, TString databasePath) { - return new TResponse(RemoveDatabaseFromIssues(issues, databasePath), status); + template + static TResponse* MakeResponse(TString databasePath, const NYql::TIssues& issues, TArgs&&... 
args) { + return new TResponse(RemoveDatabaseFromIssues(issues, databasePath), std::forward(args)...); } private: @@ -201,7 +217,6 @@ class TYdbConnectorActor : public NActors::TActorBootstrapped QueryClient; std::unique_ptr OperationClient; - Ydb::Query::StatsMode StatsMode; }; std::unique_ptr CreateConnectorActor(const TRunActorParams& params) { diff --git a/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp b/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp index e382035e183a..89392e15bda2 100644 --- a/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp +++ b/ydb/core/fq/libs/compute/ydb/ydb_run_actor.cpp @@ -98,10 +98,14 @@ class TYdbRunActor : public NActors::TActorBootstrapped { } void Handle(const TEvYdbCompute::TEvStatusTrackerResponse::TPtr& ev) { + if (CancelOperationIsRunning("StatusTrackerResponse (aborting). ")) { + return; + } + auto& response = *ev->Get(); if (response.Status == NYdb::EStatus::NOT_FOUND) { // FAILING / ABORTING_BY_USER / ABORTING_BY_SYSTEM LOG_I("StatusTrackerResponse (not found). 
Status: " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, Params.Status).release()); + CreateFinalizer(Params.Status); return; } @@ -114,13 +118,17 @@ class TYdbRunActor : public NActors::TActorBootstrapped { Params.Status = response.ComputeStatus; LOG_I("StatusTrackerResponse (success) " << response.Status << " ExecStatus: " << static_cast(response.ExecStatus) << " Issues: " << response.Issues.ToOneLineString()); if (response.ExecStatus == NYdb::NQuery::EExecStatus::Completed) { - Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId).release()); + Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId, true).release()); } else { - Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + CreateResourcesCleaner(); } } void Handle(const TEvYdbCompute::TEvResultWriterResponse::TPtr& ev) { + if (CancelOperationIsRunning("ResultWriterResponse (aborting). ")) { + return; + } + auto& response = *ev->Get(); if (response.Status != NYdb::EStatus::SUCCESS) { LOG_I("ResultWriterResponse (failed). 
Status: " << response.Status << " Issues: " << response.Issues.ToOneLineString()); @@ -128,7 +136,7 @@ class TYdbRunActor : public NActors::TActorBootstrapped { return; } LOG_I("ResultWriterResponse (success) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + CreateResourcesCleaner(); } void Handle(const TEvYdbCompute::TEvResourcesCleanerResponse::TPtr& ev) { @@ -139,22 +147,23 @@ class TYdbRunActor : public NActors::TActorBootstrapped { return; } LOG_I("ResourcesCleanerResponse (success) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, IsAborted ? FederatedQuery::QueryMeta::ABORTING_BY_USER : Params.Status).release()); + CreateFinalizer(IsAborted ? FederatedQuery::QueryMeta::ABORTING_BY_USER : Params.Status); } void Handle(const TEvYdbCompute::TEvFinalizerResponse::TPtr ev) { // Pinger is no longer available at this place. // The query can be restarted only after the expiration of lease in case of error auto& response = *ev->Get(); - LOG_I("FinalizerResponse ( " << (response.Status == NYdb::EStatus::SUCCESS ? "success" : "failed") << ") " << response.Status << " Issues: " << response.Issues.ToOneLineString()); + LOG_I("FinalizerResponse ( " << (response.Status == NYdb::EStatus::SUCCESS ? 
"success" : "failed") << " ) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); FinishAndPassAway(); } void Handle(TEvents::TEvQueryActionResult::TPtr& ev) { LOG_I("QueryActionResult: " << FederatedQuery::QueryAction_Name(ev->Get()->Action)); - if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED && !IsAborted) { + // Start cancel operation only when StatusTracker or ResultWriter is running + if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED && !IsAborted && !FinalizationStarted) { IsAborted = true; - Register(ActorFactory->CreateStopper(SelfId(), Connector, Params.OperationId).release()); + Register(ActorFactory->CreateStopper(SelfId(), Connector, Pinger, Params.OperationId).release()); } } @@ -166,7 +175,7 @@ class TYdbRunActor : public NActors::TActorBootstrapped { return; } LOG_I("StopperResponse (success) " << response.Status << " Issues: " << response.Issues.ToOneLineString()); - Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + CreateResourcesCleaner(); } void Run() { // recover points @@ -183,9 +192,9 @@ class TYdbRunActor : public NActors::TActorBootstrapped { break; case FederatedQuery::QueryMeta::COMPLETING: if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED) { - Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId).release()); + Register(ActorFactory->CreateResultWriter(SelfId(), Connector, Pinger, Params.OperationId, false).release()); } else { - Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, Params.Status).release()); + CreateFinalizer(Params.Status); } break; case FederatedQuery::QueryMeta::FAILING: @@ -194,7 +203,7 @@ class TYdbRunActor : public NActors::TActorBootstrapped { if (Params.OperationId.GetKind() != Ydb::TOperationId::UNUSED) { Register(ActorFactory->CreateStatusTracker(SelfId(), Connector, Pinger, Params.OperationId).release()); } else { - 
Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, Params.Status).release()); + CreateFinalizer(Params.Status); } break; default: @@ -220,8 +229,28 @@ class TYdbRunActor : public NActors::TActorBootstrapped { PassAway(); } + void CreateResourcesCleaner() { + FinalizationStarted = true; + Register(ActorFactory->CreateResourcesCleaner(SelfId(), Connector, Params.OperationId).release()); + } + + void CreateFinalizer(FederatedQuery::QueryMeta::ComputeStatus status) { + FinalizationStarted = true; + Register(ActorFactory->CreateFinalizer(Params, SelfId(), Pinger, ExecStatus, status).release()); + } + + bool CancelOperationIsRunning(const TString& stage) const { + if (!IsAborted) { + return false; + } + + LOG_I(stage << "Stop task execution, cancel operation now is running"); + return true; + } + private: bool IsAborted = false; + bool FinalizationStarted = false; TActorId FetcherId; NYdb::NQuery::EExecStatus ExecStatus = NYdb::NQuery::EExecStatus::Unspecified; TRunActorParams Params; diff --git a/ydb/core/fq/libs/config/protos/compute.proto b/ydb/core/fq/libs/config/protos/compute.proto index 6084dc6cede6..c4d941fc0862 100644 --- a/ydb/core/fq/libs/config/protos/compute.proto +++ b/ydb/core/fq/libs/config/protos/compute.proto @@ -26,6 +26,7 @@ message TLoadControlConfig { uint32 PendingQueueSize = 6; // default 0 == instant decline if overloaded bool Strict = 7; // default false, whether to deny execution in load level unavailable uint32 CpuNumber = 8; + string MonitoringEndpoint = 9; // if defined, will be used as REST API instead of default GRPC } message TComputeDatabaseConfig { diff --git a/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp b/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp index 420c7743be3e..7d73bcee6855 100644 --- a/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp +++ b/ydb/core/fq/libs/control_plane_proxy/actors/query_utils.cpp @@ -177,6 +177,14 @@ TString 
MakeCreateExternalDataSourceQuery( switch (connectionContent.setting().connection_case()) { case FederatedQuery::ConnectionSetting::CONNECTION_NOT_SET: case FederatedQuery::ConnectionSetting::kYdbDatabase: + properties = fmt::format( + R"( + SOURCE_TYPE="Ydb", + DATABASE_ID={database_id}, + USE_TLS="{use_tls}" + )", + "database_id"_a = EncloseAndEscapeString(connectionContent.setting().ydb_database().database_id(), '"'), + "use_tls"_a = common.GetDisableSslForGenericDataSources() ? "false" : "true"); break; case FederatedQuery::ConnectionSetting::kClickhouseCluster: properties = fmt::format( diff --git a/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp b/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp index 1ddbb70aa802..66dd314978c9 100644 --- a/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp +++ b/ydb/core/fq/libs/control_plane_proxy/control_plane_proxy.cpp @@ -1,7 +1,6 @@ #include "config.h" #include "control_plane_proxy.h" #include "probes.h" -#include "utils.h" #include #include @@ -23,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/ydb/core/fq/libs/control_plane_proxy/events/events.h b/ydb/core/fq/libs/control_plane_proxy/events/events.h index 397a43a3a8ee..46091413dae6 100644 --- a/ydb/core/fq/libs/control_plane_proxy/events/events.h +++ b/ydb/core/fq/libs/control_plane_proxy/events/events.h @@ -394,4 +394,6 @@ struct TEvControlPlaneProxy { }; }; +NActors::TActorId ControlPlaneProxyActorId(); + } diff --git a/ydb/core/fq/libs/control_plane_proxy/utils.h b/ydb/core/fq/libs/control_plane_proxy/utils/utils.h similarity index 100% rename from ydb/core/fq/libs/control_plane_proxy/utils.h rename to ydb/core/fq/libs/control_plane_proxy/utils/utils.h diff --git a/ydb/core/fq/libs/control_plane_proxy/utils/ya.make b/ydb/core/fq/libs/control_plane_proxy/utils/ya.make new file mode 100644 index 000000000000..9fe9e6e570bf --- /dev/null +++ b/ydb/core/fq/libs/control_plane_proxy/utils/ya.make @@ -0,0 
+1,9 @@ +LIBRARY() + +PEERDIR( + ydb/public/api/protos +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/fq/libs/control_plane_proxy/ya.make b/ydb/core/fq/libs/control_plane_proxy/ya.make index 4bd32ad74c0c..bb6fe225efa7 100644 --- a/ydb/core/fq/libs/control_plane_proxy/ya.make +++ b/ydb/core/fq/libs/control_plane_proxy/ya.make @@ -33,6 +33,7 @@ END() RECURSE( actors events + utils ) RECURSE_FOR_TESTS( diff --git a/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp b/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp index 246d3f3852d1..c0802446d019 100644 --- a/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp +++ b/ydb/core/fq/libs/control_plane_storage/internal/task_ping.cpp @@ -256,7 +256,8 @@ TPingTaskParams ConstructHardPingTask( internal.clear_statistics(); PackStatisticsToProtobuf(*internal.mutable_statistics(), statistics); - if (!dumpRawStatistics) { + // global dumpRawStatistics will be removed with YQv1 + if (!dumpRawStatistics && !request.dump_raw_statistics()) { try { statistics = GetPrettyStatistics(statistics); } catch (const std::exception&) { diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp index 5bc2b76e7986..dd3f014e851f 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_bindings.cpp @@ -200,7 +200,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListBinding queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" BINDING_ID_COLUMN_NAME "`, `" BINDING_COLUMN_NAME "` FROM `" BINDINGS_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" BINDING_ID_COLUMN_NAME "`, `" BINDING_COLUMN_NAME "` FROM `" BINDINGS_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" BINDING_ID_COLUMN_NAME "` >= $last_binding\n" ); @@ -241,7 
+241,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListBinding } queryBuilder.AddText( - "ORDER BY `" BINDING_ID_COLUMN_NAME "`\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" BINDING_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp index c999bbae6177..1b539d3c8a77 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_connections.cpp @@ -216,7 +216,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListConnect queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" CONNECTION_ID_COLUMN_NAME "`, `" CONNECTION_COLUMN_NAME "` FROM `" CONNECTIONS_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" CONNECTION_ID_COLUMN_NAME "`, `" CONNECTION_COLUMN_NAME "` FROM `" CONNECTIONS_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" CONNECTION_ID_COLUMN_NAME "` >= $last_connection\n" ); @@ -257,7 +257,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListConnect } queryBuilder.AddText( - "ORDER BY `" CONNECTION_ID_COLUMN_NAME "`\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" CONNECTION_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); diff --git a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp index 7ab56126f135..32026450cafa 100644 --- a/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp +++ b/ydb/core/fq/libs/control_plane_storage/ydb_control_plane_storage_queries.cpp @@ -28,7 +28,7 @@ FederatedQuery::IamAuth::IdentityCase GetIamAuth(const FederatedQuery::Connectio const auto& setting = connection.content().setting(); switch (setting.connection_case()) { case 
FederatedQuery::ConnectionSetting::kYdbDatabase: - return setting.data_streams().auth().identity_case(); + return setting.ydb_database().auth().identity_case(); case FederatedQuery::ConnectionSetting::kClickhouseCluster: return setting.clickhouse_cluster().auth().identity_case(); case FederatedQuery::ConnectionSetting::kObjectStorage: @@ -391,7 +391,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListQueries queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" QUERY_ID_COLUMN_NAME "`, `" QUERY_COLUMN_NAME "` FROM `" QUERIES_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`, `" QUERY_COLUMN_NAME "` FROM `" QUERIES_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` >= $last_query AND (`" EXPIRE_AT_COLUMN_NAME "` is NULL OR `" EXPIRE_AT_COLUMN_NAME "` > $now)" ); @@ -461,7 +461,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListQueries } queryBuilder.AddText( - "ORDER BY " QUERY_ID_COLUMN_NAME "\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); @@ -1519,9 +1519,9 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetResultDa "SELECT `" QUERY_COLUMN_NAME "`, `" USER_COLUMN_NAME "`, `" VISIBILITY_COLUMN_NAME "`, `" STATUS_COLUMN_NAME "`, `" RESULT_SETS_EXPIRE_AT_COLUMN_NAME "` FROM $query_info;\n" "$result_id = SELECT `" RESULT_ID_COLUMN_NAME "` FROM $query_info\n" "WHERE `" RESULT_SETS_EXPIRE_AT_COLUMN_NAME "` >= $now;\n" - "SELECT `" RESULT_SET_ID_COLUMN_NAME "`, `" RESULT_SET_COLUMN_NAME "`, `" ROW_ID_COLUMN_NAME "` FROM `" RESULT_SETS_TABLE_NAME "`\n" + "SELECT `" RESULT_ID_COLUMN_NAME "`, `" RESULT_SET_ID_COLUMN_NAME "`, `" RESULT_SET_COLUMN_NAME "`, `" ROW_ID_COLUMN_NAME "` FROM `" RESULT_SETS_TABLE_NAME "`\n" "WHERE `" RESULT_ID_COLUMN_NAME "` = $result_id AND `" RESULT_SET_ID_COLUMN_NAME "` = $result_set_index AND `" ROW_ID_COLUMN_NAME "` >= $offset\n" - "ORDER 
BY `" ROW_ID_COLUMN_NAME "`\n" + "ORDER BY `" RESULT_ID_COLUMN_NAME "`, `" RESULT_SET_ID_COLUMN_NAME "`, `" ROW_ID_COLUMN_NAME "`\n" "LIMIT $limit;\n" ); @@ -1653,7 +1653,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListJobsReq queryBuilder.AddTimestamp("now", TInstant::Now()); queryBuilder.AddUint64("limit", limit + 1); queryBuilder.AddText( - "SELECT `" JOB_ID_COLUMN_NAME "`, `" JOB_COLUMN_NAME "` FROM `" JOBS_TABLE_NAME "`\n" + "SELECT `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`, `" JOB_ID_COLUMN_NAME "`, `" JOB_COLUMN_NAME "` FROM `" JOBS_TABLE_NAME "`\n" "WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` >= $last_query\n" "AND `" JOB_ID_COLUMN_NAME "` >= $last_job AND (`" EXPIRE_AT_COLUMN_NAME "` is NULL OR `" EXPIRE_AT_COLUMN_NAME "` > $now) " ); @@ -1681,7 +1681,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvListJobsReq } queryBuilder.AddText( - "ORDER BY `" JOB_ID_COLUMN_NAME "`\n" + "ORDER BY `" SCOPE_COLUMN_NAME "`, `" QUERY_ID_COLUMN_NAME "`, `" JOB_ID_COLUMN_NAME "`\n" "LIMIT $limit;" ); diff --git a/ydb/core/fq/libs/events/events.h b/ydb/core/fq/libs/events/events.h index e0a84beb4d4c..73fa33dc4d6d 100644 --- a/ydb/core/fq/libs/events/events.h +++ b/ydb/core/fq/libs/events/events.h @@ -250,6 +250,8 @@ struct TEvents { }; }; +NActors::TActorId MakeYqPrivateProxyId(); + } // namespace NFq template<> diff --git a/ydb/core/fq/libs/init/init.cpp b/ydb/core/fq/libs/init/init.cpp index 0714425a6eb5..d045af7b0a02 100644 --- a/ydb/core/fq/libs/init/init.cpp +++ b/ydb/core/fq/libs/init/init.cpp @@ -177,7 +177,10 @@ void Init( &protoConfig.GetGateways().GetHttpGateway(), yqCounters->GetSubgroup("subcomponent", "http_gateway")); - const auto connectorClient = NYql::NConnector::MakeClientGRPC(protoConfig.GetGateways().GetGeneric().GetConnector()); + NYql::NConnector::IClient::TPtr connectorClient = nullptr; + if (protoConfig.GetGateways().GetGeneric().HasConnector()) { + 
connectorClient = NYql::NConnector::MakeClientGRPC(protoConfig.GetGateways().GetGeneric().GetConnector()); + } if (protoConfig.GetTokenAccessor().GetEnabled()) { const auto& tokenAccessorConfig = protoConfig.GetTokenAccessor(); diff --git a/ydb/core/fq/libs/protos/fq_private.proto b/ydb/core/fq/libs/protos/fq_private.proto index bba6f0ffdb40..61d9f4925658 100644 --- a/ydb/core/fq/libs/protos/fq_private.proto +++ b/ydb/core/fq/libs/protos/fq_private.proto @@ -162,6 +162,7 @@ message PingTaskRequest { string operation_id = 35; string execution_id = 36; NYql.NDqProto.StatusIds.StatusCode pending_status_code = 37; + bool dump_raw_statistics = 38; } message PingTaskResult { diff --git a/ydb/core/grpc_services/query/rpc_execute_query.cpp b/ydb/core/grpc_services/query/rpc_execute_query.cpp index 6865cb8379e5..ba8b42cf88bc 100644 --- a/ydb/core/grpc_services/query/rpc_execute_query.cpp +++ b/ydb/core/grpc_services/query/rpc_execute_query.cpp @@ -394,12 +394,18 @@ class TExecuteQueryRPC : public TActorBootstrapped { return; } - if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - Request_->SetRuHeader(record.GetConsumedRu()); + Ydb::Query::ExecuteQueryResponsePart response; - auto& kqpResponse = record.GetResponse(); + if (NeedReportStats(*Request_->GetProtoRequest())) { + hasTrailingMessage = true; + FillQueryStats(*response.mutable_exec_stats(), kqpResponse); + if (NeedReportAst(*Request_->GetProtoRequest())) { + response.mutable_exec_stats()->set_query_ast(kqpResponse.GetQueryAst()); + } + } - Ydb::Query::ExecuteQueryResponsePart response; + if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { + Request_->SetRuHeader(record.GetConsumedRu()); if (QueryAction == NKikimrKqp::QUERY_ACTION_EXECUTE) { for(int i = 0; i < kqpResponse.GetYdbResults().size(); i++) { @@ -415,27 +421,15 @@ class TExecuteQueryRPC : public TActorBootstrapped { hasTrailingMessage = true; response.mutable_tx_meta()->set_id(kqpResponse.GetTxMeta().id()); } - - if 
(NeedReportStats(*Request_->GetProtoRequest())) { - hasTrailingMessage = true; - FillQueryStats(*response.mutable_exec_stats(), kqpResponse); - if (NeedReportAst(*Request_->GetProtoRequest())) { - response.mutable_exec_stats()->set_query_ast(kqpResponse.GetQueryAst()); - } - } - - if (hasTrailingMessage) { - response.set_status(Ydb::StatusIds::SUCCESS); - response.mutable_issues()->CopyFrom(issueMessage); - TString out; - Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); - const auto finishStreamFlag = NYdbGrpc::IRequestContextBase::EStreamCtrl::FINISH; - Request_->SendSerializedResult(std::move(out), record.GetYdbStatus(), finishStreamFlag); - this->PassAway(); - } } - if (!hasTrailingMessage) { + if (hasTrailingMessage) { + response.set_status(record.GetYdbStatus()); + response.mutable_issues()->CopyFrom(issueMessage); + TString out; + Y_PROTOBUF_SUPPRESS_NODISCARD response.SerializeToString(&out); + ReplySerializedAndFinishStream(record.GetYdbStatus(), std::move(out)); + } else { NYql::TIssues issues; NYql::IssuesFromMessage(issueMessage, issues); ReplyFinishStream(record.GetYdbStatus(), issueMessage); @@ -454,6 +448,12 @@ class TExecuteQueryRPC : public TActorBootstrapped { ReplyFinishStream(Ydb::StatusIds::INTERNAL_ERROR, issue); } + void ReplySerializedAndFinishStream(Ydb::StatusIds::StatusCode status, TString&& buf) { + const auto finishStreamFlag = NYdbGrpc::IRequestContextBase::EStreamCtrl::FINISH; + Request_->SendSerializedResult(std::move(buf), status, finishStreamFlag); + this->PassAway(); + } + void ReplyFinishStream(Ydb::StatusIds::StatusCode status, const NYql::TIssue& issue) { google::protobuf::RepeatedPtrField issuesMessage; NYql::IssueToMessage(issue, issuesMessage.Add()); diff --git a/ydb/core/grpc_services/rpc_fq.cpp b/ydb/core/grpc_services/rpc_fq.cpp index 49701c1b80b6..8d165726f340 100644 --- a/ydb/core/grpc_services/rpc_fq.cpp +++ b/ydb/core/grpc_services/rpc_fq.cpp @@ -4,9 +4,8 @@ #include #include #include -#include #include 
-#include +#include #include #include diff --git a/ydb/core/grpc_services/rpc_fq_internal.cpp b/ydb/core/grpc_services/rpc_fq_internal.cpp index 96dd18b34492..02c80ba4d125 100644 --- a/ydb/core/grpc_services/rpc_fq_internal.cpp +++ b/ydb/core/grpc_services/rpc_fq_internal.cpp @@ -3,7 +3,6 @@ #include "rpc_deferrable.h" #include -#include #include #include diff --git a/ydb/core/grpc_services/service_fq.h b/ydb/core/grpc_services/service_fq.h index f632bb2ee9cc..85b226f8ea02 100644 --- a/ydb/core/grpc_services/service_fq.h +++ b/ydb/core/grpc_services/service_fq.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace NKikimr { namespace NGRpcService { diff --git a/ydb/core/grpc_services/ya.make b/ydb/core/grpc_services/ya.make index c3fd88321a38..6af54cac7e44 100644 --- a/ydb/core/grpc_services/ya.make +++ b/ydb/core/grpc_services/ya.make @@ -95,8 +95,7 @@ PEERDIR( ydb/core/discovery ydb/core/engine ydb/core/formats - ydb/core/fq/libs/actors - ydb/core/fq/libs/control_plane_proxy + ydb/core/fq/libs/events ydb/core/fq/libs/control_plane_proxy/events ydb/core/grpc_services/base ydb/core/grpc_services/counters diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index a127d271e60f..c0e407f01933 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -1140,19 +1140,15 @@ class TSelfCheckRequest : public TActorBootstrapped { static void Check(TSelfCheckContext& context, const NKikimrWhiteboard::TSystemStateInfo::TPoolStats& poolStats) { if (poolStats.name() == "System" || poolStats.name() == "IC" || poolStats.name() == "IO") { if (poolStats.usage() >= 0.99) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Pool usage over 99%", ETags::OverloadState); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage is over than 99%", ETags::OverloadState); } else if (poolStats.usage() >= 0.95) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool 
usage over 95%", ETags::OverloadState); - } else if (poolStats.usage() >= 0.90) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 90%", ETags::OverloadState); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage is over than 95%", ETags::OverloadState); } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } } else { if (poolStats.usage() >= 0.99) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 99%", ETags::OverloadState); - } else if (poolStats.usage() >= 0.95) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 95%", ETags::OverloadState); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage is over than 99%", ETags::OverloadState); } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } @@ -1222,7 +1218,7 @@ class TSelfCheckRequest : public TActorBootstrapped { break; case TNodeTabletState::ETabletState::RestartsTooOften: computeTabletStatus.set_state("RESTARTS_TOO_OFTEN"); - tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Tablets are restarting too often", ETags::TabletState); + tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Tablets are restarting too often", ETags::TabletState); break; case TNodeTabletState::ETabletState::Dead: computeTabletStatus.set_state("DEAD"); @@ -1261,7 +1257,7 @@ class TSelfCheckRequest : public TActorBootstrapped { TSelfCheckContext rrContext(&context, "NODE_UPTIME"); if (databaseState.NodeRestartsPerPeriod[nodeId] >= 30) { - rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Node is restarting too often", ETags::Uptime); + rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Node is restarting too often", ETags::Uptime); } else if (databaseState.NodeRestartsPerPeriod[nodeId] >= 10) { rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "The number of node restarts has increased", ETags::Uptime); } else { diff --git 
a/ydb/core/kafka_proxy/actors/kafka_read_session_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_read_session_actor.cpp index cd3f6dd7d674..a1886ca53752 100644 --- a/ydb/core/kafka_proxy/actors/kafka_read_session_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_read_session_actor.cpp @@ -36,11 +36,10 @@ void TKafkaReadSessionActor::HandleWakeup(TEvKafka::TEvWakeup::TPtr, const TActo return; } - for (auto& topicToPartitions: NewPartitionsToLockOnTime) { - auto& partitions = topicToPartitions.second; + for (auto& [topicName, partitions]: NewPartitionsToLockOnTime) { for (auto partitionsIt = partitions.begin(); partitionsIt != partitions.end(); ) { if (partitionsIt->LockOn <= ctx.Now()) { - TopicPartitions[topicToPartitions.first].ToLock.emplace(partitionsIt->PartitionId); + TopicPartitions[topicName].ToLock.emplace(partitionsIt->PartitionId); NeedRebalance = true; partitionsIt = partitions.erase(partitionsIt); } else { @@ -408,6 +407,8 @@ void TKafkaReadSessionActor::HandlePipeDestroyed(TEvTabletPipe::TEvClientDestroy } void TKafkaReadSessionActor::ProcessBalancerDead(ui64 tabletId, const TActorContext& ctx) { + NewPartitionsToLockOnTime.clear(); + for (auto& [topicName, topicInfo] : TopicsInfo) { if (topicInfo.TabletID == tabletId) { auto partitionsIt = TopicPartitions.find(topicName); @@ -579,8 +580,7 @@ void TKafkaReadSessionActor::HandleReleasePartition(TEvPersQueue::TEvReleasePart auto newPartitionsToLockCount = newPartitionsToLockIt == NewPartitionsToLockOnTime.end() ? 0 : newPartitionsToLockIt->second.size(); auto topicPartitionsIt = TopicPartitions.find(pathIt->second->GetInternalName()); - Y_ABORT_UNLESS(topicPartitionsIt != TopicPartitions.end()); - Y_ABORT_UNLESS(record.GetCount() <= topicPartitionsIt->second.ToLock.size() + topicPartitionsIt->second.ReadingNow.size() + newPartitionsToLockCount); + Y_ABORT_UNLESS(record.GetCount() <= (topicPartitionsIt.IsEnd() ? 
0 : topicPartitionsIt->second.ToLock.size() + topicPartitionsIt->second.ReadingNow.size()) + newPartitionsToLockCount); for (ui32 c = 0; c < record.GetCount(); ++c) { // if some partition not locked yet, then release it without rebalance @@ -599,18 +599,19 @@ void TKafkaReadSessionActor::HandleReleasePartition(TEvPersQueue::TEvReleasePart } NeedRebalance = true; - size_t partitionToReleaseIndex = 0; - size_t i = 0; + ui32 partitionToRelease = 0; + ui32 i = 0; - for (size_t partIndex = 0; partIndex < topicPartitionsIt->second.ReadingNow.size(); partIndex++) { - if (!topicPartitionsIt->second.ToRelease.contains(partIndex) && (group == 0 || partIndex + 1 == group)) { + for (auto curPartition : topicPartitionsIt->second.ReadingNow) { + if (!topicPartitionsIt->second.ToRelease.contains(curPartition) && (group == 0 || curPartition + 1 == group)) { ++i; - if (rand() % i == 0) { // will lead to 1/n probability for each of n partitions - partitionToReleaseIndex = partIndex; + if (rand() % i == 0) { + partitionToRelease = curPartition; } } } - topicPartitionsIt->second.ToRelease.emplace(partitionToReleaseIndex); + + topicPartitionsIt->second.ToRelease.emplace(partitionToRelease); } } diff --git a/ydb/core/kqp/common/events/script_executions.h b/ydb/core/kqp/common/events/script_executions.h index 6b2b331e368e..f5157a1a10b2 100644 --- a/ydb/core/kqp/common/events/script_executions.h +++ b/ydb/core/kqp/common/events/script_executions.h @@ -221,20 +221,28 @@ struct TEvFetchScriptResultsQueryResponse : public NActors::TEventLocal { + struct TDescription { + TDescription(const TString& executionId, const TString& database, const TString& customerSuppliedId, const TString& userToken) + : ExecutionId(executionId) + , Database(database) + , CustomerSuppliedId(customerSuppliedId) + , UserToken(userToken) + {} + + TString ExecutionId; + TString Database; + + TString CustomerSuppliedId; + TString UserToken; + std::vector Sinks; + std::vector SecretNames; + }; + 
TEvSaveScriptExternalEffectRequest(const TString& executionId, const TString& database, const TString& customerSuppliedId, const TString& userToken) - : ExecutionId(executionId) - , Database(database) - , CustomerSuppliedId(customerSuppliedId) - , UserToken(userToken) + : Description(executionId, database, customerSuppliedId, userToken) {} - TString ExecutionId; - TString Database; - - TString CustomerSuppliedId; - TString UserToken; - std::vector Sinks; - std::vector SecretNames; + TDescription Description; }; struct TEvSaveScriptExternalEffectResponse : public NActors::TEventLocal { @@ -248,31 +256,41 @@ struct TEvSaveScriptExternalEffectResponse : public NActors::TEventLocal { + struct TDescription { + TDescription(EFinalizationStatus finalizationStatus, const TString& executionId, const TString& database, + Ydb::StatusIds::StatusCode operationStatus, Ydb::Query::ExecStatus execStatus, NYql::TIssues issues, std::optional queryStats, + std::optional queryPlan, std::optional queryAst, std::optional leaseGeneration) + : FinalizationStatus(finalizationStatus) + , ExecutionId(executionId) + , Database(database) + , OperationStatus(operationStatus) + , ExecStatus(execStatus) + , Issues(std::move(issues)) + , QueryStats(std::move(queryStats)) + , QueryPlan(std::move(queryPlan)) + , QueryAst(std::move(queryAst)) + , LeaseGeneration(leaseGeneration) + {} + + EFinalizationStatus FinalizationStatus; + TString ExecutionId; + TString Database; + Ydb::StatusIds::StatusCode OperationStatus; + Ydb::Query::ExecStatus ExecStatus; + NYql::TIssues Issues; + std::optional QueryStats; + std::optional QueryPlan; + std::optional QueryAst; + std::optional LeaseGeneration; + }; + TEvScriptFinalizeRequest(EFinalizationStatus finalizationStatus, const TString& executionId, const TString& database, Ydb::StatusIds::StatusCode operationStatus, Ydb::Query::ExecStatus execStatus, NYql::TIssues issues = {}, std::optional queryStats = std::nullopt, std::optional queryPlan = std::nullopt, 
std::optional queryAst = std::nullopt, std::optional leaseGeneration = std::nullopt) - : FinalizationStatus(finalizationStatus) - , ExecutionId(executionId) - , Database(database) - , OperationStatus(operationStatus) - , ExecStatus(execStatus) - , Issues(std::move(issues)) - , QueryStats(std::move(queryStats)) - , QueryPlan(std::move(queryPlan)) - , QueryAst(std::move(queryAst)) - , LeaseGeneration(leaseGeneration) + : Description(finalizationStatus, executionId, database, operationStatus, execStatus, issues, queryStats, queryPlan, queryAst, leaseGeneration) {} - EFinalizationStatus FinalizationStatus; - TString ExecutionId; - TString Database; - Ydb::StatusIds::StatusCode OperationStatus; - Ydb::Query::ExecStatus ExecStatus; - NYql::TIssues Issues; - std::optional QueryStats; - std::optional QueryPlan; - std::optional QueryAst; - std::optional LeaseGeneration; + TDescription Description; }; struct TEvScriptFinalizeResponse : public NActors::TEventLocal { @@ -284,15 +302,14 @@ struct TEvScriptFinalizeResponse : public NActors::TEventLocal { - TEvSaveScriptFinalStatusResponse(const TString& customerSuppliedId, const TString& userToken) - : CustomerSuppliedId(customerSuppliedId) - , UserToken(userToken) - {} - + bool ApplicateScriptExternalEffectRequired = false; + bool OperationAlreadyFinalized = false; TString CustomerSuppliedId; TString UserToken; std::vector Sinks; std::vector SecretNames; + Ydb::StatusIds::StatusCode Status; + NYql::TIssues Issues; }; struct TEvDescribeSecretsResponse : public NActors::TEventLocal { diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 283c6a2cde88..29fa1de3bf9f 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -372,6 +372,18 @@ class TKqpCompileActor : public TActorBootstrapped { PassAway(); } + void FillCompileResult(std::unique_ptr preparingQuery, NKikimrKqp::EQueryType queryType) { + 
auto preparedQueryHolder = std::make_shared( + preparingQuery.release(), AppData()->FunctionRegistry); + preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType); + KqpCompileResult->PreparedQuery = preparedQueryHolder; + KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()); + + if (AstResult) { + KqpCompileResult->Ast = AstResult->Ast; + } + } + void Handle(TEvKqp::TEvContinueProcess::TPtr &ev, const TActorContext &ctx) { Y_ENSURE(!ev->Get()->QueryId); @@ -403,17 +415,7 @@ class TKqpCompileActor : public TActorBootstrapped { if (status == Ydb::StatusIds::SUCCESS) { YQL_ENSURE(kqpResult.PreparingQuery); - { - auto preparedQueryHolder = std::make_shared( - kqpResult.PreparingQuery.release(), AppData()->FunctionRegistry); - preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType); - KqpCompileResult->PreparedQuery = preparedQueryHolder; - KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()); - - if (AstResult) { - KqpCompileResult->Ast = AstResult->Ast; - } - } + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType); auto now = TInstant::Now(); auto duration = now - StartTime; @@ -423,6 +425,10 @@ class TKqpCompileActor : public TActorBootstrapped { << ", self: " << ctx.SelfID << ", duration: " << duration); } else { + if (kqpResult.PreparingQuery) { + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType); + } + LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_ACTOR, "Compilation failed" << ", self: " << ctx.SelfID << ", status: " << Ydb::StatusIds_StatusCode_Name(status) diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp index da29486bd7d5..5d29c47c9260 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp @@ -154,6 +154,14 @@ void 
TKqpScanFetcherActor::HandleExecute(TEvKqpCompute::TEvScanError::TPtr& ev) } if (state->State == EShardState::Starting) { + ++TotalRetries; + if (TotalRetries >= MAX_TOTAL_SHARD_RETRIES) { + CA_LOG_E("TKqpScanFetcherActor: broken tablet for this request " << state->TabletId + << ", retries limit exceeded (" << state->TotalRetries << "/" << TotalRetries << ")"); + SendGlobalFail(NDqProto::COMPUTE_STATE_FAILURE, YdbStatusToDqStatus(status), issues); + return PassAway(); + } + if (FindSchemeErrorInIssues(status, issues)) { return EnqueueResolveShard(state); } diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 4ff23bfa0165..eeca1aa36882 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -1615,13 +1615,13 @@ class TKqpDataExecuter : public TKqpExecuterBaseSinks.push_back(sink.GetExternalSink()); + scriptExternalEffect->Description.Sinks.push_back(sink.GetExternalSink()); } } } } } - scriptExternalEffect->SecretNames = SecretNames; + scriptExternalEffect->Description.SecretNames = SecretNames; if (!WaitRequired()) { return Execute(); diff --git a/ydb/core/kqp/executer_actor/ya.make b/ydb/core/kqp/executer_actor/ya.make index 9cb7618afb0d..9b0e374f5b7a 100644 --- a/ydb/core/kqp/executer_actor/ya.make +++ b/ydb/core/kqp/executer_actor/ya.make @@ -26,6 +26,7 @@ PEERDIR( ydb/core/client/minikql_compile ydb/core/formats ydb/core/kqp/common + ydb/core/kqp/compute_actor ydb/core/kqp/query_compiler ydb/core/kqp/rm_service ydb/core/kqp/topics diff --git a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp index 6104e99ef9d0..aa6f8b3ac855 100644 --- a/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp +++ b/ydb/core/kqp/federated_query/kqp_federated_query_helpers.cpp @@ -105,11 +105,12 @@ namespace NKikimr::NKqp { GenericGatewaysConfig}; // Init 
DatabaseAsyncResolver only if all requirements are met - if (DatabaseResolverActorId && GenericGatewaysConfig.HasMdbGateway() && MdbEndpointGenerator) { + if (DatabaseResolverActorId && MdbEndpointGenerator && + (GenericGatewaysConfig.HasMdbGateway() || GenericGatewaysConfig.HasYdbMvpEndpoint())) { result.DatabaseAsyncResolver = std::make_shared( actorSystem, DatabaseResolverActorId.value(), - "", // TODO: use YDB Gateway endpoint? + GenericGatewaysConfig.GetYdbMvpEndpoint(), GenericGatewaysConfig.GetMdbGateway(), MdbEndpointGenerator); } diff --git a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp index 03d2a475bbe3..6ffc4b58b3d2 100644 --- a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp +++ b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp @@ -22,9 +22,9 @@ class TScriptFinalizerActor : public TActorBootstrapped { const NKikimrConfig::TMetadataProviderConfig& metadataProviderConfig, const std::optional& federatedQuerySetup) : ReplyActor_(request->Sender) - , ExecutionId_(request->Get()->ExecutionId) - , Database_(request->Get()->Database) - , FinalizationStatus_(request->Get()->FinalizationStatus) + , ExecutionId_(request->Get()->Description.ExecutionId) + , Database_(request->Get()->Description.Database) + , FinalizationStatus_(request->Get()->Description.FinalizationStatus) , Request_(std::move(request)) , FinalizationTimeout_(TDuration::Seconds(finalizeScriptServiceConfig.GetScriptFinalizationTimeoutSeconds())) , MaximalSecretsSnapshotWaitTime_(2 * TDuration::Seconds(metadataProviderConfig.GetRefreshPeriodSeconds())) @@ -32,16 +32,20 @@ class TScriptFinalizerActor : public TActorBootstrapped { {} void Bootstrap() { - Register(CreateSaveScriptFinalStatusActor(std::move(Request_))); + Register(CreateSaveScriptFinalStatusActor(SelfId(), std::move(Request_))); Become(&TScriptFinalizerActor::FetchState); } STRICT_STFUNC(FetchState, 
hFunc(TEvSaveScriptFinalStatusResponse, Handle); - hFunc(TEvScriptExecutionFinished, Handle); ) void Handle(TEvSaveScriptFinalStatusResponse::TPtr& ev) { + if (!ev->Get()->ApplicateScriptExternalEffectRequired || ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + Reply(ev->Get()->OperationAlreadyFinalized, ev->Get()->Status, std::move(ev->Get()->Issues)); + return; + } + Schedule(FinalizationTimeout_, new TEvents::TEvWakeup()); Become(&TScriptFinalizerActor::PrepareState); @@ -168,7 +172,7 @@ class TScriptFinalizerActor : public TActorBootstrapped { ) void FinishScriptFinalization(std::optional status, NYql::TIssues issues) { - Register(CreateScriptFinalizationFinisherActor(ExecutionId_, Database_, status, std::move(issues))); + Register(CreateScriptFinalizationFinisherActor(SelfId(), ExecutionId_, Database_, status, std::move(issues))); Become(&TScriptFinalizerActor::FinishState); } @@ -181,7 +185,11 @@ class TScriptFinalizerActor : public TActorBootstrapped { } void Handle(TEvScriptExecutionFinished::TPtr& ev) { - Send(ReplyActor_, ev->Release()); + Reply(ev->Get()->OperationAlreadyFinalized, ev->Get()->Status, std::move(ev->Get()->Issues)); + } + + void Reply(bool operationAlreadyFinalized, Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) { + Send(ReplyActor_, new TEvScriptExecutionFinished(operationAlreadyFinalized, status, std::move(issues))); Send(MakeKqpFinalizeScriptServiceId(SelfId().NodeId()), new TEvScriptFinalizeResponse(ExecutionId_)); PassAway(); diff --git a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp index cf6c66d5b597..6c0868e4c42b 100644 --- a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp +++ b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_service.cpp @@ -27,9 +27,10 @@ class TKqpFinalizeScriptService : public TActorBootstrappedGet()->Sinks = FilterExternalSinks(ev->Get()->Sinks); + auto& description = 
ev->Get()->Description; + description.Sinks = FilterExternalSinks(description.Sinks); - if (!ev->Get()->Sinks.empty()) { + if (!description.Sinks.empty()) { Register(CreateSaveScriptExternalEffectActor(std::move(ev))); } else { Send(ev->Sender, new TEvSaveScriptExternalEffectResponse(Ydb::StatusIds::SUCCESS, {})); @@ -37,7 +38,7 @@ class TKqpFinalizeScriptService : public TActorBootstrappedGet()->ExecutionId; + TString executionId = ev->Get()->Description.ExecutionId; if (!FinalizationRequestsQueue_.contains(executionId)) { WaitingFinalizationExecutions_.push(executionId); diff --git a/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp b/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp index ad65bde5da0f..ad491f0d9e4c 100644 --- a/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/external_data_source/manager.cpp @@ -66,11 +66,12 @@ void FillCreateExternalDataSourceDesc(NKikimrSchemeOp::TExternalDataSourceDescri } static const TSet properties { - "database_name", - "protocol", - "mdb_cluster_id", + "database_name", + "protocol", // managed PG, CH + "mdb_cluster_id", // managed PG, CH + "database_id", // managed YDB "use_tls", - "schema" + "schema", // managed PG }; for (const auto& property: properties) { diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index 750fe8120dbc..b194950eee7b 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -182,6 +182,10 @@ class TAsyncValidateYqlResult : public TKqpAsyncResultBaseQuery().PrepareOnly); validateResult.PreparedQuery.reset(SessionCtx->Query().PreparingQuery.release()); validateResult.SqlVersion = SqlVersion; @@ -211,6 +215,10 @@ class TAsyncExplainYqlResult : public TKqpAsyncResultBase plans; for (auto id : SessionCtx->Query().ExecutionOrder) { @@ -253,6 +261,10 @@ class TAsyncExecuteYqlResult : public TKqpAsyncResultBase(queryResult.ProtobufArenaPtr.get())); @@ -300,6 +312,10 @@ class 
TAsyncExecuteKqlResult : public TKqpAsyncResultBaseGetPhysicalQuery().GetQueryPlan(); @@ -320,13 +336,28 @@ class TAsyncPrepareYqlResult : public TKqpAsyncResultBase queryCtx, const TKqpQueryRef& query, TMaybe sqlVersion) + TIntrusivePtr queryCtx, const TKqpQueryRef& query, TMaybe sqlVersion, + TIntrusivePtr transformCtx) : TKqpAsyncResultBase(queryRoot, exprCtx, transformer) , QueryCtx(queryCtx) + , ExprCtx(exprCtx) + , TransformCtx(transformCtx) , QueryText(query.Text) , SqlVersion(sqlVersion) {} void FillResult(TResult& prepareResult) const override { + if (!prepareResult.Success()) { + auto exprRoot = GetExprRoot(); + if (TransformCtx && TransformCtx->ExplainTransformerInput) { + exprRoot = TransformCtx->ExplainTransformerInput; + } + if (exprRoot) { + prepareResult.PreparingQuery = std::move(QueryCtx->PreparingQuery); + prepareResult.PreparingQuery->MutablePhysicalQuery()->SetQueryAst(KqpExprToPrettyString(*exprRoot, ExprCtx)); + } + return; + } + YQL_ENSURE(QueryCtx->PrepareOnly); YQL_ENSURE(QueryCtx->PreparingQuery); @@ -344,6 +375,8 @@ class TAsyncPrepareYqlResult : public TKqpAsyncResultBase QueryCtx; + NYql::TExprContext& ExprCtx; + TIntrusivePtr TransformCtx; TString QueryText; TMaybe SqlVersion; }; @@ -933,6 +966,7 @@ class TKqpHost : public IKqpHost { , IsInternalCall(isInternalCall) , FederatedQuerySetup(federatedQuerySetup) , SessionCtx(new TKikimrSessionContext(funcRegistry, config, TAppData::TimeProvider, TAppData::RandomProvider, userToken)) + , Config(config) , TypesCtx(MakeIntrusive()) , PlanBuilder(CreatePlanBuilder(*TypesCtx)) , FakeWorld(ExprCtx->NewWorld(TPosition())) @@ -1265,7 +1299,7 @@ class TKqpHost : public IKqpHost { } return MakeIntrusive(queryExpr.Get(), ctx, *YqlTransformer, SessionCtx->QueryPtr(), - query.Text, sqlVersion); + query.Text, sqlVersion, TransformCtx); } IAsyncQueryResultPtr PrepareDataQueryAstInternal(const TKqpQueryRef& queryAst, const TPrepareSettings& settings, @@ -1327,7 +1361,7 @@ class TKqpHost : public IKqpHost 
{ } return MakeIntrusive(queryExpr.Get(), ctx, *YqlTransformer, SessionCtx->QueryPtr(), - query.Text, sqlVersion); + query.Text, sqlVersion, TransformCtx); } IAsyncQueryResultPtr PrepareScanQueryInternal(const TKqpQueryRef& query, bool isSql, TExprContext& ctx, @@ -1354,7 +1388,7 @@ class TKqpHost : public IKqpHost { } return MakeIntrusive(queryExpr.Get(), ctx, *YqlTransformer, SessionCtx->QueryPtr(), - query.Text, sqlVersion); + query.Text, sqlVersion, TransformCtx); } IAsyncQueryResultPtr PrepareScanQueryAstInternal(const TKqpQueryRef& queryAst, TExprContext& ctx) { @@ -1474,11 +1508,12 @@ class TKqpHost : public IKqpHost { state->CredentialsFactory = FederatedQuerySetup->CredentialsFactory; state->Configuration->WriteThroughDqIntegration = true; state->Configuration->AllowAtomicUploadCommit = queryType == EKikimrQueryType::Script; - state->Configuration->Init(FederatedQuerySetup->S3GatewayConfig, TypesCtx); + state->Gateway = FederatedQuerySetup->HttpGateway; + state->ExecutorPoolId = AppData()->UserPoolId; - auto dataSource = NYql::CreateS3DataSource(state, FederatedQuerySetup->HttpGateway); - auto dataSink = NYql::CreateS3DataSink(state, FederatedQuerySetup->HttpGateway); + auto dataSource = NYql::CreateS3DataSource(state); + auto dataSink = NYql::CreateS3DataSink(state); TypesCtx->AddDataSource(NYql::S3ProviderName, std::move(dataSource)); TypesCtx->AddDataSink(NYql::S3ProviderName, std::move(dataSink)); @@ -1493,6 +1528,7 @@ class TKqpHost : public IKqpHost { TypesCtx.Get(), FuncRegistry, FederatedQuerySetup->DatabaseAsyncResolver, + FederatedQuerySetup->CredentialsFactory, FederatedQuerySetup->ConnectorClient, FederatedQuerySetup->GenericGatewayConfig ); @@ -1502,7 +1538,8 @@ class TKqpHost : public IKqpHost { } void Init(EKikimrQueryType queryType) { - KqpRunner = CreateKqpRunner(Gateway, Cluster, TypesCtx, SessionCtx, *FuncRegistry); + TransformCtx = MakeIntrusive(Config, SessionCtx->QueryPtr(), SessionCtx->TablesPtr()); + KqpRunner = 
CreateKqpRunner(Gateway, Cluster, TypesCtx, SessionCtx, TransformCtx, *FuncRegistry); ExprCtx->NodesAllocationLimit = SessionCtx->Config()._KqpExprNodesAllocationLimit.Get().GetRef(); ExprCtx->StringsAllocationLimit = SessionCtx->Config()._KqpExprStringsAllocationLimit.Get().GetRef(); @@ -1635,6 +1672,7 @@ class TKqpHost : public IKqpHost { std::optional FederatedQuerySetup; TIntrusivePtr SessionCtx; + TKikimrConfiguration::TPtr Config; TIntrusivePtr FuncRegistryHolder; const NKikimr::NMiniKQL::IFunctionRegistry* FuncRegistry; @@ -1648,6 +1686,7 @@ class TKqpHost : public IKqpHost { TExprNode::TPtr FakeWorld; TIntrusivePtr ExecuteCtx; + TIntrusivePtr TransformCtx; TIntrusivePtr KqpRunner; NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory{NExternalSource::CreateExternalSourceFactory({})}; diff --git a/ydb/core/kqp/host/kqp_host_impl.h b/ydb/core/kqp/host/kqp_host_impl.h index 550f9e2776d3..17110a986926 100644 --- a/ydb/core/kqp/host/kqp_host_impl.h +++ b/ydb/core/kqp/host/kqp_host_impl.h @@ -34,7 +34,9 @@ class TKqpAsyncResultBase : public NYql::IKikimrAsyncResult { YQL_ENSURE(HasResult()); if (Status.GetValue() == NYql::IGraphTransformer::TStatus::Error) { - return NYql::NCommon::ResultFromErrors(ExprCtx.IssueManager.GetIssues()); + TResult result = NYql::NCommon::ResultFromErrors(ExprCtx.IssueManager.GetIssues()); + FillResult(result); + return result; } YQL_ENSURE(Status.GetValue() == NYql::IGraphTransformer::TStatus::Ok); @@ -244,7 +246,7 @@ class IKqpRunner : public TThrRefBase { TIntrusivePtr CreateKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const NMiniKQL::IFunctionRegistry& funcRegistry); + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry); TAutoPtr CreateKqpExplainPreparedTransformer(TIntrusivePtr gateway, const TString& cluster, TIntrusivePtr transformCtx, const NMiniKQL::IFunctionRegistry* funcRegistry, diff --git 
a/ydb/core/kqp/host/kqp_runner.cpp b/ydb/core/kqp/host/kqp_runner.cpp index 6e0d9b7f98bc..8e113670b9ca 100644 --- a/ydb/core/kqp/host/kqp_runner.cpp +++ b/ydb/core/kqp/host/kqp_runner.cpp @@ -137,14 +137,14 @@ class TKqpRunner : public IKqpRunner { public: TKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const NMiniKQL::IFunctionRegistry& funcRegistry) + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry) : Gateway(gateway) , Cluster(cluster) , TypesCtx(*typesCtx) , SessionCtx(sessionCtx) , FunctionRegistry(funcRegistry) , Config(sessionCtx->ConfigPtr()) - , TransformCtx(MakeIntrusive(Config, sessionCtx->QueryPtr(), sessionCtx->TablesPtr())) + , TransformCtx(transformCtx) , OptimizeCtx(MakeIntrusive(cluster, Config, sessionCtx->QueryPtr(), sessionCtx->TablesPtr())) , BuildQueryCtx(MakeIntrusive()) @@ -377,9 +377,9 @@ class TKqpRunner : public IKqpRunner { TIntrusivePtr CreateKqpRunner(TIntrusivePtr gateway, const TString& cluster, const TIntrusivePtr& typesCtx, const TIntrusivePtr& sessionCtx, - const NMiniKQL::IFunctionRegistry& funcRegistry) + const TIntrusivePtr& transformCtx, const NMiniKQL::IFunctionRegistry& funcRegistry) { - return new TKqpRunner(gateway, cluster, typesCtx, sessionCtx, funcRegistry); + return new TKqpRunner(gateway, cluster, typesCtx, sessionCtx, transformCtx, funcRegistry); } } // namespace NKqp diff --git a/ydb/core/kqp/node_service/ya.make b/ydb/core/kqp/node_service/ya.make index 7dc91a19f358..8ffe88e4925e 100644 --- a/ydb/core/kqp/node_service/ya.make +++ b/ydb/core/kqp/node_service/ya.make @@ -10,6 +10,7 @@ PEERDIR( ydb/core/base ydb/core/cms/console ydb/core/kqp/common + ydb/core/kqp/compute_actor ydb/core/kqp/counters ydb/core/mind ydb/core/protos diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 7ec98aa7f569..4665639536dd 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp 
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -111,8 +112,28 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { } TMaybeNode RewriteAggregate(TExprBase node, TExprContext& ctx) { - TExprBase output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown() || KqpCtx.Config->HasOptUseFinalizeByKey(), KqpCtx.Config->HasOptUseFinalizeByKey()); - DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Ptr(), ctx); + TMaybeNode output; + auto aggregate = node.Cast(); + auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); + if (hopSetting) { + auto input = aggregate.Input().Maybe(); + if (!input) { + return node; + } + output = NHopping::RewriteAsHoppingWindow( + node, + ctx, + input.Cast(), + false, // analyticsHopping + TDuration::MilliSeconds(TDqSettings::TDefault::WatermarksLateArrivalDelayMs), + true, // defaultWatermarksMode + true); // syncActor + } else { + output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown() || KqpCtx.Config->HasOptUseFinalizeByKey(), KqpCtx.Config->HasOptUseFinalizeByKey()); + } + if (output) { + DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Cast().Ptr(), ctx); + } return output; } diff --git a/ydb/core/kqp/proxy_service/kqp_script_executions.cpp b/ydb/core/kqp/proxy_service/kqp_script_executions.cpp index 56a9dae3b58a..a2abb043fc36 100644 --- a/ydb/core/kqp/proxy_service/kqp_script_executions.cpp +++ b/ydb/core/kqp/proxy_service/kqp_script_executions.cpp @@ -478,7 +478,7 @@ class TScriptLeaseUpdater : public TQueryBase { class TScriptLeaseUpdateActor : public TActorBootstrapped { public: - using IRetryPolicy = IRetryPolicy; + using TLeaseUpdateRetryActor = TQueryRetryActor; TScriptLeaseUpdateActor(const TActorId& runScriptActorId, const TString& database, const TString& executionId, TDuration leaseDuration, TIntrusivePtr counters) : 
RunScriptActorId(runScriptActorId) @@ -489,44 +489,20 @@ class TScriptLeaseUpdateActor : public TActorBootstrapped::max(), LeaseDuration / 2), + Database, ExecutionId, LeaseDuration + )); Become(&TScriptLeaseUpdateActor::StateFunc); } STRICT_STFUNC(StateFunc, hFunc(TEvScriptLeaseUpdateResponse, Handle); - hFunc(NActors::TEvents::TEvWakeup, Wakeup); ) - void Wakeup(NActors::TEvents::TEvWakeup::TPtr&) { - CreateScriptLeaseUpdater(); - } - void Handle(TEvScriptLeaseUpdateResponse::TPtr& ev) { - auto queryStatus = ev->Get()->Status; - if (!ev->Get()->ExecutionEntryExists && queryStatus == Ydb::StatusIds::BAD_REQUEST || queryStatus == Ydb::StatusIds::SUCCESS) { - Reply(std::move(ev)); - return; - } - - if (RetryState == nullptr) { - CreateRetryState(); - } - - const TMaybe delay = RetryState->GetNextRetryDelay(queryStatus); - if (delay) { - Schedule(*delay, new NActors::TEvents::TEvWakeup()); - } else { - Reply(std::move(ev)); - } - } - - void Reply(TEvScriptLeaseUpdateResponse::TPtr&& ev) { if (Counters) { Counters->ReportLeaseUpdateLatency(TInstant::Now() - LeaseUpdateStartTime); } @@ -534,33 +510,6 @@ class TScriptLeaseUpdateActor : public TActorBootstrapped::max(), LeaseDuration / 2); - RetryState = policy->CreateRetryState(); - } - private: TActorId RunScriptActorId; TString Database; @@ -568,7 +517,6 @@ class TScriptLeaseUpdateActor : public TActorBootstrapped Counters; TInstant LeaseUpdateStartTime; - IRetryPolicy::IRetryState::TPtr RetryState = nullptr; }; class TCheckLeaseStatusActorBase : public TActorBootstrapped { @@ -646,9 +594,9 @@ class TCheckLeaseStatusActorBase : public TActorBootstrappedOperationStatus; - FinalExecStatus = ScriptFinalizeRequest->ExecStatus; - FinalIssues = ScriptFinalizeRequest->Issues; + FinalOperationStatus = ScriptFinalizeRequest->Description.OperationStatus; + FinalExecStatus = ScriptFinalizeRequest->Description.ExecStatus; + FinalIssues = ScriptFinalizeRequest->Description.Issues; 
Send(MakeKqpFinalizeScriptServiceId(SelfId().NodeId()), ScriptFinalizeRequest.release()); } @@ -884,7 +832,6 @@ class TCheckLeaseStatusActor : public TCheckLeaseStatusActorBase { class TForgetScriptExecutionOperationQueryActor : public TQueryBase { static constexpr i64 MAX_NUMBER_ROWS_IN_BATCH = 100000; - static constexpr TDuration MINIMAL_DEADLINE_TIME = TDuration::Seconds(1); struct TResultSetDescription { i64 MaxRowId; @@ -895,7 +842,7 @@ class TForgetScriptExecutionOperationQueryActor : public TQueryBase { TForgetScriptExecutionOperationQueryActor(const TString& executionId, const TString& database, TInstant operationDeadline) : ExecutionId(executionId) , Database(database) - , Deadline(operationDeadline - MINIMAL_DEADLINE_TIME) + , Deadline(operationDeadline) {} void OnRunQuery() override { @@ -1022,10 +969,14 @@ class TForgetScriptExecutionOperationQueryActor : public TQueryBase { Send(Owner, new TEvForgetScriptExecutionOperationResponse(status, std::move(issues))); } + static NYql::TIssues ForgetOperationTimeoutIssues() { + return { NYql::TIssue("Forget script execution operation timeout") }; + } + private: bool CheckDeadline() { if (TInstant::Now() >= Deadline) { - Finish(Ydb::StatusIds::TIMEOUT, "Forget script execution operation timeout"); + Finish(Ydb::StatusIds::TIMEOUT, ForgetOperationTimeoutIssues()); return false; } return true; @@ -1040,6 +991,8 @@ class TForgetScriptExecutionOperationQueryActor : public TQueryBase { class TForgetScriptExecutionOperationActor : public TActorBootstrapped { public: + using TForgetOperationRetryActor = TQueryRetryActor; + explicit TForgetScriptExecutionOperationActor(TEvForgetScriptExecutionOperation::TPtr ev) : Request(std::move(ev)) {} @@ -1075,7 +1028,18 @@ class TForgetScriptExecutionOperationActor : public TActorBootstrappedGet()->Database, Request->Get()->Deadline)); + TDuration minDelay = TDuration::MilliSeconds(10); + TDuration maxTime = Request->Get()->Deadline - TInstant::Now() - TDuration::Seconds(1); + if 
(maxTime <= minDelay) { + Reply(Ydb::StatusIds::TIMEOUT, TForgetScriptExecutionOperationQueryActor::ForgetOperationTimeoutIssues()); + return; + } + + Register(new TForgetOperationRetryActor( + SelfId(), + TForgetOperationRetryActor::IRetryPolicy::GetExponentialBackoffPolicy(TForgetOperationRetryActor::Retryable, minDelay, TDuration::MilliSeconds(200), TDuration::Seconds(1), std::numeric_limits::max(), maxTime), + ExecutionId, Request->Get()->Database, TInstant::Now() + maxTime + )); } void Handle(TEvForgetScriptExecutionOperationResponse::TPtr& ev) { @@ -1315,6 +1279,7 @@ class TGetScriptExecutionOperationActor : public TCheckLeaseStatusActorBase { Response->Get()->Ready = false; Response->Get()->Status = Ydb::StatusIds::SUCCESS; Response->Get()->Issues.Clear(); + Response->Get()->Metadata.set_exec_status(Ydb::Query::ExecStatus::EXEC_STATUS_UNSPECIFIED); } else { Response->Get()->Ready = true; Response->Get()->Status = GetOperationStatus(); @@ -1756,43 +1721,9 @@ class TSaveScriptExecutionResultMetaQuery : public TQueryBase { const TString SerializedMetas; }; -class TSaveScriptExecutionResultMetaActor : public TActorBootstrapped { -public: - TSaveScriptExecutionResultMetaActor(const NActors::TActorId& replyActorId, const TString& database, const TString& executionId, const TString& serializedMetas) - : ReplyActorId(replyActorId), Database(database), ExecutionId(executionId), SerializedMetas(serializedMetas) - { - } - - void Bootstrap() { - Register(new TSaveScriptExecutionResultMetaQuery(Database, ExecutionId, SerializedMetas)); - - Become(&TSaveScriptExecutionResultMetaActor::StateFunc); - } - - STRICT_STFUNC(StateFunc, - hFunc(TEvSaveScriptResultMetaFinished, Handle); - ) - - void Handle(TEvSaveScriptResultMetaFinished::TPtr& ev) { - if (ev->Get()->Status == Ydb::StatusIds::ABORTED) { - Register(new TSaveScriptExecutionResultMetaQuery(Database, ExecutionId, SerializedMetas)); - return; - } - - Send(ev->Forward(ReplyActorId)); - PassAway(); - } - -private: - 
const NActors::TActorId ReplyActorId; - const TString Database; - const TString ExecutionId; - const TString SerializedMetas; -}; - class TSaveScriptExecutionResultQuery : public TQueryBase { public: - TSaveScriptExecutionResultQuery(const TString& database, const TString& executionId, i32 resultSetId, TMaybe expireAt, i64 firstRow, Ydb::ResultSet&& resultSet) + TSaveScriptExecutionResultQuery(const TString& database, const TString& executionId, i32 resultSetId, TMaybe expireAt, i64 firstRow, Ydb::ResultSet resultSet) : Database(database), ExecutionId(executionId), ResultSetId(resultSetId), ExpireAt(expireAt), FirstRow(firstRow), ResultSet(std::move(resultSet)) { } @@ -1895,7 +1826,7 @@ class TSaveScriptExecutionResultActor : public TActorBootstrapped, i64, Ydb::ResultSet>(SelfId(), Database, ExecutionId, ResultSetId, ExpireAt, FirstRow, ResultSets.back())); FirstRow += numberRows; ResultSets.pop_back(); @@ -1977,13 +1908,13 @@ class TGetScriptExecutionResultQuery : public TQueryBase { AND execution_id = $execution_id AND (expire_at > CurrentUtcTimestamp() OR expire_at IS NULL); - SELECT row_id, result_set + SELECT database, execution_id, result_set_id, row_id, result_set FROM `.metadata/result_sets` WHERE database = $database AND execution_id = $execution_id AND result_set_id = $result_set_id AND row_id >= $offset - ORDER BY row_id + ORDER BY database, execution_id, result_set_id, row_id LIMIT $limit; )"; @@ -2203,8 +2134,8 @@ class TGetScriptExecutionResultActor : public TActorBootstrappedGet()->Database) + .Utf8(Request.Database) .Build() .AddParam("$execution_id") - .Utf8(Request->Get()->ExecutionId) + .Utf8(Request.ExecutionId) .Build() .AddParam("$customer_supplied_id") - .Utf8(Request->Get()->CustomerSuppliedId) + .Utf8(Request.CustomerSuppliedId) .Build() .AddParam("$user_token") - .Utf8(Request->Get()->UserToken) + .Utf8(Request.UserToken) .Build() .AddParam("$script_sinks") - .JsonDocument(SerializeSinks(Request->Get()->Sinks)) + 
.JsonDocument(SerializeSinks(Request.Sinks)) .Build() .AddParam("$script_secret_names") - .JsonDocument(SerializeSecretNames(Request->Get()->SecretNames)) + .JsonDocument(SerializeSecretNames(Request.SecretNames)) .Build(); RunDataQuery(sql, ¶ms); @@ -2255,7 +2186,7 @@ class TSaveScriptExternalEffectActor : public TQueryBase { } void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { - Send(Request->Sender, new TEvSaveScriptExternalEffectResponse(status, std::move(issues))); + Send(Owner, new TEvSaveScriptExternalEffectResponse(status, std::move(issues))); } private: @@ -2292,14 +2223,16 @@ class TSaveScriptExternalEffectActor : public TQueryBase { } private: - TEvSaveScriptExternalEffectRequest::TPtr Request; + TEvSaveScriptExternalEffectRequest::TDescription Request; }; class TSaveScriptFinalStatusActor : public TQueryBase { public: - explicit TSaveScriptFinalStatusActor(TEvScriptFinalizeRequest::TPtr ev) - : Request(ev) - {} + explicit TSaveScriptFinalStatusActor(const TEvScriptFinalizeRequest::TDescription& request) + : Request(request) + { + Response = std::make_unique(); + } void OnRunQuery() override { TString sql = R"( @@ -2328,10 +2261,10 @@ class TSaveScriptFinalStatusActor : public TQueryBase { NYdb::TParamsBuilder params; params .AddParam("$database") - .Utf8(Request->Get()->Database) + .Utf8(Request.Database) .Build() .AddParam("$execution_id") - .Utf8(Request->Get()->ExecutionId) + .Utf8(Request.ExecutionId) .Build(); RunDataQuery(sql, ¶ms, TTxControl::BeginTx()); @@ -2354,16 +2287,16 @@ class TSaveScriptFinalStatusActor : public TQueryBase { TMaybe finalizationStatus = result.ColumnParser("finalization_status").GetOptionalInt32(); if (finalizationStatus) { - if (Request->Get()->FinalizationStatus != *finalizationStatus) { + if (Request.FinalizationStatus != *finalizationStatus) { Finish(Ydb::StatusIds::PRECONDITION_FAILED, "Execution already have different finalization status"); return; } - 
ApplicateScriptExternalEffectRequired = true; + Response->ApplicateScriptExternalEffectRequired = true; } TMaybe operationStatus = result.ColumnParser("operation_status").GetOptionalInt32(); - if (Request->Get()->LeaseGeneration && !operationStatus) { + if (Request.LeaseGeneration && !operationStatus) { NYdb::TResultSetParser leaseResult(ResultSets[1]); if (leaseResult.RowsCount() == 0) { Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected operation state"); @@ -2378,7 +2311,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { return; } - if (*Request->Get()->LeaseGeneration != static_cast(*leaseGenerationInDatabase)) { + if (*Request.LeaseGeneration != static_cast(*leaseGenerationInDatabase)) { Finish(Ydb::StatusIds::PRECONDITION_FAILED, "Lease was lost"); return; } @@ -2386,12 +2319,12 @@ class TSaveScriptFinalStatusActor : public TQueryBase { TMaybe customerSuppliedId = result.ColumnParser("customer_supplied_id").GetOptionalUtf8(); if (customerSuppliedId) { - CustomerSuppliedId = *customerSuppliedId; + Response->CustomerSuppliedId = *customerSuppliedId; } TMaybe userToken = result.ColumnParser("user_token").GetOptionalUtf8(); if (userToken) { - UserToken = *userToken; + Response->UserToken = *userToken; } SerializedSinks = result.ColumnParser("script_sinks").GetOptionalJsonDocument(); @@ -2408,7 +2341,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { NKqpProto::TKqpExternalSink sink; NProtobufJson::Json2Proto(*serializedSink, sink); - Sinks.push_back(sink); + Response->Sinks.push_back(sink); } } @@ -2424,7 +2357,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { const NJson::TJsonValue* serializedSecretName; value.GetValuePointer(i, &serializedSecretName); - SecretNames.push_back(serializedSecretName->GetString()); + Response->SecretNames.push_back(serializedSecretName->GetString()); } } @@ -2443,12 +2376,12 @@ class TSaveScriptFinalStatusActor : public TQueryBase { if (operationStatus) { FinalStatusAlreadySaved = true; - 
OperationAlreadyFinalized = !finalizationStatus; + Response->OperationAlreadyFinalized = !finalizationStatus; CommitTransaction(); return; } - ApplicateScriptExternalEffectRequired = ApplicateScriptExternalEffectRequired || HasExternalEffect(); + Response->ApplicateScriptExternalEffectRequired = Response->ApplicateScriptExternalEffectRequired || HasExternalEffect(); FinishScriptExecution(); } @@ -2493,10 +2426,10 @@ class TSaveScriptFinalStatusActor : public TQueryBase { )"; TString serializedStats = "{}"; - if (Request->Get()->QueryStats) { + if (Request.QueryStats) { NJson::TJsonValue statsJson; Ydb::TableStats::QueryStats queryStats; - NGRpcService::FillQueryStats(queryStats, *Request->Get()->QueryStats); + NGRpcService::FillQueryStats(queryStats, *Request.QueryStats); NProtobufJson::Proto2Json(queryStats, statsJson, NProtobufJson::TProto2JsonConfig()); serializedStats = NJson::WriteJson(statsJson); } @@ -2504,40 +2437,40 @@ class TSaveScriptFinalStatusActor : public TQueryBase { NYdb::TParamsBuilder params; params .AddParam("$database") - .Utf8(Request->Get()->Database) + .Utf8(Request.Database) .Build() .AddParam("$execution_id") - .Utf8(Request->Get()->ExecutionId) + .Utf8(Request.ExecutionId) .Build() .AddParam("$operation_status") - .Int32(Request->Get()->OperationStatus) + .Int32(Request.OperationStatus) .Build() .AddParam("$execution_status") - .Int32(Request->Get()->ExecStatus) + .Int32(Request.ExecStatus) .Build() .AddParam("$finalization_status") - .Int32(Request->Get()->FinalizationStatus) + .Int32(Request.FinalizationStatus) .Build() .AddParam("$issues") - .JsonDocument(SerializeIssues(Request->Get()->Issues)) + .JsonDocument(SerializeIssues(Request.Issues)) .Build() .AddParam("$plan") - .JsonDocument(Request->Get()->QueryPlan.value_or("{}")) + .JsonDocument(Request.QueryPlan.value_or("{}")) .Build() .AddParam("$stats") .JsonDocument(serializedStats) .Build() .AddParam("$ast") - .Utf8(Request->Get()->QueryAst.value_or("")) + 
.Utf8(Request.QueryAst.value_or("")) .Build() .AddParam("$operation_ttl") .Interval(static_cast(OperationTtl.MicroSeconds())) .Build() .AddParam("$customer_supplied_id") - .Utf8(CustomerSuppliedId) + .Utf8(Response->CustomerSuppliedId) .Build() .AddParam("$user_token") - .Utf8(UserToken) + .Utf8(Response->UserToken) .Build() .AddParam("$script_sinks") .OptionalJsonDocument(SerializedSinks) @@ -2546,7 +2479,7 @@ class TSaveScriptFinalStatusActor : public TQueryBase { .OptionalJsonDocument(SerializedSecretNames) .Build() .AddParam("$applicate_script_external_effect_required") - .Bool(ApplicateScriptExternalEffectRequired) + .Bool(Response->ApplicateScriptExternalEffectRequired) .Build(); RunDataQuery(sql, ¶ms, TTxControl::ContinueAndCommitTx()); @@ -2559,42 +2492,31 @@ class TSaveScriptFinalStatusActor : public TQueryBase { void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { if (!FinalStatusAlreadySaved) { - KQP_PROXY_LOG_D("Finish script execution operation. ExecutionId: " << Request->Get()->ExecutionId - << ". " << Ydb::StatusIds::StatusCode_Name(Request->Get()->OperationStatus) - << ". Issues: " << Request->Get()->Issues.ToOneLineString() << ". Plan: " << Request->Get()->QueryPlan.value_or("")); - } - - if (!ApplicateScriptExternalEffectRequired || status != Ydb::StatusIds::SUCCESS) { - Send(Owner, new TEvScriptExecutionFinished(OperationAlreadyFinalized, status, issues)); - return; + KQP_PROXY_LOG_D("Finish script execution operation. ExecutionId: " << Request.ExecutionId + << ". " << Ydb::StatusIds::StatusCode_Name(Request.OperationStatus) + << ". Issues: " << Request.Issues.ToOneLineString() << ". 
Plan: " << Request.QueryPlan.value_or("")); } - auto response = std::make_unique(CustomerSuppliedId, UserToken); - response->Sinks = std::move(Sinks); - response->SecretNames = std::move(SecretNames); + Response->Status = status; + Response->Issues = std::move(issues); - Send(Owner, response.release()); + Send(Owner, Response.release()); } private: bool HasExternalEffect() const { - return !Sinks.empty(); + return !Response->Sinks.empty(); } private: - TEvScriptFinalizeRequest::TPtr Request; + TEvScriptFinalizeRequest::TDescription Request; + std::unique_ptr Response; - bool OperationAlreadyFinalized = false; bool FinalStatusAlreadySaved = false; - bool ApplicateScriptExternalEffectRequired = false; TDuration OperationTtl; - TString CustomerSuppliedId; - TString UserToken; TMaybe SerializedSinks; - std::vector Sinks; TMaybe SerializedSecretNames; - std::vector SecretNames; }; class TScriptFinalizationFinisherActor : public TQueryBase { @@ -2830,7 +2752,7 @@ NActors::IActor* CreateScriptLeaseUpdateActor(const TActorId& runScriptActorId, } NActors::IActor* CreateSaveScriptExecutionResultMetaActor(const NActors::TActorId& runScriptActorId, const TString& database, const TString& executionId, const TString& serializedMeta) { - return new TSaveScriptExecutionResultMetaActor(runScriptActorId, database, executionId, serializedMeta); + return new TQueryRetryActor(runScriptActorId, database, executionId, serializedMeta); } NActors::IActor* CreateSaveScriptExecutionResultActor(const NActors::TActorId& runScriptActorId, const TString& database, const TString& executionId, i32 resultSetId, TMaybe expireAt, i64 firstRow, Ydb::ResultSet&& resultSet) { @@ -2842,15 +2764,15 @@ NActors::IActor* CreateGetScriptExecutionResultActor(const NActors::TActorId& re } NActors::IActor* CreateSaveScriptExternalEffectActor(TEvSaveScriptExternalEffectRequest::TPtr ev) { - return new TSaveScriptExternalEffectActor(std::move(ev)); + return new TQueryRetryActor(ev->Sender, ev->Get()->Description); 
} -NActors::IActor* CreateSaveScriptFinalStatusActor(TEvScriptFinalizeRequest::TPtr ev) { - return new TSaveScriptFinalStatusActor(std::move(ev)); +NActors::IActor* CreateSaveScriptFinalStatusActor(const NActors::TActorId& finalizationActorId, TEvScriptFinalizeRequest::TPtr ev) { + return new TQueryRetryActor(finalizationActorId, ev->Get()->Description); } -NActors::IActor* CreateScriptFinalizationFinisherActor(const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues) { - return new TScriptFinalizationFinisherActor(executionId, database, operationStatus, std::move(operationIssues)); +NActors::IActor* CreateScriptFinalizationFinisherActor(const NActors::TActorId& finalizationActorId, const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues) { + return new TQueryRetryActor, NYql::TIssues>(finalizationActorId, executionId, database, operationStatus, operationIssues); } NActors::IActor* CreateScriptProgressActor(const TString& executionId, const TString& database, const TString& queryPlan, const TString& queryStats) { diff --git a/ydb/core/kqp/proxy_service/kqp_script_executions.h b/ydb/core/kqp/proxy_service/kqp_script_executions.h index 5781046a1df7..ea4fae00e842 100644 --- a/ydb/core/kqp/proxy_service/kqp_script_executions.h +++ b/ydb/core/kqp/proxy_service/kqp_script_executions.h @@ -33,8 +33,8 @@ NActors::IActor* CreateGetScriptExecutionResultActor(const NActors::TActorId& re // Compute external effects and updates status in database NActors::IActor* CreateSaveScriptExternalEffectActor(TEvSaveScriptExternalEffectRequest::TPtr ev); -NActors::IActor* CreateSaveScriptFinalStatusActor(TEvScriptFinalizeRequest::TPtr ev); -NActors::IActor* CreateScriptFinalizationFinisherActor(const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues); +NActors::IActor* CreateSaveScriptFinalStatusActor(const 
NActors::TActorId& finalizationActorId, TEvScriptFinalizeRequest::TPtr ev); +NActors::IActor* CreateScriptFinalizationFinisherActor(const NActors::TActorId& finalizationActorId, const TString& executionId, const TString& database, std::optional operationStatus, NYql::TIssues operationIssues); NActors::IActor* CreateScriptProgressActor(const TString& executionId, const TString& database, const TString& queryPlan, const TString& queryStats); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 3e75c1b9ba42..608147bd557b 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -947,11 +947,6 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { NYql::IDqIntegration* dqIntegration = provider->second->GetDqIntegration(); YQL_ENSURE(dqIntegration, "Unsupported dq source for provider: \"" << dataSourceCategory << "\""); auto& externalSource = *protoSource->MutableExternalSource(); - google::protobuf::Any& settings = *externalSource.MutableSettings(); - TString& sourceType = *externalSource.MutableType(); - dqIntegration->FillSourceSettings(source.Ref(), settings, sourceType); - YQL_ENSURE(!settings.type_url().empty(), "Data source provider \"" << dataSourceCategory << "\" did't fill dq source settings for its dq source node"); - YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceCategory << "\" did't fill dq source settings type for its dq source node"); // Partitioning TVector partitionParams; @@ -976,6 +971,12 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { externalSource.SetAuthInfo(CreateStructuredTokenParser(token).ToBuilder().RemoveSecrets().ToJson()); CreateStructuredTokenParser(token).ListReferences(SecretNames); } + + google::protobuf::Any& settings = *externalSource.MutableSettings(); + TString& sourceType = *externalSource.MutableType(); + 
dqIntegration->FillSourceSettings(source.Ref(), settings, sourceType, maxTasksPerStage); + YQL_ENSURE(!settings.type_url().empty(), "Data source provider \"" << dataSourceCategory << "\" didn't fill dq source settings for its dq source node"); + YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceCategory << "\" didn't fill dq source settings type for its dq source node"); } } diff --git a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp index 76c06908c638..e346725ef5ea 100644 --- a/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp +++ b/ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp @@ -206,10 +206,6 @@ class TRunScriptActor : public NActors::TActorBootstrapped { WaitFinalizationRequest = true; RunState = IsExecuting() ? ERunState::Finishing : RunState; - if (RunState == ERunState::Cancelling) { - Issues.AddIssue("Script execution is cancelled"); - } - auto scriptFinalizeRequest = std::make_unique( GetFinalizationStatusFromRunState(), ExecutionId, Database, Status, GetExecStatusFromStatusCode(Status), Issues, std::move(QueryStats), std::move(QueryPlan), std::move(QueryAst), LeaseGeneration diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 2d43845429eb..6bf41c2962ee 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1758,10 +1758,17 @@ class TKqpSessionActor : public TActorBootstrapped { const auto& phyQuery = QueryState->PreparedQuery->GetPhysicalQuery(); FillColumnsMeta(phyQuery, response); - } else if (compileResult->Status == Ydb::StatusIds::TIMEOUT && QueryState->QueryDeadlines.CancelAt) { - // The compile timeout cause cancelation execution of request. 
- // So in case of cancel after we can reply with canceled status - ev.SetYdbStatus(Ydb::StatusIds::CANCELLED); + } else { + if (compileResult->Status == Ydb::StatusIds::TIMEOUT && QueryState->QueryDeadlines.CancelAt) { + // The compile timeout cause cancelation execution of request. + // So in case of cancel after we can reply with canceled status + ev.SetYdbStatus(Ydb::StatusIds::CANCELLED); + } + + auto& preparedQuery = compileResult->PreparedQuery; + if (preparedQuery && QueryState->ReportStats() && QueryState->GetStatsMode() >= Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL) { + response.SetQueryAst(preparedQuery->GetPhysicalQuery().GetQueryAst()); + } } } diff --git a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp b/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp deleted file mode 100644 index 7b426cbdd468..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "ch_recipe_ut_helpers.h" - -#include -#include - -namespace NTestUtils { - - TString GetChHost() { - return "localhost"; - } - - ui32 GetChPort() { - return 19000; - } - - TString GetChUser() { - return "user"; - } - - TString GetChPassword() { - return "password"; - } - - TString GetChDatabase() { - return "default"; - } - - NClickHouse::TClient CreateClickhouseClient() { - NClickHouse::TClientOptions opt; - opt - .SetHost(GetChHost()) - .SetPort(GetChPort()) - .SetUser(GetChUser()) - .SetPassword(GetChPassword()); - - TInstant start = TInstant::Now(); - ui32 attempt = 0; - while ((TInstant::Now() - start).Seconds() < 60) { - attempt += 1; - try { - return NClickHouse::TClient(opt); - } catch (const TSystemError& e) { - Cerr << "Attempt " << attempt << ": " << e.what() << Endl; - Sleep(TDuration::MilliSeconds(100)); - } - } - - throw yexception() << "Failed to connect ClickHouse in " << attempt << " attempt(s)"; - } - -} // namespace NTestUtils diff --git 
a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h b/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h deleted file mode 100644 index c8f573979b51..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/ch_recipe_ut_helpers.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -namespace NTestUtils { - - TString GetChHost(); - ui32 GetChPort(); - TString GetChUser(); - TString GetChPassword(); - TString GetChDatabase(); - - NClickHouse::TClient CreateClickhouseClient(); - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp b/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp deleted file mode 100644 index 9b1ea42d4be5..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "connector_recipe_ut_helpers.h" - -#include -#include - -namespace NTestUtils { - - TString GetConnectorHost() { - return "localhost"; - } - - ui32 GetConnectorPort() { - return 50051; - } - - std::shared_ptr MakeKikimrRunnerWithConnector() { - NYql::TGenericConnectorConfig clientCfg; - clientCfg.MutableEndpoint()->set_host(GetConnectorHost()); - clientCfg.MutableEndpoint()->set_port(GetConnectorPort()); - - NKikimrConfig::TAppConfig appCfg; - appCfg.MutableFeatureFlags()->SetEnableExternalDataSources(true); - - auto kikimr = NKikimr::NKqp::NFederatedQueryTest::MakeKikimrRunner( - NYql::IHTTPGateway::Make(), - NYql::NConnector::MakeClientGRPC(clientCfg), - nullptr, - appCfg); - kikimr->GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableExternalDataSources(true); - return kikimr; - } - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h b/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h deleted file mode 100644 index e0ff53b228c6..000000000000 --- 
a/ydb/core/kqp/ut/federated_query/generic/connector_recipe_ut_helpers.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include -#include -#include - -#include - -namespace NTestUtils { - - TString GetConnectorHost(); - ui32 GetConnectorPort(); - - std::shared_ptr MakeKikimrRunnerWithConnector(); - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/docker-compose.yml b/ydb/core/kqp/ut/federated_query/generic/docker-compose.yml deleted file mode 100644 index 2991e0e883a1..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/docker-compose.yml +++ /dev/null @@ -1,25 +0,0 @@ -version: '3.4' -services: - postgresql: - image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 - environment: - POSTGRES_DB: db - POSTGRES_USER: user - POSTGRES_PASSWORD: password - ports: - - 15432:5432 - clickhouse: - image: clickhouse/clickhouse-server:23-alpine@sha256:b078c1cd294632afa2aeba3530e7ba2e568513da23304354f455a25fab575c06 - environment: - CLICKHOUSE_DB: db - CLICKHOUSE_USER: user - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 - CLICKHOUSE_PASSWORD: password - ports: - - 19000:9000 - - 18123:8123 - fq-connector-go: - image: ghcr.io/ydb-platform/fq-connector-go:v0.0.6-rc.8@sha256:74ebae0530d916c1842a7fddfbddc6c018763a0401f2f627a44e8829692fe41f - ports: - - 50051:50051 - network_mode: host diff --git a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp b/ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp deleted file mode 100644 index e849c7251c04..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_plan_ut.cpp +++ /dev/null @@ -1,179 +0,0 @@ -#include "ch_recipe_ut_helpers.h" -#include "connector_recipe_ut_helpers.h" -#include "pg_recipe_ut_helpers.h" -#include -#include -#include - -#include - -#include -#include - -#include - -using namespace NYdb; -using namespace NYdb::NQuery; -using namespace NTestUtils; -using namespace fmt::literals; - 
-Y_UNIT_TEST_SUITE(KqpGenericPlanTest) { - Y_UNIT_TEST(PgSource) { - pqxx::connection pgConnection = CreatePostgresqlConnection(); - - { - pqxx::work work{pgConnection}; - const TString sql = R"sql( - CREATE TABLE pg_table_plan_test ( - key INT4 PRIMARY KEY, - name TEXT, - value INT4 - ) - )sql"; - work.exec(sql); - work.commit(); - } - - std::shared_ptr kikimr = MakeKikimrRunnerWithConnector(); - - auto tableCLient = kikimr->GetTableClient(); - auto session = tableCLient.CreateSession().GetValueSync().GetSession(); - - // external tables to pg/ch - { - const TString sql = fmt::format( - R"sql( - CREATE OBJECT pg_password_obj (TYPE SECRET) WITH (value="{pg_password}"); - CREATE EXTERNAL DATA SOURCE pg_data_source WITH ( - SOURCE_TYPE="PostgreSQL", - LOCATION="{pg_host}:{pg_port}", - DATABASE_NAME="{pg_database}", - USE_TLS="FALSE", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{pg_user}", - PASSWORD_SECRET_NAME="pg_password_obj" - ); - )sql", - "pg_host"_a = GetPgHost(), - "pg_port"_a = GetPgPort(), - "pg_user"_a = GetPgUser(), - "pg_password"_a = GetPgPassword(), - "pg_database"_a = GetPgDatabase()); - auto result = session.ExecuteSchemeQuery(sql).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } - - const TString sql = R"sql( - PRAGMA generic.UsePredicatePushdown="true"; - SELECT * FROM pg_data_source.pg_table_plan_test - WHERE key > 42 AND value <> 0 - )sql"; - - auto queryClient = kikimr->GetQueryClient(); - TExecuteQueryResult queryResult = queryClient.ExecuteQuery( - sql, - TTxControl::BeginTx().CommitTx(), - TExecuteQuerySettings().ExecMode(EExecMode::Explain)) - .GetValueSync(); - - UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); - UNIT_ASSERT(queryResult.GetStats()); - UNIT_ASSERT(queryResult.GetStats()->GetPlan()); - Cerr << "Plan: " << *queryResult.GetStats()->GetPlan() << Endl; - NJson::TJsonValue plan; - UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); 
- - const auto& stagePlan = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]; - UNIT_ASSERT_VALUES_EQUAL(stagePlan["Node Type"].GetStringSafe(), "Source"); - const auto& sourceOp = stagePlan["Operators"].GetArraySafe()[0]; - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ExternalDataSource"].GetStringSafe(), "pg_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Database"].GetStringSafe(), GetPgDatabase()); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Protocol"].GetStringSafe(), "Native"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Table"].GetStringSafe(), "pg_table_plan_test"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Name"].GetStringSafe(), "Read pg_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["SourceType"].GetStringSafe(), "PostgreSql"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[0].GetStringSafe(), "key"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[1].GetStringSafe(), "name"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[2].GetStringSafe(), "value"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Filter"].GetStringSafe(), "item.key > 42 And item.value != 0"); - } - - Y_UNIT_TEST(ChSource) { - NClickHouse::TClient chClient = CreateClickhouseClient(); - - // ch_table_plan_test - { - const TString sql = R"sql( - CREATE TABLE ch_table_plan_test ( - key INT PRIMARY KEY, - name TEXT NULL - ) - ENGINE = MergeTree - )sql"; - chClient.Execute(sql); - } - - std::shared_ptr kikimr = MakeKikimrRunnerWithConnector(); - - auto tableCLient = kikimr->GetTableClient(); - auto session = tableCLient.CreateSession().GetValueSync().GetSession(); - - // external tables to pg/ch - { - const TString sql = fmt::format( - R"sql( - CREATE OBJECT ch_password_obj (TYPE SECRET) WITH (value="{ch_password}"); - CREATE EXTERNAL DATA SOURCE ch_data_source WITH ( - SOURCE_TYPE="ClickHouse", - LOCATION="{ch_host}:{ch_port}", - DATABASE_NAME="{ch_database}", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{ch_user}", - 
PASSWORD_SECRET_NAME="ch_password_obj" - ); - )sql", - "ch_host"_a = GetChHost(), - "ch_port"_a = GetChPort(), - "ch_database"_a = GetChDatabase(), - "ch_user"_a = GetChUser(), - "ch_password"_a = GetChPassword()); - auto result = session.ExecuteSchemeQuery(sql).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } - - const TString sql = R"sql( - PRAGMA generic.UsePredicatePushdown="true"; - SELECT * FROM ch_data_source.ch_table_plan_test - WHERE name IS NOT NULL - )sql"; - - auto queryClient = kikimr->GetQueryClient(); - TExecuteQueryResult queryResult = queryClient.ExecuteQuery( - sql, - TTxControl::BeginTx().CommitTx(), - TExecuteQuerySettings().ExecMode(EExecMode::Explain)) - .GetValueSync(); - - UNIT_ASSERT_C(queryResult.IsSuccess(), queryResult.GetIssues().ToString()); - UNIT_ASSERT(queryResult.GetStats()); - UNIT_ASSERT(queryResult.GetStats()->GetPlan()); - Cerr << "Plan: " << *queryResult.GetStats()->GetPlan() << Endl; - NJson::TJsonValue plan; - UNIT_ASSERT(NJson::ReadJsonTree(*queryResult.GetStats()->GetPlan(), &plan)); - - const auto& stagePlan = plan["Plan"]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]["Plans"][0]; - UNIT_ASSERT_VALUES_EQUAL(stagePlan["Node Type"].GetStringSafe(), "Source"); - const auto& sourceOp = stagePlan["Operators"].GetArraySafe()[0]; - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ExternalDataSource"].GetStringSafe(), "ch_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Database"].GetStringSafe(), GetChDatabase()); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Protocol"].GetStringSafe(), "Native"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Table"].GetStringSafe(), "ch_table_plan_test"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Name"].GetStringSafe(), "Read ch_data_source"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["SourceType"].GetStringSafe(), "ClickHouse"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[0].GetStringSafe(), "key"); - 
UNIT_ASSERT_VALUES_EQUAL(sourceOp["ReadColumns"].GetArraySafe()[1].GetStringSafe(), "name"); - UNIT_ASSERT_VALUES_EQUAL(sourceOp["Filter"].GetStringSafe(), "Exist(item.name)"); - } -} diff --git a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp b/ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp deleted file mode 100644 index 4c88715f2af6..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/kqp_generic_provider_join_ut.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include "ch_recipe_ut_helpers.h" -#include "connector_recipe_ut_helpers.h" -#include "pg_recipe_ut_helpers.h" - -#include - -#include - -#include - -using namespace NTestUtils; -using namespace fmt::literals; - -Y_UNIT_TEST_SUITE(FederatedQueryJoin) { - Y_UNIT_TEST(InnerJoinChPg) { - pqxx::connection pgConnection = CreatePostgresqlConnection(); - NClickHouse::TClient chClient = CreateClickhouseClient(); - - // pg_table_inner_join_test - { - pqxx::work work{pgConnection}; - const TString sql = R"sql( - CREATE TABLE pg_table_inner_join_test ( - key INT PRIMARY KEY, - name TEXT - ) - )sql"; - work.exec(sql); - - const TString insertData = R"sql( - INSERT INTO pg_table_inner_join_test - (key, name) - VALUES - (1, 'A'), - (2, 'B'), - (1000, 'C'); - )sql"; - work.exec(insertData); - - work.commit(); - } - - // ch_table_inner_join_test - { - const TString sql = R"sql( - CREATE TABLE ch_table_inner_join_test ( - key INT PRIMARY KEY, - name TEXT - ) - ENGINE = MergeTree - )sql"; - chClient.Execute(sql); - - const TString insertData = R"sql( - INSERT INTO ch_table_inner_join_test - (key, name) - VALUES - (1, 'X'), - (3, 'Y'), - (1000, 'Z'); - )sql"; - chClient.Execute(insertData); - } - - std::shared_ptr kikimr = MakeKikimrRunnerWithConnector(); - auto queryClient = kikimr->GetQueryClient(); - - // external tables to pg/ch - { - const TString sql = fmt::format( - R"sql( - CREATE OBJECT pg_password_obj (TYPE SECRET) WITH (value="{pg_password}"); - CREATE EXTERNAL DATA 
SOURCE pg_data_source WITH ( - SOURCE_TYPE="PostgreSQL", - LOCATION="{pg_host}:{pg_port}", - DATABASE_NAME="{pg_database}", - USE_TLS="FALSE", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{pg_user}", - PASSWORD_SECRET_NAME="pg_password_obj" - ); - - CREATE OBJECT ch_password_obj (TYPE SECRET) WITH (value="{ch_password}"); - CREATE EXTERNAL DATA SOURCE ch_data_source WITH ( - SOURCE_TYPE="ClickHouse", - LOCATION="{ch_host}:{ch_port}", - DATABASE_NAME="{ch_database}", - AUTH_METHOD="BASIC", - PROTOCOL="NATIVE", - LOGIN="{ch_user}", - PASSWORD_SECRET_NAME="ch_password_obj" - ); - )sql", - "pg_host"_a = GetPgHost(), - "pg_port"_a = GetPgPort(), - "pg_user"_a = GetPgUser(), - "pg_password"_a = GetPgPassword(), - "pg_database"_a = GetPgDatabase(), - "ch_host"_a = GetChHost(), - "ch_port"_a = GetChPort(), - "ch_database"_a = GetChDatabase(), - "ch_user"_a = GetChUser(), - "ch_password"_a = GetChPassword()); - auto result = queryClient.ExecuteQuery(sql, NYdb::NQuery::TTxControl::NoTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } - - // join - const TString sql = R"sql( - SELECT pg.* FROM ch_data_source.ch_table_inner_join_test AS ch - INNER JOIN pg_data_source.pg_table_inner_join_test AS pg - ON ch.key = pg.key - WHERE ch.key > 998 - )sql"; - - auto result = queryClient.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - // results - auto resultSet = result.GetResultSetParser(0); - UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 1); - UNIT_ASSERT(resultSet.TryNextRow()); - - const TMaybe key = resultSet.ColumnParser("key").GetOptionalInt32(); - UNIT_ASSERT(key); - UNIT_ASSERT_VALUES_EQUAL(*key, 1000); - - const TMaybe name = resultSet.ColumnParser("name").GetOptionalUtf8(); - UNIT_ASSERT(name); - UNIT_ASSERT_VALUES_EQUAL(name, "C"); - } -} diff --git a/ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json 
b/ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json deleted file mode 100644 index bdae23d53676..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/mdb_mock_config.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "clickhouse_clusters": { - "ch-managed": { - "hosts": [ - { - "name": "ch-managed-1", - "cluster_id": "ch-managed", - "health": 1 - } - ] - } - } -} diff --git a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp b/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp deleted file mode 100644 index 12b7ee6103e3..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "pg_recipe_ut_helpers.h" - -#include -#include - -#include - -using namespace fmt::literals; - -namespace NTestUtils { - - TString GetPgHost() { - return "localhost"; - } - - ui32 GetPgPort() { - return 15432; - } - - TString GetPgUser() { - return "user"; - } - - TString GetPgDatabase() { - return "db"; - } - - TString GetPgPassword() { - return "password"; - } - - pqxx::connection CreatePostgresqlConnection() { - const TString connectionString = fmt::format( - "host={host} port={port} dbname={database} user={user} password={password}", - "host"_a = GetPgHost(), - "port"_a = GetPgPort(), - "database"_a = GetPgDatabase(), - "user"_a = GetPgUser(), - "password"_a = GetPgPassword()); - - TInstant start = TInstant::Now(); - ui32 attempt = 0; - while ((TInstant::Now() - start).Seconds() < 60) { - attempt += 1; - try { - return pqxx::connection{connectionString}; - } catch (const pqxx::broken_connection& e) { - Cerr << "Attempt " << attempt << ": " << e.what() << Endl; - Sleep(TDuration::MilliSeconds(100)); - } - } - - throw yexception() << "Failed to connect PostgreSQL in " << attempt << " attempt(s)"; - } - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h b/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h deleted file mode 
100644 index 0e46e1ac3dc0..000000000000 --- a/ydb/core/kqp/ut/federated_query/generic/pg_recipe_ut_helpers.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include - -#include - -namespace NTestUtils { - - TString GetPgHost(); - ui32 GetPgPort(); - TString GetPgUser(); - TString GetPgDatabase(); - TString GetPgPassword(); - - pqxx::connection CreatePostgresqlConnection(); - -} // namespace NTestUtils diff --git a/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp b/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp index 8ec1995b5e53..a57c404fa5a8 100644 --- a/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/generic_ut/kqp_generic_provider_ut.cpp @@ -74,6 +74,22 @@ namespace NKikimr::NKqp { return settings; } + std::shared_ptr MakeDatabaseAsyncResolver(EProviderType providerType) { + std::shared_ptr databaseAsyncResolverMock; + + switch (providerType) { + case EProviderType::ClickHouse: + // We test access to managed databases only on the example of ClickHouse + databaseAsyncResolverMock = std::make_shared(); + databaseAsyncResolverMock->AddClickHouseCluster(); + break; + default: + break; + } + + return databaseAsyncResolverMock; + } + Y_UNIT_TEST_SUITE(GenericFederatedQuery) { void TestSelectAllFields(EProviderType providerType) { // prepare mock @@ -108,7 +124,6 @@ namespace NKikimr::NKqp { // step 3: ReadSplits std::vector colData = {10, 20, 30, 40, 50}; clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select() @@ -125,11 +140,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run 
test auto appConfig = CreateDefaultAppConfig(); @@ -162,15 +173,15 @@ namespace NKikimr::NKqp { MATCH_RESULT_WITH_INPUT(colData, resultSet, GetUint16); } - Y_UNIT_TEST(PostgreSQLLocal) { + Y_UNIT_TEST(PostgreSQLOnPremSelectAll) { TestSelectAllFields(EProviderType::PostgreSQL); } - Y_UNIT_TEST(ClickHouseManaged) { + Y_UNIT_TEST(ClickHouseManagedSelectAll) { TestSelectAllFields(EProviderType::ClickHouse); } - Y_UNIT_TEST(YdbManaged) { + Y_UNIT_TEST(YdbManagedSelectAll) { TestSelectAllFields(EProviderType::Ydb); } @@ -208,7 +219,6 @@ namespace NKikimr::NKqp { // step 3: ReadSplits clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select() @@ -222,11 +232,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run test auto appConfig = CreateDefaultAppConfig(); @@ -258,7 +264,7 @@ namespace NKikimr::NKqp { } } - Y_UNIT_TEST(PostgreSQLSelectConstant) { + Y_UNIT_TEST(PostgreSQLOnPremSelectConstant) { TestSelectConstant(EProviderType::PostgreSQL); } @@ -304,7 +310,6 @@ namespace NKikimr::NKqp { // step 3: ReadSplits clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select() @@ -318,11 +323,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run test auto appConfig = CreateDefaultAppConfig(); @@ -413,7 +414,6 @@ 
namespace NKikimr::NKqp { std::vector filterColumnData = {42, 24}; // clang-format off clientMock->ExpectReadSplits() - .DataSourceInstance(dataSourceInstance) .Split() .Description("some binary description") .Select(select) @@ -426,11 +426,7 @@ namespace NKikimr::NKqp { // clang-format on // prepare database resolver mock - std::shared_ptr databaseAsyncResolverMock; - if (providerType == EProviderType::ClickHouse) { - databaseAsyncResolverMock = std::make_shared(); - databaseAsyncResolverMock->AddClickHouseCluster(); - } + auto databaseAsyncResolverMock = MakeDatabaseAsyncResolver(providerType); // run test auto appConfig = CreateDefaultAppConfig(); diff --git a/ydb/core/kqp/ut/federated_query/ya.make b/ydb/core/kqp/ut/federated_query/ya.make index d09e0e44937f..a0defaea0fea 100644 --- a/ydb/core/kqp/ut/federated_query/ya.make +++ b/ydb/core/kqp/ut/federated_query/ya.make @@ -1,6 +1,5 @@ RECURSE_FOR_TESTS( common - generic generic_ut s3 style diff --git a/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp b/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp index 2000c73ad025..6aa539e02e5f 100644 --- a/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_agg_ut.cpp @@ -89,6 +89,107 @@ Y_UNIT_TEST_SUITE(KqpAgg) { [["Value3"];[1]] ])", FormatResultSetYson(result.GetResultSet(0))); } + + Y_UNIT_TEST(AggWithHop) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + SELECT + Text, + CAST(COUNT(*) as Int32) as Count, + SUM(Data) + FROM EightShard + GROUP BY HOP(CAST(Key AS Timestamp?), "PT1M", "PT1M", "PT1M"), Text + ORDER BY Text; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [["Value1"];[8];[15]]; + [["Value2"];[8];[16]]; + [["Value3"];[8];[17]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + + 
Y_UNIT_TEST(GroupByLimit) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + CREATE TABLE `TestTable` ( + a Uint64, + b Uint64, + c Uint64, + d Uint64, + e Uint64, + PRIMARY KEY (a, b, c) + ); + )").GetValueSync()); + + AssertSuccessResult(session.ExecuteDataQuery(R"( + REPLACE INTO `TestTable` (a, b, c, d, e) VALUES + (1, 11, 21, 31, 41), + (2, 12, 22, 32, 42), + (3, 13, 23, 33, 43); + )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync()); + + + { // query with 36 groups and limit 32 + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + PRAGMA GroupByLimit = '32'; + + SELECT a, b, c, d, SUM(e) Data FROM TestTable + GROUP BY ROLLUP(a, b, c, d, a * b AS ab, b * c AS bc, c * d AS cd, a + b AS sum) + ORDER BY a, b, c, d; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + } + + { // query with 36 groups (without explicit limit) + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + + SELECT a, b, c, d, SUM(e) Data FROM TestTable + GROUP BY ROLLUP(a, b, c, d, a * b AS ab, b * c AS bc, c * d AS cd, a + b AS sum) + ORDER BY a, b, c, d; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [#;#;#;#;[126u]]; + [[1u];#;#;#;[41u]]; + [[1u];[11u];#;#;[41u]]; + [[1u];[11u];[21u];#;[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[1u];[11u];[21u];[31u];[41u]]; + [[2u];#;#;#;[42u]]; + [[2u];[12u];#;#;[42u]]; + [[2u];[12u];[22u];#;[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + [[2u];[12u];[22u];[32u];[42u]]; + 
[[2u];[12u];[22u];[32u];[42u]]; + [[3u];#;#;#;[43u]]; + [[3u];[13u];#;#;[43u]]; + [[3u];[13u];[23u];#;[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]]; + [[3u];[13u];[23u];[33u];[43u]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 3c13d8c27b4d..2963a5acbf03 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -5025,6 +5025,54 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/*"); } + Y_UNIT_TEST(CreateExternalTableWithUpperCaseSettings) { + TKikimrRunner kikimr; + kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableExternalDataSources(true); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + TString externalDataSourceName = "/Root/ExternalDataSource"; + TString externalTableName = "/Root/ExternalTable"; + auto query = TStringBuilder() << R"( + CREATE EXTERNAL DATA SOURCE `)" << externalDataSourceName << R"(` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + CREATE EXTERNAL TABLE `)" << externalTableName << R"(` ( + Key Uint64, + Value String, + Year Int64 NOT NULL, + Month Int64 NOT NULL + ) WITH ( + DATA_SOURCE=")" << externalDataSourceName << R"(", + LOCATION="/folder1/*", + FORMAT="json_as_string", + `projection.enabled`="true", + `projection.Year.type`="integer", + `projection.Year.min`="2010", + `projection.Year.max`="2022", + `projection.Year.interval`="1", + `projection.Month.type`="integer", + `projection.Month.min`="1", + `projection.Month.max`="12", + `projection.Month.interval`="1", + `projection.Month.digits`="2", + `storage.location.template`="${Year}/${Month}", + 
PARTITIONED_BY = "[Year, Month]" + );)"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + auto externalTableDesc = Navigate(runtime, runtime.AllocateEdgeActor(), externalTableName, NKikimr::NSchemeCache::TSchemeCacheNavigate::EOp::OpUnknown); + const auto& externalTable = externalTableDesc->ResultSet.at(0); + UNIT_ASSERT_EQUAL(externalTable.Kind, NKikimr::NSchemeCache::TSchemeCacheNavigate::EKind::KindExternalTable); + UNIT_ASSERT(externalTable.ExternalTableInfo); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.ColumnsSize(), 4); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetDataSourcePath(), externalDataSourceName); + UNIT_ASSERT_VALUES_EQUAL(externalTable.ExternalTableInfo->Description.GetLocation(), "/folder1/*"); + } + Y_UNIT_TEST(DoubleCreateExternalTable) { TKikimrRunner kikimr; kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableExternalDataSources(true); diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index aa8009bcacab..84afe3c52cb7 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -628,6 +628,29 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL(totalTasks, 2); } + Y_UNIT_TEST(ExecStatsAst) { + auto kikimr = DefaultKikimrRunner(); + auto db = kikimr.GetQueryClient(); + + auto settings = TExecuteQuerySettings() + .StatsMode(EStatsMode::Full); + + std::vector> cases = { + { "SELECT 42 AS test_ast_column", EStatus::SUCCESS }, + { "SELECT test_ast_column FROM TwoShard", EStatus::GENERIC_ERROR }, + { "SELECT UNWRAP(42 / 0) AS test_ast_column", EStatus::PRECONDITION_FAILED }, + }; + + for (const auto& [sql, status] : cases) { + auto result = db.ExecuteQuery(sql, 
TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), status, result.GetIssues().ToString()); + + UNIT_ASSERT(result.GetStats().Defined()); + UNIT_ASSERT(result.GetStats()->GetAst().Defined()); + UNIT_ASSERT_STRING_CONTAINS(*result.GetStats()->GetAst(), "test_ast_column"); + } + } + Y_UNIT_TEST(Ddl) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); diff --git a/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp index 5fdb3ed11809..9b17d3cfede0 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_scripts_ut.cpp @@ -202,6 +202,32 @@ Y_UNIT_TEST_SUITE(KqpQueryServiceScripts) { ])", FormatResultSetYson(results.GetResultSet())); } + Y_UNIT_TEST(ExecuteScriptWithParameters) { + auto kikimr = DefaultKikimrRunner(); + auto db = kikimr.GetQueryClient(); + + auto params = TParamsBuilder() + .AddParam("$value").Int64(17).Build() + .Build(); + + auto scriptExecutionOperation = db.ExecuteScript(R"( + DECLARE $value As Int64; + SELECT $value; + )", params).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr.GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + + TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync(); + UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString()); + + CompareYson(R"([ + [17] + ])", FormatResultSetYson(results.GetResultSet())); + } void ExecuteScriptWithStatsMode(Ydb::Query::StatsMode statsMode) { auto 
kikimr = DefaultKikimrRunner(); @@ -394,14 +420,13 @@ Y_UNIT_TEST_SUITE(KqpQueryServiceScripts) { i32 successCount = 0; for (auto& f : forgetFutures) { auto forgetStatus = f.ExtractValueSync(); - UNIT_ASSERT_C(forgetStatus.GetStatus() == NYdb::EStatus::SUCCESS || forgetStatus.GetStatus() == NYdb::EStatus::NOT_FOUND || - forgetStatus.GetStatus() == NYdb::EStatus::ABORTED, forgetStatus.GetIssues().ToString()); + UNIT_ASSERT_C(forgetStatus.GetStatus() == NYdb::EStatus::SUCCESS || forgetStatus.GetStatus() == NYdb::EStatus::NOT_FOUND, forgetStatus.GetIssues().ToString()); if (forgetStatus.GetStatus() == NYdb::EStatus::SUCCESS) { ++successCount; } } - UNIT_ASSERT(successCount == 1); + UNIT_ASSERT(successCount >= 1); auto op = opClient.Get(scriptExecutionOperation.Id()).ExtractValueSync(); auto forgetStatus = opClient.Forget(scriptExecutionOperation.Id()).ExtractValueSync(); diff --git a/ydb/core/protos/node_limits.proto b/ydb/core/protos/node_limits.proto index 5aaf8c6fdaf8..ef067d578c48 100644 --- a/ydb/core/protos/node_limits.proto +++ b/ydb/core/protos/node_limits.proto @@ -3,10 +3,9 @@ option java_package = "ru.yandex.kikimr.proto"; message TNodeLimitsConfig { message TPersQueueNodeConfig { - optional uint64 SharedCacheSizeMb = 1 [default = 8192]; + optional uint64 SharedCacheSizeMb = 1 [default = 1024]; optional uint32 CacheKeepTimeSec = 2 [default = 10]; } optional TPersQueueNodeConfig PersQueueNodeConfig = 1; } - diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 978d3e3b1f97..16624c6101c0 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -860,11 +860,11 @@ namespace Tests { ); std::shared_ptr databaseAsyncResolver; - if (queryServiceConfig.GetGeneric().HasMdbGateway() && queryServiceConfig.HasMdbTransformHost()) { + if (queryServiceConfig.GetGeneric().HasMdbGateway() || queryServiceConfig.GetGeneric().HasYdbMvpEndpoint()) { databaseAsyncResolver = std::make_shared( 
Runtime->GetActorSystem(nodeIdx), databaseResolverActorId, - "", + queryServiceConfig.GetGeneric().GetYdbMvpEndpoint(), queryServiceConfig.GetGeneric().GetMdbGateway(), NFq::MakeMdbEndpointGeneratorGeneric(queryServiceConfig.GetMdbTransformHost()) ); diff --git a/ydb/core/tx/datashard/execute_distributed_erase_tx_unit.cpp b/ydb/core/tx/datashard/execute_distributed_erase_tx_unit.cpp index 1c1a8be5e81b..ab6f553e5b20 100644 --- a/ydb/core/tx/datashard/execute_distributed_erase_tx_unit.cpp +++ b/ydb/core/tx/datashard/execute_distributed_erase_tx_unit.cpp @@ -91,11 +91,12 @@ class TExecuteDistributedEraseTxUnit : public TExecutionUnit { for (const auto& rs : readSets) { NKikimrTxDataShard::TDistributedEraseRS body; Y_ABORT_UNLESS(body.ParseFromArray(rs.Body.data(), rs.Body.size())); - Y_ABORT_UNLESS(presentRows.contains(rs.Origin)); - const bool ok = Execute(txc, request, presentRows.at(rs.Origin), - DeserializeBitMap(body.GetConfirmedRows()), writeVersion, op->GetGlobalTxId()); - Y_ABORT_UNLESS(ok); + + auto confirmedRows = DeserializeBitMap(body.GetConfirmedRows()); + if (!Execute(txc, request, presentRows.at(rs.Origin), confirmedRows, writeVersion, op->GetGlobalTxId())) { + return EExecutionStatus::Restart; + } } } diff --git a/ydb/core/viewer/counters_hosts.h b/ydb/core/viewer/counters_hosts.h index 9a65349aae2d..2232f16b00ba 100644 --- a/ydb/core/viewer/counters_hosts.h +++ b/ydb/core/viewer/counters_hosts.h @@ -16,10 +16,13 @@ using namespace NActors; using namespace NNodeWhiteboard; class TCountersHostsList : public TActorBootstrapped { + using TBase = TActorBootstrapped; + IViewer* Viewer; NMon::TEvHttpInfo::TPtr Event; THolder NodesInfo; TMap> NodesResponses; + THashSet TcpProxies; ui32 NodesRequested = 0; ui32 NodesReceived = 0; bool StaticNodesOnly = false; @@ -35,47 +38,48 @@ class TCountersHostsList : public TActorBootstrapped { , Event(ev) {} - void Bootstrap(const TActorContext& ctx) { + void Bootstrap() { const auto& 
params(Event->Get()->Request.GetParams()); StaticNodesOnly = FromStringWithDefault(params.Get("static_only"), StaticNodesOnly); DynamicNodesOnly = FromStringWithDefault(params.Get("dynamic_only"), DynamicNodesOnly); const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - ctx.Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); + Send(nameserviceId, new TEvInterconnect::TEvListNodes()); + Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); Become(&TThis::StateRequestedList); } STFUNC(StateRequestedList) { switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, Handle); - CFunc(TEvents::TSystem::Wakeup, Timeout); + hFunc(TEvInterconnect::TEvNodesInfo, Handle); + cFunc(TEvents::TSystem::Wakeup, Timeout); } } STFUNC(StateRequestedSysInfo) { switch (ev->GetTypeRewrite()) { - HFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - HFunc(TEvents::TEvUndelivered, Undelivered); - HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - CFunc(TEvents::TSystem::Wakeup, Timeout); + hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); + hFunc(TEvents::TEvUndelivered, Undelivered); + hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); + hFunc(TEvInterconnect::TEvNodeConnected, Connected); + cFunc(TEvents::TSystem::Wakeup, Timeout); } } - void SendRequest(ui32 nodeId, const TActorContext& ctx) { + void SendRequest(ui32 nodeId) { TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); THolder request = MakeHolder(); - ctx.Send(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++NodesRequested; + Send(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); + NodesRequested++; } - void NodeStateInfoReceived(const TActorContext& ctx) { + void NodeStateInfoReceived() { ++NodesReceived; if (NodesRequested == NodesReceived) { - 
ReplyAndDie(ctx); + ReplyAndDie(); } } - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { NodesInfo = ev->Release(); ui32 minAllowedNodeId = std::numeric_limits::min(); ui32 maxAllowedNodeId = std::numeric_limits::max(); @@ -90,33 +94,38 @@ class TCountersHostsList : public TActorBootstrapped { } for (const auto& nodeInfo : NodesInfo->Nodes) { if (nodeInfo.NodeId >= minAllowedNodeId && nodeInfo.NodeId <= maxAllowedNodeId) { - SendRequest(nodeInfo.NodeId, ctx); + SendRequest(nodeInfo.NodeId); } } Become(&TThis::StateRequestedSysInfo); } - void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; NodesResponses[nodeId] = ev->Release(); - NodeStateInfoReceived(ctx); + NodeStateInfoReceived(); } - void Undelivered(TEvents::TEvUndelivered::TPtr& ev, const TActorContext& ctx) { + void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { ui32 nodeId = ev.Get()->Cookie; if (NodesResponses.emplace(nodeId, nullptr).second) { - NodeStateInfoReceived(ctx); + NodeStateInfoReceived(); } } - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev, const TActorContext& ctx) { + void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { ui32 nodeId = ev->Get()->NodeId; + TcpProxies.erase(ev->Sender); if (NodesResponses.emplace(nodeId, nullptr).second) { - NodeStateInfoReceived(ctx); + NodeStateInfoReceived(); } } - void ReplyAndDie(const TActorContext& ctx) { + void Connected(TEvInterconnect::TEvNodeConnected::TPtr& ev) { + TcpProxies.insert(ev->Sender); + } + + void ReplyAndDie() { TStringStream text; for (const auto& [nodeId, sysInfo] : NodesResponses) { if (sysInfo) { @@ -147,12 +156,19 @@ class TCountersHostsList : public TActorBootstrapped { } } } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKTEXT(Event->Get()) + text.Str(), 0, 
NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); + Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKTEXT(Event->Get()) + text.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + PassAway(); + } + + void PassAway() { + for (auto &tcpPorxy: TcpProxies) { + Send(tcpPorxy, new TEvents::TEvUnsubscribe); + } + TBase::PassAway(); } - void Timeout(const TActorContext &ctx) { - ReplyAndDie(ctx); + void Timeout() { + ReplyAndDie(); } }; diff --git a/ydb/core/viewer/json_vdisk_req.h b/ydb/core/viewer/json_vdisk_req.h index 61b805d9a498..28459d510b56 100644 --- a/ydb/core/viewer/json_vdisk_req.h +++ b/ydb/core/viewer/json_vdisk_req.h @@ -60,6 +60,8 @@ class TJsonVDiskRequest : public TViewerPipeClient TcpProxyId; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; @@ -111,6 +113,7 @@ class TJsonVDiskRequest : public TViewerPipeClientSender; + } + void Disconnected() { + TcpProxyId = {}; if (!RetryRequest()) { TBase::RequestDone(); } @@ -170,6 +178,13 @@ class TJsonVDiskRequest : public TViewerPipeClientSend(*TcpProxyId, new TEvents::TEvUnsubscribe); + } + TBase::PassAway(); + } + void ReplyAndPassAway(const TString &error = "") { try { TStringStream json; @@ -182,10 +197,8 @@ class TJsonVDiskRequest : public TViewerPipeClient diff --git a/ydb/docs/ru/core/concepts/datamodel/external_data_source.md b/ydb/docs/ru/core/concepts/datamodel/external_data_source.md index 69c036ac4e06..62d8fd1526a5 100644 --- a/ydb/docs/ru/core/concepts/datamodel/external_data_source.md +++ b/ydb/docs/ru/core/concepts/datamodel/external_data_source.md @@ -2,7 +2,7 @@ {% note warning %} -Данная функциональность находится в режиме "Preview". +Данная функциональность находится в режиме "Experimental". 
{% endnote %} diff --git a/ydb/docs/ru/core/concepts/federated_query/_assets/architecture.png b/ydb/docs/ru/core/concepts/federated_query/_assets/architecture.png new file mode 100644 index 000000000000..6a835c268033 Binary files /dev/null and b/ydb/docs/ru/core/concepts/federated_query/_assets/architecture.png differ diff --git a/ydb/docs/ru/core/concepts/federated_query/architecture.md b/ydb/docs/ru/core/concepts/federated_query/architecture.md new file mode 100644 index 000000000000..3df1f6ea5994 --- /dev/null +++ b/ydb/docs/ru/core/concepts/federated_query/architecture.md @@ -0,0 +1,36 @@ +# Aрхитектура системы обработки федеративных запросов + +## Внешние источники данных и внешние таблицы + +Ключевым элементом системы обработки федеративных запросов {{ ydb-full-name }} является понятие [внешнего источника данных](../datamodel/external_data_source.md) (external data source). В качестве таких источников могут выступать реляционные СУБД, объектные хранилища и другие системы хранения данных. При обработке федеративного запроса {{ ydb-short-name }} потоково вычитывает данные из внешних систем и позволяет выполнять над ними точно такой же спектр операций, что и для локальных данных. + +Для того, чтобы работать с данными, размещёнными во внешних системах, {{ ydb-short-name }} должна располагать информацией о внутренней структуре этих данных (например, о количестве, названиях и типах столбцов в таблицах). Некоторые источники предоставляют подобную метаинформацию о данных вместе с самими данными, тогда как для работы с другими, несхематизированными источниками требуется задание этой метаинформации извне. Последней цели служат [внешние таблицы](../datamodel/external_table.md) (external tables). + +Зарегистрировав в {{ ydb-short-name }} внешние источники данных и (в случае необходимости) внешние таблицы, клиент может приступать к описанию федеративных запросов. 
+ +## Коннекторы {#connectors} + +В ходе выполнения федеративных запросов {{ ydb-short-name }} необходимо обращаться по сети к сторонним системам хранения данных, для чего приходится использовать их клиентские библиотеки. Появление таких зависимостей негативно сказывается на объёме кодовой базы, времени компиляции и размере бинарных файлов {{ ydb-short-name }}, а также на стабильности всего продукта в целом. + +Перечень поддерживаемых источников данных для федеративных запросов постоянно расширяется. +Наиболее популярные источники, такие как [S3](s3), поддерживаются {{ ydb-short-name }} нативно. Однако не всем пользователям требуется поддержка одновременно всех источников. Её можно включить опционально с помощью _коннекторов_ - специальных микросервисов, реализующих унифицированный интерфейс доступа к внешним источникам данных. + +В функции коннекторов входят: + +* Трансляция YQL-запросов в запросы на языке, специфичном для внешнего источника (например, в запросы на другом диалекте SQL или в обращения к HTTP API). +* Организация сетевых соединений с источниками данных. +* Конвертация данных, извлечённых из внешних источников, в колоночное представление в формате [Arrow IPC Stream](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc), поддерживаемом {{ ydb-short-name }}. + +![Архитектура YDB Federated Query](_assets/architecture.png "Архитектура YDB Federated Query" =640x) + +Таким образом, благодаря коннекторам формируется слой абстракции, скрывающий от {{ ydb-short-name }} специфику внешних источников данных. Лаконичность интерфейса коннектора позволяет легко расширять перечень поддерживаемых источников, внося минимальные изменения в код {{ ydb-short-name }}. 
+ +Пользователи могут развернуть [один из готовых коннекторов](../../deploy/manual/connector.md) или написать свою реализацию на любом языке программирования по [gRPC спецификации](https://github.com/ydb-platform/ydb/tree/main/ydb/library/yql/providers/generic/connector/api). + +## Перечень поддерживаемых внешних источников данных {#supported-datasources} + +| Источник | Поддержка | +| -------- | --------- | +| [S3](https://aws.amazon.com/ru/s3/) | Встроенная в `ydbd` | +| [ClickHouse](https://clickhouse.com/) | Через коннектор [fq-connector-go](../../deploy/manual/connector.md#fq-connector-go) | +| [PostgreSQL](https://www.postgresql.org/) | Через коннектор [fq-connector-go](../../deploy/manual/connector.md#fq-connector-go) | diff --git a/ydb/docs/ru/core/concepts/federated_query/index.md b/ydb/docs/ru/core/concepts/federated_query/index.md index ab85cceb7ddf..f240d77cc74a 100644 --- a/ydb/docs/ru/core/concepts/federated_query/index.md +++ b/ydb/docs/ru/core/concepts/federated_query/index.md @@ -6,14 +6,11 @@ {% endnote %} +Федеративные запросы - это способ получать информацию из различных источников данных без необходимости переноса данных этих источников внутрь {{ ydb-full-name }}. В настоящее время федеративные запросы поддерживают взаимодействие с базами данных ClickHouse, PostgreSQL и с хранилищами данных класса S3. При помощи YQL запросов вы сможете обращаться к этим базам данных без необходимости дублирования данных между системами. -Федеративные запросы - это способ получать информацию из различных источников данных без необходимости переноса данных этих источников внутрь {{ ydb-full-name }}. Федеративные запросы поддерживают взаимодействие с базами данных ClickHouse, PostgreSQL и с хранилищами данных класса S3 ({{ objstorage-name }}). При помощи YQL запросов вы сможете обращаться к этим базам данных без необходимости дублирования данных между системами. 
+Для работы с данными, хранящимися во внешних СУБД, достаточно создать [внешний источник данных](../datamodel/external_data_source.md). Для работы с несхематизированными данными, хранящимися в бакетах S3 нужно дополнительно создать [внешнюю таблицу](../datamodel/external_table.md). В обоих случаях необходимо предварительно создать объекты-[секреты](../datamodel/secrets.md), хранящие конфиденциальные данные, необходимые для аутентификации во внешних системах. -Для работы с данными, хранящимися во внешних СУБД, достаточно создать [внешний источник данных](../datamodel/external_data_source.md). Для работы с несхематизированными данными, хранящимися в бакетах S3 ({{objstorage-full-name}}) нужно дополнительно создать объект [внешнюю таблицу](../datamodel/external_table.md). В обоих случаях необходимо предварительно создать объекты-[секреты](../datamodel/secrets.md), хранящие конфиденциальные данные, необходимые для аутентификации во внешних системах. - -Подробная информация про работу с различными источниками данных приведена в соответствующих разделах: +Вы сможете узнать о внутреннем устройстве системы обработки федеративных запросов в разделе об [архитектуре](./architecture.md). Подробная информация про работу с различными источниками данных приведена в соответствующих разделах: - [ClickHouse](clickhouse.md). - [PostgreSQL](postgresql.md). -- [S3 ({{objstorage-full-name}})](s3/external_table.md). - - +- [S3](s3/external_table.md). 
diff --git a/ydb/docs/ru/core/concepts/federated_query/toc_i.yaml b/ydb/docs/ru/core/concepts/federated_query/toc_i.yaml index d9b8fafc2128..0a8ada4f7bdc 100644 --- a/ydb/docs/ru/core/concepts/federated_query/toc_i.yaml +++ b/ydb/docs/ru/core/concepts/federated_query/toc_i.yaml @@ -1,5 +1,6 @@ items: - { name: Обзор, href: index.md } +- { name: Архитектура, href: architecture.md } - { name: Работа с базами данных PostgreSQL, href: postgresql.md } - { name: Работа с базами данных ClickHouse, href: clickhouse.md } - name: Работа с бакетами S3 diff --git a/ydb/docs/ru/core/deploy/manual/_images/ydb_fq_onprem.png b/ydb/docs/ru/core/deploy/manual/_images/ydb_fq_onprem.png new file mode 100644 index 000000000000..95e896e3b9cf Binary files /dev/null and b/ydb/docs/ru/core/deploy/manual/_images/ydb_fq_onprem.png differ diff --git a/ydb/docs/ru/core/deploy/manual/connector.md b/ydb/docs/ru/core/deploy/manual/connector.md new file mode 100644 index 000000000000..21df10315284 --- /dev/null +++ b/ydb/docs/ru/core/deploy/manual/connector.md @@ -0,0 +1,185 @@ +# Развёртывание коннекторов ко внешним источникам данных + +{% note warning %} + +Данная функциональность находится в режиме "Experimental". + +{% endnote %} + +[Коннекторы](../../concepts/federated_query/architecture.md#connectors) - специальные микросервисы, предоставляющие {{ ydb-full-name }} универсальную абстракцию доступа ко внешним источникам данных. Коннекторы выступают в качестве точек расширения системы обработки [федеративных запросов](../../concepts/federated_query/index.md) {{ ydb-full-name }}. В данном руководстве мы рассмотрим особенности развёртывания коннекторов в режиме on-premise. + +## fq-connector-go {#fq-connector-go} + +Коннектор `fq-connector-go` реализован на языке Go; его исходный код размещён на [GitHub](https://github.com/ydb-platform/fq-connector-go). 
Он обеспечивает доступ к следующим источникам данных: + +* [ClickHouse](https://clickhouse.com/) +* [PostgreSQL](https://www.postgresql.org/) + +Коннектор может быть установлен с помощью бинарного дистрибутива или с помощью Docker-образа. + +### Запуск из бинарного дистрибутива + +Для установки коннектора на физический или виртуальный Linux-сервер без средств контейнерной виртуализации используйте бинарные дистрибутивы. + +1. На [странице с релизами](https://github.com/ydb-platform/fq-connector-go/releases) коннектора выберите последний релиз, скачайте архив для подходящей вам платформы и архитектуры. Так выглядит команда для скачивания коннектора версии `v0.2.4` под платформу Linux и архитектуру процессора `amd64`: + ```bash + mkdir /tmp/connector && cd /tmp/connector + wget https://github.com/ydb-platform/fq-connector-go/releases/download/v0.2.4/fq-connector-go-v0.2.4-linux-amd64.tar.gz + tar -xzf fq-connector-go-v0.2.4-linux-amd64.tar.gz + ``` + +1. Если на сервере ещё не были развёрнуты узлы {{ ydb-short-name }}, создайте директории для хранения исполняемых и конфигурационных файлов: + + ```bash + sudo mkdir -p /opt/ydb/bin /opt/ydb/cfg + ``` + +1. Разместите разархивированные исполняемый и конфигурационный файлы коннектора в только что созданные директории: + ```bash + sudo cp fq-connector-go /opt/ydb/bin + sudo cp fq-connector-go.yaml /opt/ydb/cfg + ``` + +1. В [рекомендуемом режиме использования](../../deploy/manual/deploy-ydb-federated-query.md#general-scheme) коннектор развёртывается на тех же серверах, что и динамические узлы {{ ydb-short-name }}, следовательно, шифрование сетевых соединений между ними *не требуется*. Однако если вам всё же необходимо включить шифрование, [подготовьте пару TLS-ключей](../manual/deploy-ydb-on-premises.md#tls-certificates) и пропишите пути до публичного и приватного ключа в поля `connector_server.tls.cert` и `connector_server.tls.key` конфигурационного файла `fq-connector-go.yaml`: + ```yaml + connector_server: + # ... 
+ tls: + cert: "/opt/ydb/certs/fq-connector-go.crt" + key: "/opt/ydb/certs/fq-connector-go.key" + ``` +1. В случае, если внешние источники данных используют TLS, для организации шифрованных соединений с ними коннектору потребуется корневой или промежуточный сертификат удостоверяющего центра (Certificate Authority, CA), которым были подписаны сертификаты источников. На Linux-серверах обычно предустанавливается некоторое количество корневых сертификатов CA. Для ОС Ubuntu список поддерживаемых CA можно вывести следующей командой: + ```bash + awk -v cmd='openssl x509 -noout -subject' '/BEGIN/{close(cmd)};{print | cmd}' < /etc/ssl/certs/ca-certificates.crt + ``` + Если на сервере отсутствует сертификат нужного CA, скопируйте его в специальную системную директорию и обновите список сертификатов: + ```bash + sudo cp root_ca.crt /usr/local/share/ca-certificates/ + sudo update-ca-certificates + ``` + +1. Вы можете запустить сервис вручную или с помощью systemd. + + {% list tabs %} + + - Вручную + + Запустите сервис из консоли следующей командой: + ```bash + /opt/ydb/bin/fq-connector-go server -c /opt/ydb/cfg/fq-connector-go.yaml + ``` + + - С использованием systemd + + Вместе с бинарным дистрибутивом fq-connector-go распространяется [пример](https://github.com/ydb-platform/fq-connector-go/blob/main/examples/systemd/fq-connector-go.service) конфигурационного файла (юнита) для системы инициализации `systemd`. Скопируйте юнит в директорию `/etc/systemd/system`, активизируйте и запустите сервис: + + ```bash + cd /tmp/connector + sudo cp fq-connector-go.service /etc/systemd/system/ + sudo systemctl enable fq-connector-go.service + sudo systemctl start fq-connector-go.service + ``` + + В случае успеха сервис должен перейти в состояние `active (running)`. 
Проверьте его следующей командой: + ```bash + sudo systemctl status fq-connector-go + ● fq-connector-go.service - YDB FQ Connector Go + Loaded: loaded (/etc/systemd/system/fq-connector-go.service; enabled; vendor preset: enabled) + Active: active (running) since Thu 2024-02-29 17:51:42 MSK; 2s ago + ``` + + Логи сервиса можно прочитать с помощью команды: + ```bash + sudo journalctl -u fq-connector-go.service + ``` + {% endlist %} + +### Запуск в Docker {#fq-connector-go-docker} + +1. Для запуска коннектора используйте официальный [Docker-образ](https://github.com/ydb-platform/fq-connector-go/pkgs/container/fq-connector-go). Он уже содержит [конфигурационный файл](https://github.com/ydb-platform/fq-connector-go/blob/main/app/server/config/config.prod.yaml) сервиса. Запустить сервис с настройками по умолчанию можно следующей командой: + + ```bash + docker run -d \ + --name=fq-connector-go \ + -p 2130:2130 \ + ghcr.io/ydb-platform/fq-connector-go:latest + ``` + + На порту 2130 публичного сетевого интерфейса вашего хоста запустится слушающий сокет GRPC-сервиса коннектора. В дальнейшем сервер {{ ydb-short-name }} должен будет установить соединение именно с этим сетевым адресом. + +1. При необходимости изменения конфигурации подготовьте конфигурационный файл [по образцу](#fq-connector-go-config) и примонтируйте его к контейнеру: + + ```bash + docker run -d \ + --name=fq-connector-go \ + -p 2130:2130 \ + -v /path/to/config.yaml:/opt/ydb/cfg/fq-connector-go.yaml + ghcr.io/ydb-platform/fq-connector-go:latest + ``` + +1. В [рекомендуемом режиме использования](../../deploy/manual/deploy-ydb-federated-query.md#general-scheme) коннектор развёртывается на тех же серверах, что и динамические узлы {{ ydb-short-name }}, следовательно, шифрование сетевых соединений между ними *не требуется*. 
Но если вам всё же необходимо включить шифрование между {{ ydb-short-name }} и коннектором, [подготовьте пару TLS-ключей](../manual/deploy-ydb-on-premises.md#tls-certificates) и пропишите пути до публичного и приватного ключа в секции конфигурационного файла `connector_server.tls.cert` и `connector_server.tls.key` соответственно: + + ```yaml + connector_server: + # ... + tls: + cert: "/opt/ydb/certs/fq-connector-go.crt" + key: "/opt/ydb/certs/fq-connector-go.key" + ``` + При запуске контейнера примонтируйте внутрь него директорию с парой TLS-ключей так, чтобы они оказались доступны для процесса `fq-connector-go` по путям, указанным в конфигурационном файле: + + ```bash + docker run -d \ + --name=fq-connector-go \ + -p 2130:2130 \ + -v /path/to/config.yaml:/opt/ydb/cfg/fq-connector-go.yaml + -v /path/to/keys/:/opt/ydb/certs/ + ghcr.io/ydb-platform/fq-connector-go:latest + ``` + +1. В случае, если внешние источники данных используют TLS, для организации шифрованных соединений с ними коннектору потребуется корневой или промежуточный сертификат удостоверяющего центра (Certificate Authority, CA), которым были подписаны сертификаты источников. Docker-образ для коннектора базируется на образе дистрибутива Alpine Linux, который уже содержит некоторое количество сертификатов от доверенных CA. Проверить наличие нужного CA в списке предустановленных можно следующей командой: + + ```bash + docker run -it --rm ghcr.io/ydb-platform/fq-connector-go sh + # далее в консоли внутри контейнера: + apk add openssl + awk -v cmd='openssl x509 -noout -subject' ' /BEGIN/{close(cmd)};{print | cmd}' < /etc/ssl/certs/ca-certificates.crt + ``` + + Если TLS-ключи для источников выпущены CA, не входящим в перечень доверенных, необходимо добавить сертификат этого CA в системные пути контейнера с коннектором. Сделать это можно, например, собрав собственный Docker-образ на основе имеющегося. 
Для этого подготовьте следующий `Dockerfile`: + + ```Dockerfile + FROM ghcr.io/ydb-platform/fq-connector-go:latest + + USER root + + RUN apk --no-cache add ca-certificates openssl + COPY root_ca.crt /usr/local/share/ca-certificates + RUN update-ca-certificates + ``` + + Поместите `Dockerfile` и корневой сертификат CA в одной папке, зайдите в неё и соберите образ следующей командой: + ```bash + docker build -t fq-connector-go_custom_ca . + ``` + + Новый образ `fq-connector-go_custom_ca` можно использовать для развёртывания сервиса с помощью команд, приведённых выше. + +### Конфигурация {#fq-connector-go-config} + +Актуальный пример конфигурационного файла сервиса `fq-connector-go` можно найти в [репозитории](https://github.com/ydb-platform/fq-connector-go/blob/main/app/server/config/config.prod.yaml). + +| Параметр | Назначение | +|----------|------------| +| `connector_server` | Обязательная секция. Содержит настройки основного GPRC-сервера, выполняющего доступ к данным. | +| `connector_server.endpoint.host` | Хостнейм или IP-адрес, на котором запускается слушающий сокет сервиса. | +| `connector_server.endpoint.port` | Номер порта, на котором запускается слушающий сокет сервиса. | +| `connector_server.tls` | Опциональная секция. Заполняется, если требуется включение TLS-соединений для основного GRPC-сервиса `fq-connector-go`. По умолчанию сервис запускается без TLS. | +| `connector_server.tls.key` | Полный путь до закрытого ключа шифрования. | +| `connector_server.tls.cert` | Полный путь до открытого ключа шифрования. | +| `logger` | Опциональная секция. Содержит настройки логирования. | +| `logger.log_level` | Уровень логгирования. Допустимые значения: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, `FATAL`. Значение по умолчанию: `INFO`. | +| `logger.enable_sql_query_logging` | Для источников данных, поддерживающих SQL, включает логирование транслированных запросов. Допустимые значения: `true`, `false`. 
**ВАЖНО**: включение этой опции может привести к печати конфиденциальных пользовательских данных в логи. Значение по умолчанию: `false`. | +| `paging` | Опциональная секция. Содержит настройки алгоритма разбиения извлекаемого из источника потока данных на Arrow-блоки. На каждый запрос в коннекторе создаётся очередь из заранее подготовленных к отправке на сторону {{ ydb-short-name }} блоков данных. Аллокации Arrow-блоков формируют наиболее существенный вклад в потребление оперативной памяти процессом `fq-connector-go`. Минимальный объём памяти, необходимый коннектору для работы, можно приблизительно оценить по формуле $Mem = 2 \cdot Requests \cdot BPP \cdot PQC$, где $Requests$ — количество одновременно выполняемых запросов, $BPP$ — параметр `paging.bytes_per_page`, а $PQC$ — параметр `paging.prefetch_queue_capacity`. | +| `paging.bytes_per_page` | Максимальное количество байт в одном блоке. Рекомендуемые значения - от 4 до 8 МиБ, максимальное значение - 48 МиБ. Значение по умолчанию: 4 МиБ. | +| `paging.prefetch_queue_capacity` | Количество заранее вычитываемых блоков данных, которые хранятся в адресном пространстве коннектора до обращения YDB за очередным блоком данных. В некоторых сценариях бóльшие значения данной настройки могут увеличить пропускную способность, но одновременно приведут и к большему потреблению оперативной памяти процессом. Рекомендуемые значения - не менее 2. Значение по умолчанию: 2. | diff --git a/ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md b/ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md new file mode 100644 index 000000000000..0ff89e632935 --- /dev/null +++ b/ydb/docs/ru/core/deploy/manual/deploy-ydb-federated-query.md @@ -0,0 +1,59 @@ +# Развёртывание YDB с функцией Federated Query + +{% note warning %} + +Данная функциональность находится в режиме "Experimental". 
+ +{% endnote %} + +## Общая схема инсталляции{#general-scheme} + +{{ ydb-full-name }} может выполнять [федеративные запросы](../../concepts/federated_query/index.md) ко внешним источникам (например, объектным хранилищам или реляционным СУБД) без необходимости перемещения их данных непосредственно в {{ ydb-short-name }}. В данном разделе мы рассмотрим изменения, которые необходимо внести в конфигурацию {{ ydb-short-name }} и окружающую инфраструктуру для включения функциональности федеративных запросов. + +{% note info %} + +Для организации доступа к некоторым из источников данных требуется развёртывание специального микросервиса - [коннектора](../../concepts/federated_query/architecture.md#connectors). Ознакомьтесь c [перечнем поддерживаемых источников](../../concepts/federated_query/architecture.md#supported-datasources), чтобы понять, требуется ли вам установка коннектора. + +{% endnote %} + +Кластер {{ ydb-short-name }} и внешние источники данных в варианте production-инсталляции должны развёртываться на разных физических или виртуальных серверах, в том числе в облаках. Если для доступа к определённому источнику требуется развёртывание коннектора, это необходимо сделать на тех же серверах, на которых развёрнуты динамические узлы {{ ydb-short-name }}. Иными словами, на каждый процесс `ydbd`, работающий в режиме динамического узла, должен приходиться один локальный процесс коннектора. + +При этом должны выполняться следующие требования: +* внешний источник данных должен быть доступен по сети для запросов со стороны {{ ydb-short-name }} или со стороны коннектора (при его наличии); +* коннектор должен быть доступен по сети для запросов со стороны {{ ydb-short-name }} (что достигается тривиальным образом благодаря работе этих процессов на одном и том же хосте). 
+ +![Инсталляция {{ ydb-short-name }} FQ](_images/ydb_fq_onprem.png "Инсталляция {{ ydb-short-name }} FQ" =1024x) + +{% note info %} + +В настоящее время мы не поддерживаем развёртывание коннектора в {{k8s}}, но планируем добавить его в ближайшем будущем. + +{% endnote %} + +## Пошаговое руководство + +1. Выполните шаги инструкции по развёртыванию динамического узла {{ ydb-short-name }} до [подготовки конфигурационных файлов](./deploy-ydb-on-premises.md#config) включительно. +1. Если для доступа к нужному вам источнику требуется развернуть коннектор, сделайте это [согласно инструкции](./connector.md). +1. Если для доступа к нужному вам источнику требуется развернуть коннектор, в конфигурационном файле {{ ydb-short-name }} в секции `query_service_config` добавьте подсекцию `generic` по приведённому ниже образцу. В полях `connector.endpoint.host` и `connector.endpoint.port` укажите сетевой адрес коннектора (по умолчанию `localhost` и `2130`). При совместном размещении коннектора и динамического узла {{ ydb-short-name }} на одном сервере установка шифрованных соединений между ними *не требуется*, но в случае необходимости вы можете включить шифрование, передав значение `true` в поле `connector.use_ssl` и указав путь до сертификата CA, использованного для подписи TLS-ключей коннектора, в `connector.ssl_ca_crt`: + ```yaml + query_service_config: + generic: + connector: + endpoint: + host: localhost # имя хоста, где развернут коннектор + port: 2130 # номер порта для слушающего сокета коннектора + use_ssl: false # флаг, включающий шифрование соединений + ssl_ca_crt: "/opt/ydb/certs/ca.crt" # (опционально) путь к сертификату CA + default_settings: + - name: DateTimeFormat + value: string + - name: UsePredicatePushdown + value: "true" + ``` +1. В конфигурационном файле {{ ydb-short-name }} добавьте секцию `feature_flags` следующего содержания: + ```yaml + feature_flags: + enable_external_data_sources: true + enable_script_execution_operations: true + ``` +1.
Продолжайте развёртывание динамического узла {{ ydb-short-name }} по [инструкции](./deploy-ydb-on-premises.md). diff --git a/ydb/docs/ru/core/deploy/manual/toc_i.yaml b/ydb/docs/ru/core/deploy/manual/toc_i.yaml index e4c6085fb110..e8bd5584b771 100644 --- a/ydb/docs/ru/core/deploy/manual/toc_i.yaml +++ b/ydb/docs/ru/core/deploy/manual/toc_i.yaml @@ -1,5 +1,9 @@ items: #- name: Обзор # href: concepts.md -- name: Развертывание +- name: Развертывание YDB href: deploy-ydb-on-premises.md +- name: Развертывание YDB с функцией Federated Query + href: deploy-ydb-federated-query.md +- name: Развертывание коннектора + href: connector.md diff --git a/ydb/docs/ru/core/deploy/toc_i.yaml b/ydb/docs/ru/core/deploy/toc_i.yaml index 9c6d94948add..e907ba76574c 100644 --- a/ydb/docs/ru/core/deploy/toc_i.yaml +++ b/ydb/docs/ru/core/deploy/toc_i.yaml @@ -1,6 +1,6 @@ items: - name: VM / Baremetal - href: manual/deploy-ydb-on-premises.md + include: { mode: link, path: manual/toc_p.yaml } - name: Развертывание одноузлового кластера include: { mode: link, path: ../getting_started/self_hosted/toc_p.yaml } - name: Конфигурация diff --git a/ydb/docs/ru/core/getting_started/self_hosted/_images/ydb_fq_docker.png b/ydb/docs/ru/core/getting_started/self_hosted/_images/ydb_fq_docker.png new file mode 100644 index 000000000000..be30ef337097 Binary files /dev/null and b/ydb/docs/ru/core/getting_started/self_hosted/_images/ydb_fq_docker.png differ diff --git a/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md b/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md index 35040bf807a6..b9cf4076ba93 100644 --- a/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md +++ b/ydb/docs/ru/core/getting_started/self_hosted/_includes/ydb_docker.md @@ -97,6 +97,8 @@ docker run -d --rm --name ydb-local -h localhost \ `-v`: Монтировать директории хост-системы в контейнер в виде `<директория хост-системы>:<директория монтирования в контейнере>`. 
Контейнер YDB использует следующие директории монтирования: - `/ydb_data`: Размещение данных. Если данная директория не смонтирована, то контейнер будет запущен без сохранения данных на диск хост-системы. - `/ydb_certs`: Размещение сертификатов для TLS соединения. Запущенный контейнер запишет туда сертификаты, которые вам нужно использовать для клиентского подключения с использованием TLS. Если данная директория не смонтирована, то вы не сможете подключиться по TLS, так как не будете обладать информацией о сертификате. + +`-p`: Опубликовать порты контейнера на хост-системе. Все применяемые порты должны быть явно перечислены, даже если используются значения по умолчанию. `-e`: Задать переменные окружения в виде `<имя>=<значение>`. Контейнер YDB использует следующие переменные окружения: - `YDB_DEFAULT_LOG_LEVEL`: Уровень логирования. Допустимые значения: `CRIT`, `ERROR`, `WARN`, `NOTICE`, `INFO`. По умолчанию `NOTICE`. - `GRPC_PORT`: Порт для нешифрованных соединений. По умолчанию 2136. @@ -108,7 +110,7 @@ docker run -d --rm --name ydb-local -h localhost \ - `POSTGRES_USER` - создать пользователя с указанным логином, используется для подключения через postgres-протокол. - `POSTGRES_PASSWORD` - задать пароль пользователя для подключения через postgres-протокол. - `YDB_TABLE_ENABLE_PREPARED_DDL` - временная опция, нужна для запуска Postgres-слоя совместимости, в будущем будет удалена. -`-p`: Опубликовать порты контейнера на хост-системе. Все применяемые порты должны быть явно перечислены, даже если используются значения по умолчанию. +- `FQ_CONNECTOR_ENDPOINT` - задать сетевой адрес коннектора ко внешним источникам данных для обработки [федеративных запросов](../../../concepts/federated_query/index.md). Формат строки `scheme://host:port`, где допустимыми значениями `scheme` могут быть `grpcs` (указывает на подключение к коннектору по протоколу TLS) или `grpc` (подключение без шифрования). 
{% include [_includes/storage-device-requirements.md](../../../_includes/storage-device-requirements.md) %} @@ -166,3 +168,82 @@ docker run --rm -it --entrypoint cat {{ ydb_local_docker_image }} LICENSE ```bash docker run --rm -it --entrypoint cat {{ ydb_local_docker_image }} THIRD_PARTY_LICENSES ``` + +## Запуск {{ ydb-short-name }} Federated Query в Docker + +{% note warning %} + +Данная функциональность находится в режиме "Experimental". + +{% endnote %} + +В данном разделе рассматривается пример тестовой инсталляции {{ ydb-full-name }}, сконфигурированной для выполнения [федеративных запросов](../../../concepts/federated_query/index.md) к внешним источникам данных. Подключение {{ ydb-full-name }} к некоторым из источников требует развертывания специального микросервиса - [коннектора](../../../concepts/federated_query/architecture.md#connectors). Ниже мы воспользуемся инструментом оркестрации `docker-compose` для локального запуска Docker-контейнеров с тремя сервисами: + +* {{ ydb-short-name }} в одноузловой конфигурации; +* PostgreSQL (в качестве примера источника данных); +* Коннектор [fq-connector-go](../../../deploy/manual/connector.md#fq-connector-go). + +![YDB FQ in Docker](../_images/ydb_fq_docker.png "YDB FQ in Docker" =320x) + +{% note info %} + +В данном руководстве запросы к {{ ydb-short-name }} выполняются через [Embedded UI](../../../maintenance/embedded_monitoring/index.md). Возможность выполнения запросов через [{{ ydb-short-name }} CLI](../../../reference/ydb-cli/index.md) появится в ближайшем будущем. + +{% endnote %} + +1. Установите `docker-compose` подходящим вам [способом](https://github.com/docker/compose?tab=readme-ov-file#where-to-get-docker-compose). + +1. 
Скачайте [пример](https://github.com/ydb-platform/fq-connector-go/blob/main/examples/docker-compose/docker-compose.yaml) файла `docker-compose.yaml` и запустите контейнеры: + + ```bash + mkdir /tmp/fq && cd /tmp/fq + wget https://raw.githubusercontent.com/ydb-platform/fq-connector-go/main/examples/docker-compose/docker-compose.yaml + docker-compose pull + docker-compose up -d + ``` + +1. Инициализируйте любым удобным вам способом данные внутри развернутого в контейнере источника, например, подключившись к нему через CLI: + ```bash + docker exec -it fq-example-postgresql psql -d fq --user admin -c " + DROP TABLE IF EXISTS example; + CREATE TABLE example (id integer, col1 text, col2 integer); + INSERT INTO example VALUES (1, 'a', 10), (2, 'b', 20), (3, 'c', 30), + (4, 'd', 40), (5, 'e', 50), (6, NULL, 1);" + ``` + +1. Откройте в браузере страницу `http://hostname:8765/monitoring/tenant?schema=%2Flocal&name=%2Flocal`, где `hostname` - сетевое имя хоста, на котором развёрнуты контейнеры ([ссылка для localhost](http://localhost:8765/monitoring/tenant?schema=%2Flocal&name=%2Flocal)). Вы попадёте в Embedded UI базы данных `/local` локально развернутого инстанса {{ ydb-short-name }}. 
В панели для запросов введите код, регистрирующий базу данных `fq` из локального инстанса PostgreSQL в качестве внешнего источника данных для {{ ydb-short-name }}: + + ```sql + -- Создаётся секрет, содержащий пароль "password" пользователя admin базы данных PostgreSQL + CREATE OBJECT pg_local_password (TYPE SECRET) WITH (value = password); + + CREATE EXTERNAL DATA SOURCE pg_local WITH ( + SOURCE_TYPE="PostgreSQL", -- тип источника данных + DATABASE_NAME="fq", -- имя базы данных + LOCATION="postgresql:5432", -- сетевой адрес источника (в данном случае соответствует + -- имени сервиса в файле docker-compose.yaml) + AUTH_METHOD="BASIC", -- режим аутентификации по логину и паролю + LOGIN="admin", -- логин для доступа к источнику + PASSWORD_SECRET_NAME="pg_local_password", -- имя секрета, содержащего пароль пользователя + USE_TLS="FALSE", -- признак применения источником TLS-шифрования + PROTOCOL="NATIVE" -- протокол доступа к источнику данных + ); + ``` + +1. В селекторе типов запросов внизу страницы выберите `Query type: YQL Script` и нажмите кнопку `Run`. Запрос должен завершиться успешно. + +1. Затем введите запрос, непосредственно извлекающий данные из таблицы `example` базы данных `fq` локального инстанса PostgreSQL: + + ```sql + SELECT * FROM pg_local.example; + ``` + +1. В селекторе типов запросов внизу страницы выберите `Query type: YQL - QueryService` и нажмите кнопку `Run`. На экране появятся данные таблицы, созданной во внешнем источнике несколькими шагами ранее. + +Успешное выполнение последнего запроса демонстрирует работоспособность всей цепочки преобразований данных: пользователь {{ ydb-short-name }} формулирует YQL-запрос к внешней базе данных PostgreSQL, {{ ydb-short-name }} обращается к коннектору по внутреннему API, коннектор генерирует запрос на диалекте PostgreSQL, извлекает данные из внешнего источника, и передаёт их в {{ ydb-short-name }} для отображения. 
Точно таким же образом в одном YQL-запросе можно обратиться сразу к нескольким источникам разных типов одновременно, извлечь и объединить данные и совместно их проанализировать. + +{% note info %} + +О дополнительных опциях запуска коннектора можно узнать в [руководстве по развертыванию](../../../deploy/manual/connector.md#fq-connector-go-docker). В качестве внешних источников данных можно использовать любое хранилище или базу данных из перечня [поддерживаемых](../../../concepts/federated_query/architecture.md#supported-datasources). + +{% endnote %} diff --git a/ydb/library/query_actor/query_actor.h b/ydb/library/query_actor/query_actor.h index ef47d2300a0a..b5bf939b8a8f 100644 --- a/ydb/library/query_actor/query_actor.h +++ b/ydb/library/query_actor/query_actor.h @@ -12,11 +12,12 @@ #include #include #include +#include +#include #include namespace NKikimr { -// TODO: add retry logic class TQueryBase : public NActors::TActorBootstrapped { protected: struct TTxControl { @@ -168,4 +169,102 @@ class TQueryBase : public NActors::TActorBootstrapped { std::vector ResultSets; }; +template +class TQueryRetryActor : public NActors::TActorBootstrapped> { +public: + using TBase = NActors::TActorBootstrapped>; + using IRetryPolicy = IRetryPolicy; + + explicit TQueryRetryActor(const NActors::TActorId& replyActorId, const TArgs&... args) + : ReplyActorId(replyActorId) + , RetryPolicy(IRetryPolicy::GetExponentialBackoffPolicy( + Retryable, TDuration::MilliSeconds(10), + TDuration::MilliSeconds(200), TDuration::Seconds(1), + std::numeric_limits::max(), TDuration::Seconds(1) + )) + , CreateQueryActor([=]() { + return new TQueryActor(args...); + }) + {} + + TQueryRetryActor(const NActors::TActorId& replyActorId, IRetryPolicy::TPtr retryPolicy, const TArgs&... 
args) + : ReplyActorId(replyActorId) + , RetryPolicy(retryPolicy) + , CreateQueryActor([=]() { + return new TQueryActor(args...); + }) + , RetryState(RetryPolicy->CreateRetryState()) + {} + + void StartQueryActor() const { + TBase::Register(CreateQueryActor()); + } + + void Bootstrap() { + TBase::Become(&TQueryRetryActor::StateFunc); + StartQueryActor(); + } + + STRICT_STFUNC(StateFunc, + hFunc(NActors::TEvents::TEvWakeup, Wakeup); + hFunc(TResponse, Handle); + ) + + void Wakeup(NActors::TEvents::TEvWakeup::TPtr&) { + StartQueryActor(); + } + + void Handle(const typename TResponse::TPtr& ev) { + const Ydb::StatusIds::StatusCode status = ev->Get()->Status; + if (Retryable(status) == ERetryErrorClass::NoRetry) { + Reply(ev); + return; + } + + if (RetryState == nullptr) { + RetryState = RetryPolicy->CreateRetryState(); + } + + if (auto delay = RetryState->GetNextRetryDelay(status)) { + TBase::Schedule(*delay, new NActors::TEvents::TEvWakeup()); + } else { + Reply(ev); + } + } + + void Reply(const typename TResponse::TPtr& ev) { + TBase::Send(ev->Forward(ReplyActorId)); + TBase::PassAway(); + } + + static ERetryErrorClass Retryable(Ydb::StatusIds::StatusCode status) { + if (status == Ydb::StatusIds::SUCCESS) { + return ERetryErrorClass::NoRetry; + } + + if (status == Ydb::StatusIds::INTERNAL_ERROR + || status == Ydb::StatusIds::UNAVAILABLE + || status == Ydb::StatusIds::BAD_SESSION + || status == Ydb::StatusIds::SESSION_EXPIRED + || status == Ydb::StatusIds::SESSION_BUSY + || status == Ydb::StatusIds::TIMEOUT + || status == Ydb::StatusIds::ABORTED) { + return ERetryErrorClass::ShortRetry; + } + + if (status == Ydb::StatusIds::OVERLOADED + || status == Ydb::StatusIds::UNDETERMINED) { + return ERetryErrorClass::LongRetry; + } + + return ERetryErrorClass::NoRetry; + } + +private: + const NActors::TActorId ReplyActorId; + const IRetryPolicy::TPtr RetryPolicy; + const std::function CreateQueryActor; + IRetryPolicy::IRetryState::TPtr RetryState = nullptr; +}; + } // 
namespace NKikimr diff --git a/ydb/library/yql/ast/yql_constraint.cpp b/ydb/library/yql/ast/yql_constraint.cpp index 57c9991be59e..48201346529e 100644 --- a/ydb/library/yql/ast/yql_constraint.cpp +++ b/ydb/library/yql/ast/yql_constraint.cpp @@ -542,7 +542,8 @@ TSortedConstraintNode::DoGetSimplifiedForType(const TTypeAnnotationNode& type, T ++it; if (ssize_t(GetElementsCount(subType)) == std::distance(from, it)) { - *from++ = std::make_pair(TPartOfConstraintBase::TSetType{std::move(prefix)}, from->second); + *from = std::make_pair(TPartOfConstraintBase::TSetType{std::move(prefix)}, from->second); + ++from; it = content.erase(from, it); changed = setChanged = true; } diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 490a089f5c1e..76bc6112fa12 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -3087,8 +3087,8 @@ std::unordered_set GetUselessSortedJoinInputs(const TCoEquiJoin& equiJoin) if (!joinTree->Head().IsAtom("Cross")) { std::unordered_map tableJoinKeys; for (const auto keys : {joinTree->Child(3), joinTree->Child(4)}) - for (ui32 i = 0U; i < keys->ChildrenSize(); ++i) - tableJoinKeys[keys->Child(i)->Content()].insert_unique(TPartOfConstraintBase::TPathType(1U, keys->Child(++i)->Content())); + for (ui32 i = 0U; i < keys->ChildrenSize(); i += 2) + tableJoinKeys[keys->Child(i)->Content()].insert_unique(TPartOfConstraintBase::TPathType(1U, keys->Child(i + 1)->Content())); for (const auto& [label, joinKeys]: tableJoinKeys) { if (const auto it = sorteds.find(label); sorteds.cend() != it) { diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index b2fed99eb33e..859768b32ad8 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ 
-151,6 +151,7 @@ STRICT_STFUNC_EXC(TDqComputeActorCheckpoints::StateFunc, hFunc(TEvDqCompute::TEvRun, Handle); hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle); hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); hFunc(TEvRetryQueuePrivate::TEvRetry, Handle); hFunc(TEvents::TEvWakeup, Handle); cFunc(TEvents::TEvPoisonPill::EventType, PassAway);, @@ -393,6 +394,13 @@ void TDqComputeActorCheckpoints::Handle(NActors::TEvInterconnect::TEvNodeConnect EventsQueue.HandleNodeConnected(ev->Get()->NodeId); } +void TDqComputeActorCheckpoints::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + LOG_D("Handle undelivered"); + if (!EventsQueue.HandleUndelivered(ev)) { + LOG_E("TEvUndelivered: " << ev->Get()->SourceType); + } +} + void TDqComputeActorCheckpoints::Handle(TEvRetryQueuePrivate::TEvRetry::TPtr& ev) { Y_UNUSED(ev); EventsQueue.Retry(); diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h index 4bc93b6179ea..e33ba0495912 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h @@ -127,6 +127,7 @@ class TDqComputeActorCheckpoints : public NActors::TActorSender == RecipientId && ev->Get()->Reason == NActors::TEvents::TEvUndelivered::Disconnected) { + Connected = false; + ScheduleRetry(); + return true; + } + + return false; +} + void TRetryEventsQueue::Retry() { RetryScheduled = false; if (!Connected) { diff --git a/ydb/library/yql/dq/actors/compute/retry_queue.h b/ydb/library/yql/dq/actors/compute/retry_queue.h index 7ca82b9506a1..875aef00c99e 100644 --- a/ydb/library/yql/dq/actors/compute/retry_queue.h +++ b/ydb/library/yql/dq/actors/compute/retry_queue.h @@ -120,10 +120,16 @@ class TRetryEventsQueue { } return false; } + + bool RemoveConfirmedEvents() { + RemoveConfirmedEvents(MyConfirmedSeqNo); + return 
!Events.empty(); + } void OnNewRecipientId(const NActors::TActorId& recipientId, bool unsubscribe = true); void HandleNodeConnected(ui32 nodeId); void HandleNodeDisconnected(ui32 nodeId); + bool HandleUndelivered(NActors::TEvents::TEvUndelivered::TPtr& ev); void Retry(); void Unsubscribe(); @@ -160,7 +166,7 @@ class TRetryEventsQueue { THolder ev = MakeHolder(); ev->Record = Event->Record; ev->Record.MutableTransportMeta()->SetConfirmedSeqNo(confirmedSeqNo); - return MakeHolder(Recipient, Sender, ev.Release(), 0, Cookie); + return MakeHolder(Recipient, Sender, ev.Release(), NActors::IEventHandle::FlagTrackDelivery, Cookie); } private: diff --git a/ydb/library/yql/dq/integration/yql_dq_integration.h b/ydb/library/yql/dq/integration/yql_dq_integration.h index 7765ab128e28..5f17e627905a 100644 --- a/ydb/library/yql/dq/integration/yql_dq_integration.h +++ b/ydb/library/yql/dq/integration/yql_dq_integration.h @@ -62,7 +62,7 @@ class IDqIntegration { virtual bool CanBlockRead(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) = 0; virtual void RegisterMkqlCompiler(NCommon::TMkqlCallableCompilerBase& compiler) = 0; virtual bool CanFallback() = 0; - virtual void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) = 0; + virtual void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t maxPartitions) = 0; virtual void FillSinkSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sinkType) = 0; virtual void FillTransformSettings(const TExprNode& node, ::google::protobuf::Any& settings) = 0; virtual void Annotate(const TExprNode& node, THashMap& params) = 0; diff --git a/ydb/library/yql/dq/opt/dq_opt_hopping.cpp b/ydb/library/yql/dq/opt/dq_opt_hopping.cpp new file mode 100644 index 000000000000..ff84188dc929 --- /dev/null +++ b/ydb/library/yql/dq/opt/dq_opt_hopping.cpp @@ -0,0 +1,793 @@ +#include "dq_opt_hopping.h" + 
+#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NYql::NDq::NHopping { + +using namespace NYql; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +struct THoppingTraits { + TString Column; + TCoHoppingTraits Traits; + ui64 Hop; + ui64 Interval; + ui64 Delay; +}; + + struct TKeysDescription { + TVector PickleKeys; + TVector MemberKeys; + TVector FakeKeys; + + TKeysDescription(const TStructExprType& rowType, const TCoAtomList& keys, const TString& hoppingColumn) { + for (const auto& key : keys) { + if (key.StringValue() == hoppingColumn) { + FakeKeys.emplace_back(key.StringValue()); + continue; + } + + const auto index = rowType.FindItem(key.StringValue()); + Y_ENSURE(index); + + auto itemType = rowType.GetItems()[*index]->GetItemType(); + if (RemoveOptionalType(itemType)->GetKind() == ETypeAnnotationKind::Data) { + MemberKeys.emplace_back(key.StringValue()); + continue; + } + + PickleKeys.emplace_back(key.StringValue()); + } + } + + TExprNode::TPtr BuildPickleLambda(TExprContext& ctx, TPositionHandle pos) const { + TCoArgument arg = Build(ctx, pos) + .Name("item") + .Done(); + + TExprBase body = arg; + + for (const auto& key : PickleKeys) { + const auto member = Build(ctx, pos) + .Name().Build(key) + .Struct(arg) + .Done() + .Ptr(); + + body = Build(ctx, pos) + .Struct(body) + .Name().Build(key) + .Item(ctx.NewCallable(pos, "StablePickle", { member })) + .Done(); + } + + return Build(ctx, pos) + .Args({arg}) + .Body(body) + .Done() + .Ptr(); + } + + TExprNode::TPtr BuildUnpickleLambda(TExprContext& ctx, TPositionHandle pos, const TStructExprType& rowType) { + TCoArgument arg = Build(ctx, pos) + .Name("item") + .Done(); + + TExprBase body = arg; + + for (const auto& key : PickleKeys) { + const auto index = rowType.FindItem(key); + Y_ENSURE(index); + + auto itemType = rowType.GetItems().at(*index)->GetItemType(); + const auto member = Build(ctx, pos) 
+ .Name().Build(key) + .Struct(arg) + .Done() + .Ptr(); + + body = Build(ctx, pos) + .Struct(body) + .Name().Build(key) + .Item(ctx.NewCallable(pos, "Unpickle", { ExpandType(pos, *itemType, ctx), member })) + .Done(); + } + + return Build(ctx, pos) + .Args({arg}) + .Body(body) + .Done() + .Ptr(); + } + + TVector GetKeysList(TExprContext& ctx, TPositionHandle pos) const { + TVector res; + res.reserve(PickleKeys.size() + MemberKeys.size()); + + for (const auto& pickleKey : PickleKeys) { + res.emplace_back(Build(ctx, pos).Value(pickleKey).Done()); + } + for (const auto& memberKey : MemberKeys) { + res.emplace_back(Build(ctx, pos).Value(memberKey).Done()); + } + return res; + } + + TVector GetActualGroupKeys() { + TVector result; + result.reserve(PickleKeys.size() + MemberKeys.size()); + result.insert(result.end(), PickleKeys.begin(), PickleKeys.end()); + result.insert(result.end(), MemberKeys.begin(), MemberKeys.end()); + return result; + } + + bool NeedPickle() const { + return !PickleKeys.empty(); + } + + TExprNode::TPtr GetKeySelector(TExprContext& ctx, TPositionHandle pos, const TStructExprType* rowType) { + auto builder = Build(ctx, pos); + for (auto key : GetKeysList(ctx, pos)) { + builder.Add(std::move(key)); + } + return BuildKeySelector(pos, *rowType, builder.Build().Value().Ptr(), ctx); + } +}; + +TString BuildColumnName(const TExprBase& column) { + if (const auto columnName = column.Maybe()) { + return columnName.Cast().StringValue(); + } + + if (const auto columnNames = column.Maybe()) { + TStringBuilder columnNameBuilder; + for (const auto columnName : columnNames.Cast()) { + columnNameBuilder.append(columnName.StringValue()); + columnNameBuilder.append("_"); + } + return columnNameBuilder; + } + + YQL_ENSURE(false, "Invalid node. 
Expected Atom or AtomList, but received: " + << column.Ptr()->Dump()); +} + +bool IsLegacyHopping(const TExprNode::TPtr& hoppingSetting) { + return !hoppingSetting->Child(1)->IsList(); +} + +void EnsureNotDistinct(const TCoAggregate& aggregate) { + const auto& aggregateHandlers = aggregate.Handlers(); + + YQL_ENSURE( + AllOf(aggregateHandlers, [](const auto& t){ return !t.DistinctName(); }), + "Distinct is not supported for aggregation with hop"); +} + +TMaybe ExtractHopTraits(const TCoAggregate& aggregate, TExprContext& ctx, bool analyticsMode) { + const auto pos = aggregate.Pos(); + + const auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); + if (!hopSetting) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Aggregate over stream must have 'hopping' setting")); + return Nothing(); + } + + const auto hoppingColumn = IsLegacyHopping(hopSetting) + ? "_yql_time" + : TString(hopSetting->Child(1)->Child(0)->Content()); + + const auto traitsNode = IsLegacyHopping(hopSetting) + ? 
hopSetting->Child(1) + : hopSetting->Child(1)->Child(1); + + const auto maybeTraits = TMaybeNode(traitsNode); + if (!maybeTraits) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Invalid 'hopping' setting in Aggregate")); + return Nothing(); + } + + const auto traits = maybeTraits.Cast(); + + const auto checkIntervalParam = [&] (TExprBase param) -> ui64 { + if (param.Maybe()) { + param = param.Cast().Input(); + } + if (!param.Maybe()) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Not an interval data ctor")); + return 0; + } + auto value = FromString(param.Cast().Literal().Value()); + if (value <= 0) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval value must be positive")); + return 0; + } + return (ui64)value; + }; + + const auto hop = checkIntervalParam(traits.Hop()); + if (!hop) { + return Nothing(); + } + const auto interval = checkIntervalParam(traits.Interval()); + if (!interval) { + return Nothing(); + } + const auto delay = checkIntervalParam(traits.Delay()); + if (!delay) { + return Nothing(); + } + + if (interval < hop) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval must be greater or equal then hop")); + return Nothing(); + } + if (delay < hop) { + ctx.AddError(TIssue(ctx.GetPosition(pos), "Delay must be greater or equal then hop")); + return Nothing(); + } + + const auto newTraits = Build(ctx, aggregate.Pos()) + .InitFrom(traits) + .DataWatermarks(analyticsMode + ? ctx.NewAtom(aggregate.Pos(), "false") + : traits.DataWatermarks().Ptr()) + .Done(); + + return THoppingTraits { + hoppingColumn, + newTraits, + hop, + interval, + delay + }; +} + +TExprNode::TPtr BuildTimeExtractor(const TCoHoppingTraits& hoppingTraits, TExprContext& ctx) { + const auto pos = hoppingTraits.Pos(); + + if (hoppingTraits.ItemType().Ref().GetTypeAnn()->Cast()->GetType()->Cast()->GetSize() == 0) { + // The case when no fields are used in lambda. F.e. when it has only DependsOn. 
+ return ctx.DeepCopyLambda(hoppingTraits.TimeExtractor().Ref()); + } + + return Build(ctx, pos) + .Args({"item"}) + .Body() + .Apply(hoppingTraits.TimeExtractor()) + .With(0) + .Type(hoppingTraits.ItemType()) + .Value("item") + .Build() + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildInitHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto& aggregateHandlers = aggregate.Handlers(); + + const auto initItemArg = Build(ctx, pos).Name("item").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + ui32 index = 0; + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + + TMaybeNode applier; + if (tuple.Trait().Cast().InitHandler().Args().Size() == 1) { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().InitHandler()) + .With(0, initItemArg) + .Done(); + } else { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().InitHandler()) + .With(0, initItemArg) + .With(1) + .Literal().Build(ToString(index)) + .Build() + .Done(); + } + + structItems.push_back(Build(ctx, pos) + .Name().Build(BuildColumnName(tuple.ColumnName())) + .Value(applier) + .Done()); + ++index; + } + + return Build(ctx, pos) + .Args({initItemArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildUpdateHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + + const auto updateItemArg = Build(ctx, pos).Name("item").Done(); + const auto updateStateArg = Build(ctx, pos).Name("state").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + i32 index = 0; + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(updateStateArg) + .Name().Build(columnName) + 
.Done(); + + TMaybeNode applier; + if (tuple.Trait().Cast().UpdateHandler().Args().Size() == 2) { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().UpdateHandler()) + .With(0, updateItemArg) + .With(1, member) + .Done(); + } else { + applier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().UpdateHandler()) + .With(0, updateItemArg) + .With(1, member) + .With(2) + .Literal().Build(ToString(index)) + .Build() + .Done(); + } + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value(applier) + .Done()); + ++index; + } + + return Build(ctx, pos) + .Args({updateItemArg, updateStateArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr WrapToShuffle( + const TKeysDescription& keysDescription, + const TCoAggregate& aggregate, + const TDqConnection& input, + TExprContext& ctx) +{ + auto pos = aggregate.Pos(); + + TDqStageBase mappedInput = input.Output().Stage(); + if (keysDescription.NeedPickle()) { + mappedInput = Build(ctx, pos) + .Inputs() + .Add() + .Output() + .Stage(input.Output().Stage()) + .Index(input.Output().Index()) + .Build() + .Build() + .Build() + .Program() + .Args({"stream"}) + .Body() + .Input("stream") + .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) + .Build() + .Build() + .Settings(TDqStageSettings().BuildNode(ctx, pos)) + .Done(); + } + + return Build(ctx, pos) + .Output() + .Stage(mappedInput) + .Index().Value("0").Build() + .Build() + .KeyColumns() + .Add(keysDescription.GetKeysList(ctx, pos)) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildMergeHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto& aggregateHandlers = aggregate.Handlers(); + + const auto mergeState1Arg = Build(ctx, pos).Name("state1").Done(); + const auto mergeState2Arg = Build(ctx, pos).Name("state2").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + for (const auto& handler : aggregateHandlers) { + const auto 
tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member1 = Build(ctx, pos) + .Struct(mergeState1Arg) + .Name().Build(columnName) + .Done(); + const auto member2 = Build(ctx, pos) + .Struct(mergeState2Arg) + .Name().Build(columnName) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value() + .Apply(tuple.Trait().Cast().MergeHandler()) + .With(0, member1) + .With(1, member2) + .Build() + .Done()); + } + + return Build(ctx, pos) + .Args({mergeState1Arg, mergeState2Arg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildFinishHopLambda( + const TCoAggregate& aggregate, + const TVector& actualGroupKeys, + const TString& hoppingColumn, + TExprContext& ctx) +{ + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + + const auto finishKeyArg = Build(ctx, pos).Name("key").Done(); + const auto finishStateArg = Build(ctx, pos).Name("state").Done(); + const auto finishTimeArg = Build(ctx, pos).Name("time").Done(); + + TVector structItems; + structItems.reserve(actualGroupKeys.size() + aggregateHandlers.Size() + 1); + + if (actualGroupKeys.size() == 1) { + structItems.push_back(Build(ctx, pos) + .Name().Build(actualGroupKeys[0]) + .Value(finishKeyArg) + .Done()); + } else { + for (size_t i = 0; i < actualGroupKeys.size(); ++i) { + structItems.push_back(Build(ctx, pos) + .Name().Build(actualGroupKeys[i]) + .Value() + .Tuple(finishKeyArg) + .Index() + .Value(ToString(i)) + .Build() + .Build() + .Done()); + } + } + + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString compoundColumnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(finishStateArg) + .Name().Build(compoundColumnName) + .Done(); + + if (tuple.ColumnName().Maybe()) { + structItems.push_back(Build(ctx, pos) + .Name().Build(compoundColumnName) + .Value() + 
.Apply(tuple.Trait().Cast().FinishHandler()) + .With(0, member) + .Build() + .Done()); + + continue; + } + + if (const auto namesList = tuple.ColumnName().Maybe()) { + const auto expApplier = Build(ctx, pos) + .Apply(tuple.Trait().Cast().FinishHandler()) + .With(0, member) + .Done(); + + int index = 0; + for (const auto columnName : namesList.Cast()) { + const auto extracter = Build(ctx, pos) + .Tuple(expApplier) + .Index().Build(index++) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name(columnName) + .Value(extracter) + .Done()); + } + + continue; + } + + YQL_ENSURE(false, "Invalid node. Expected Atom or AtomList, but received: " + << tuple.ColumnName().Ptr()->Dump()); + } + + structItems.push_back(Build(ctx, pos) + .Name().Build(hoppingColumn) + .Value(finishTimeArg) + .Done()); + + return Build(ctx, pos) + .Args({finishKeyArg, finishStateArg, finishTimeArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildSaveHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + + const auto saveStateArg = Build(ctx, pos).Name("state").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(saveStateArg) + .Name().Build(columnName) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value() + .Apply(tuple.Trait().Cast().SaveHandler()) + .With(0, member) + .Build() + .Done()); + } + + return Build(ctx, pos) + .Args({saveStateArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TExprNode::TPtr BuildLoadHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { + const auto pos = aggregate.Pos(); + const auto aggregateHandlers = aggregate.Handlers(); + 
+ TCoArgument loadStateArg = Build(ctx, pos).Name("state").Done(); + + TVector structItems; + structItems.reserve(aggregateHandlers.Size()); + + for (const auto& handler : aggregateHandlers) { + const auto tuple = handler.Cast(); + const TString columnName = BuildColumnName(tuple.ColumnName()); + + const auto member = Build(ctx, pos) + .Struct(loadStateArg) + .Name().Build(columnName) + .Done(); + + structItems.push_back(Build(ctx, pos) + .Name().Build(columnName) + .Value() + .Apply(tuple.Trait().Cast().LoadHandler()) + .With(0, member) + .Build() + .Done()); + } + + return Build(ctx, pos) + .Args({loadStateArg}) + .Body() + .Add(structItems) + .Build() + .Done() + .Ptr(); +} + +TMaybe BuildWatermarkMode( + const TCoAggregate& aggregate, + const TCoHoppingTraits& hoppingTraits, + TExprContext& ctx, + bool analyticsMode, + bool defaultWatermarksMode, + bool syncActor) +{ + const bool enableWatermarks = !analyticsMode && + defaultWatermarksMode && + hoppingTraits.Version().Cast().StringValue() == "v2"; + if (enableWatermarks && syncActor) { + ctx.AddError(TIssue(ctx.GetPosition(aggregate.Pos()), "Watermarks should be used only with async compute actor")); + return Nothing(); + } + + if (hoppingTraits.Version().Cast().StringValue() == "v2" && !enableWatermarks) { + ctx.AddError(TIssue( + ctx.GetPosition(aggregate.Pos()), + "HoppingWindow requires watermarks to be enabled. 
If you don't want to do that, you can use HOP instead.")); + return Nothing(); + } + + return enableWatermarks; +} + +TMaybeNode RewriteAsHoppingWindow( + const TExprBase node, + TExprContext& ctx, + const TDqConnection& input, + bool analyticsMode, + TDuration lateArrivalDelay, + bool defaultWatermarksMode, + bool syncActor) { + const auto aggregate = node.Cast(); + const auto pos = aggregate.Pos(); + + YQL_CLOG(DEBUG, ProviderDq) << "OptimizeStreamingAggregate"; + + EnsureNotDistinct(aggregate); + + const auto maybeHopTraits = ExtractHopTraits(aggregate, ctx, analyticsMode); + if (!maybeHopTraits) { + return nullptr; + } + const auto hopTraits = *maybeHopTraits; + + const auto aggregateInputType = GetSeqItemType(*node.Ptr()->Head().GetTypeAnn()).Cast(); + TKeysDescription keysDescription(*aggregateInputType, aggregate.Keys(), hopTraits.Column); + + if (keysDescription.NeedPickle()) { + return Build(ctx, pos) + .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) + .Input() + .InitFrom(aggregate) + .Input() + .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) + .Input(input) + .Build() + .Build() + .Done(); + } + + const auto keyLambda = keysDescription.GetKeySelector(ctx, pos, aggregateInputType); + const auto timeExtractorLambda = BuildTimeExtractor(hopTraits.Traits, ctx); + const auto initLambda = BuildInitHopLambda(aggregate, ctx); + const auto updateLambda = BuildUpdateHopLambda(aggregate, ctx); + const auto saveLambda = BuildSaveHopLambda(aggregate, ctx); + const auto loadLambda = BuildLoadHopLambda(aggregate, ctx); + const auto mergeLambda = BuildMergeHopLambda(aggregate, ctx); + const auto finishLambda = BuildFinishHopLambda(aggregate, keysDescription.GetActualGroupKeys(), hopTraits.Column, ctx); + const auto enableWatermarks = BuildWatermarkMode(aggregate, hopTraits.Traits, ctx, analyticsMode, defaultWatermarksMode, syncActor); + if (!enableWatermarks) { + return nullptr; + } + + const auto streamArg = Build(ctx, 
pos).Name("stream").Done(); + auto multiHoppingCoreBuilder = Build(ctx, pos) + .KeyExtractor(keyLambda) + .TimeExtractor(timeExtractorLambda) + .Hop(hopTraits.Traits.Hop()) + .Interval(hopTraits.Traits.Interval()) + .DataWatermarks(hopTraits.Traits.DataWatermarks()) + .InitHandler(initLambda) + .UpdateHandler(updateLambda) + .MergeHandler(mergeLambda) + .FinishHandler(finishLambda) + .SaveHandler(saveLambda) + .LoadHandler(loadLambda) + .template WatermarkMode().Build(ToString(*enableWatermarks)); + + if (*enableWatermarks) { + const auto hop = TDuration::MicroSeconds(hopTraits.Hop); + multiHoppingCoreBuilder.template Delay() + .Literal().Build(ToString(Max(hop, lateArrivalDelay).MicroSeconds())) + .Build(); + } else { + multiHoppingCoreBuilder.Delay(hopTraits.Traits.Delay()); + } + + if (analyticsMode) { + return Build(ctx, node.Pos()) + .Input(input.Ptr()) + .KeySelectorLambda(keyLambda) + .SortDirections() + .Literal() + .Value("true") + .Build() + .Build() + .SortKeySelectorLambda(timeExtractorLambda) + .ListHandlerLambda() + .Args(streamArg) + .template Body() + .Stream(multiHoppingCoreBuilder + .template Input() + .List(streamArg) + .Build() + .Done()) + .Build() + .Build() + .Done(); + } else { + auto wrappedInput = input.Ptr(); + if (!keysDescription.MemberKeys.empty()) { + // Shuffle input connection by keys + wrappedInput = WrapToShuffle(keysDescription, aggregate, input, ctx); + if (!wrappedInput) { + return nullptr; + } + } + + const auto stage = Build(ctx, node.Pos()) + .Inputs() + .Add(wrappedInput) + .Build() + .Program() + .Args(streamArg) + .Body() + .Input(multiHoppingCoreBuilder + .template Input() + .Input(streamArg) + .Build() + .Done()) + .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) + .Build() + .Build() + .Settings(TDqStageSettings().BuildNode(ctx, node.Pos())) + .Done(); + + return Build(ctx, node.Pos()) + .Output() + .Stage(stage) + .Index().Build(0) + .Build() + .Done(); + } +} + + +} // NYql::NDq::NHopping 
diff --git a/ydb/library/yql/dq/opt/dq_opt_hopping.h b/ydb/library/yql/dq/opt/dq_opt_hopping.h new file mode 100644 index 000000000000..7d690f6ab2fa --- /dev/null +++ b/ydb/library/yql/dq/opt/dq_opt_hopping.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include + +namespace NYql::NDq::NHopping { + +NNodes::TMaybeNode RewriteAsHoppingWindow( + const NNodes::TExprBase node, + TExprContext& ctx, + const NNodes::TDqConnection& input, + bool analyticsHopping, + TDuration lateArrivalDelay, + bool defaultWatermarksMode, + bool syncActor); + +} // namespace NYql::NDq::NHopping diff --git a/ydb/library/yql/dq/opt/ya.make b/ydb/library/yql/dq/opt/ya.make index 15c6c03dafa4..b1c6a87b94c8 100644 --- a/ydb/library/yql/dq/opt/ya.make +++ b/ydb/library/yql/dq/opt/ya.make @@ -15,6 +15,7 @@ SRCS( dq_opt.cpp dq_opt_build.cpp dq_opt_join.cpp + dq_opt_hopping.cpp dq_opt_log.cpp dq_opt_peephole.cpp dq_opt_phy_finalizing.cpp diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp index b98153d66991..8777bb439bc4 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp @@ -272,7 +272,7 @@ void TGraceJoinPacker::Pack() { case NUdf::EDataSlot::Interval: WriteUnaligned(buffPtr, value.Get()); break; case NUdf::EDataSlot::Date32: - WriteUnaligned(buffPtr, value.Get()); break; + WriteUnaligned(buffPtr, value.Get()); break; case NUdf::EDataSlot::Datetime64: WriteUnaligned(buffPtr, value.Get()); break; case NUdf::EDataSlot::Timestamp64: @@ -300,7 +300,7 @@ void TGraceJoinPacker::Pack() { } case NUdf::EDataSlot::TzTimestamp: { - WriteUnaligned(buffPtr, value.Get()); + WriteUnaligned(buffPtr, value.Get()); WriteUnaligned(buffPtr + sizeof(ui64), value.GetTimezoneId()); break; } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index 51f78cb971d0..af4027dee5f7 100644 --- 
a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -76,7 +76,7 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings } - XXH64_hash_t hash = XXH64(TempTuple.data(), TempTuple.size() * sizeof(ui64), 0); + XXH64_hash_t hash = XXH64(TempTuple.data() + NullsBitmapSize_, (TempTuple.size() - NullsBitmapSize_) * sizeof(ui64), 0); if (!hash) hash = 1; @@ -298,6 +298,8 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef std::swap(JoinTable1, JoinTable2); } + ui64 tuplesFound = 0; + std::vector> joinSlots, spillSlots, slotToIdx; std::vector> stringsOffsets1, stringsOffsets2; ui64 reservedSize = 6 * (DefaultTupleBytes * DefaultTuplesNum) / sizeof(ui64); @@ -320,22 +322,28 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef ui64 nullsSize2 = JoinTable2->NullsBitmapSize_; ui64 keyIntOffset1 = HashSize + nullsSize1; ui64 keyIntOffset2 = HashSize + nullsSize2; + bool table1HasKeyStringColumns = (JoinTable1->NumberOfKeyStringColumns != 0); + bool table2HasKeyStringColumns = (JoinTable2->NumberOfKeyStringColumns != 0); + bool table1HasKeyIColumns = (JoinTable1->NumberOfKeyIColumns != 0); + bool table2HasKeyIColumns = (JoinTable2->NumberOfKeyIColumns != 0); + if ( bucket2->TuplesNum > bucket1->TuplesNum ) { std::swap(bucket1, bucket2); std::swap(headerSize1, headerSize2); std::swap(nullsSize1, nullsSize2); std::swap(keyIntOffset1, keyIntOffset2); + std::swap(table1HasKeyStringColumns, table2HasKeyStringColumns); + std::swap(table1HasKeyIColumns, table2HasKeyIColumns); } joinResults.reserve(3 * bucket1->TuplesNum ); - ui64 headerSize = JoinTable1->HeaderSize; - ui64 slotSize = headerSize; + ui64 slotSize = headerSize2; ui64 avgStringsSize = ( 3 * (bucket2->KeyIntVals.size() - bucket2->TuplesNum * headerSize2) ) / ( 2 * bucket2->TuplesNum + 1) + 1; - if (JoinTable1->NumberOfKeyStringColumns != 0 || 
JoinTable1->NumberOfKeyIColumns != 0) { + if (table2HasKeyStringColumns || table2HasKeyIColumns ) { slotSize = slotSize + avgStringsSize; } @@ -352,7 +360,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef while (it2 != bucket2->KeyIntVals.end() ) { ui64 keysValSize; - if ( JoinTable2->NumberOfKeyStringColumns > 0 || JoinTable2->NumberOfKeyIColumns > 0) { + if ( table2HasKeyStringColumns || table2HasKeyIColumns) { keysValSize = headerSize2 + *(it2 + headerSize2 - 1) ; } else { keysValSize = headerSize2; @@ -397,7 +405,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef while ( it1 < bucket1->KeyIntVals.end() ) { ui64 keysValSize; - if ( JoinTable1->NumberOfKeyStringColumns > 0 || JoinTable1->NumberOfKeyIColumns > 0) { + if ( table1HasKeyStringColumns || table1HasKeyIColumns ) { keysValSize = headerSize1 + *(it1 + headerSize1 - 1) ; } else { keysValSize = headerSize1; @@ -417,24 +425,28 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef auto slotIt = joinSlots.begin() + slotNum * slotSize; while (*slotIt != 0 && slotIt != joinSlots.end()) { + bool matchFound = false; - if (keysValSize <= slotSize && !JoinTable1->NumberOfKeyIColumns ) { + if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !table1HasKeyIColumns ) { if (std::equal(it1 + keyIntOffset1, it1 + keysValSize, slotIt + keyIntOffset2)) { + tuplesFound++; matchFound = true; } } - if (keysValSize > slotSize && !JoinTable1->NumberOfKeyIColumns ) { + if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !table1HasKeyIColumns) { if (std::equal(it1 + keyIntOffset1, it1 + headerSize1, slotIt + keyIntOffset2)) { ui64 stringsPos = *(slotIt + headerSize2); ui64 stringsSize = *(it1 + headerSize1 - 1); if (std::equal(it1 + headerSize1, it1 + headerSize1 + stringsSize, spillSlots.begin() + stringsPos)) { + tuplesFound++; matchFound = true; } } } - if (JoinTable1->NumberOfKeyIColumns) + + if 
(table1HasKeyIColumns) { bool headerMatch = false; bool stringsMatch = false; @@ -451,7 +463,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef slotStringsStart = spillSlots.begin() + stringsPos; } - if ( JoinTable1->NumberOfKeyStringColumns == 0) { + if ( !table1HasKeyStringColumns) { stringsMatch = true; } else { ui64 stringsSize = *(it1 + headerSize1 - 1); @@ -478,32 +490,33 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef } if (headerMatch && stringsMatch && iValuesMatch) { + tuplesFound++; matchFound = true; } - } + } - if (matchFound) + if (matchFound) + { + JoinTuplesIds joinIds; + joinIds.id1 = tuple1Idx; + joinIds.id2 = slotToIdx[(slotIt - joinSlots.begin()) / slotSize]; + if (JoinTable2->TableBuckets[bucket].TuplesNum > JoinTable1->TableBuckets[bucket].TuplesNum) { - JoinTuplesIds joinIds; - joinIds.id1 = tuple1Idx; - joinIds.id2 = slotToIdx[(slotIt - joinSlots.begin()) / slotSize]; - if (JoinTable2->TableBuckets[bucket].TuplesNum > JoinTable1->TableBuckets[bucket].TuplesNum) - { - std::swap(joinIds.id1, joinIds.id2); - } - joinResults.emplace_back(joinIds); + std::swap(joinIds.id1, joinIds.id2); } - - slotIt += slotSize; - if (slotIt == joinSlots.end()) - slotIt = joinSlots.begin(); + joinResults.emplace_back(joinIds); } + slotIt += slotSize; + if (slotIt == joinSlots.end()) + slotIt = joinSlots.begin(); + } it1 += keysValSize; tuple1Idx ++; } + std::sort(joinResults.begin(), joinResults.end(), [](JoinTuplesIds a, JoinTuplesIds b) { if (a.id1 < b.id1) return true; @@ -555,6 +568,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef HasMoreLeftTuples_ = hasMoreLeftTuples; HasMoreRightTuples_ = hasMoreRightTuples; + TuplesFound_ += tuplesFound; } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h index 3eb2056d02df..c6e60d85e819 100644 --- 
a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h @@ -169,6 +169,8 @@ class TTable { bool Table2Initialized_ = false; // True when iterator counters for second table already initialized + ui64 TuplesFound_ = 0; // Total number of matching keys found during join + public: // Adds new tuple to the table. intColumns, stringColumns - data of columns, diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp index 2853ea5c275f..319073885250 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp @@ -1522,12 +1522,12 @@ Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) { const auto iterator = graph->GetValue().GetListIterator(); NUdf::TUnboxedValue tuple; - UNIT_ASSERT(iterator.Next(tuple)); - UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X"); - UNIT_ASSERT(!tuple.GetElement(1)); UNIT_ASSERT(iterator.Next(tuple)); UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A"); UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get(), 1); + UNIT_ASSERT(iterator.Next(tuple)); + UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X"); + UNIT_ASSERT(!tuple.GetElement(1)); UNIT_ASSERT(!iterator.Next(tuple)); UNIT_ASSERT(!iterator.Next(tuple)); } diff --git a/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp b/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp index 2a5e7ec832a5..f5aa18638eb5 100644 --- a/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp +++ b/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp @@ -75,7 +75,7 @@ class TClickHouseDqIntegration: public TDqIntegrationBase { return 0ULL; } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& 
node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override { const TDqSource source(&node); if (const auto maySettings = source.Settings().Maybe()) { const auto settings = maySettings.Cast(); diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp index 98f43a555531..38e013eee5cc 100644 --- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp +++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp @@ -51,7 +51,7 @@ bool TDqIntegrationBase::CanFallback() { return false; } -void TDqIntegrationBase::FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&) { +void TDqIntegrationBase::FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&, size_t) { } void TDqIntegrationBase::FillSinkSettings(const TExprNode&, ::google::protobuf::Any&, TString&) { diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h index a2720db03331..d658d2e018ca 100644 --- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h +++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h @@ -18,7 +18,7 @@ class TDqIntegrationBase: public IDqIntegration { bool CanBlockRead(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) override; TExprNode::TPtr WrapWrite(const TExprNode::TPtr& write, TExprContext& ctx) override; bool CanFallback() override; - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) override; + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t) override; void FillSinkSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sinkType) override; void FillTransformSettings(const TExprNode& node, ::google::protobuf::Any& settings) override; void Annotate(const 
TExprNode& node, THashMap& params) override; diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto index fb845b8cee7b..1838bdd39710 100644 --- a/ydb/library/yql/providers/common/proto/gateways_config.proto +++ b/ydb/library/yql/providers/common/proto/gateways_config.proto @@ -563,12 +563,11 @@ message TGenericClusterConfig { // Credentials used to access data source instance optional NYql.NConnector.NApi.TCredentials Credentials = 10; - // Credentials used to access MDB API. - // When working with data source instances deployed in a cloud, - // you should either set (ServiceAccountId, ServiceAccountIdSignature) pair, - // or set IAM Token. - // The names of these fields must satisfy this template function: - // https://github.com/ydb-platform/ydb/arcadia/contrib/ydb/core/fq/libs/actors/clusters_from_connections.cpp?rev=r11823087#L19 + // Credentials used to access managed databases APIs. + // When working with external data source instances deployed in clouds, + // one should either set (ServiceAccountId, ServiceAccountIdSignature) pair + // that will be resolved into IAM Token via Token Accessor, + // or provide IAM Token directly. optional string ServiceAccountId = 6; optional string ServiceAccountIdSignature = 7; optional string Token = 11; @@ -592,9 +591,11 @@ message TGenericClusterConfig { message TGenericConnectorConfig { // Connector instance network endpoint optional NYql.NConnector.NApi.TEndpoint Endpoint = 3; - // If true, GRPC Client will use TLS encryption. - // Server cert will be verified with system CA cert pool. + // If true, Connector GRPC Client will use TLS encryption. optional bool UseSsl = 4; + // Path to the custom CA certificate to verify Connector's certs. + // If empty, the default system CA certificate pool will be used. 
+ optional string SslCaCrt = 5; reserved 1, 2; } @@ -607,9 +608,14 @@ message TGenericGatewayConfig { // Database clusters supported by this particular instance repeated TGenericClusterConfig ClusterMapping = 3; - // MDB API endpoint (do not fill in case of on-prem deployment) + // MDB API endpoint (no need to fill in case of on-prem deployment). optional string MdbGateway = 4; + // YDB MVP API endpoint (no need to fill in case of on-prem deployment). + // Expected format: + // [http|https]://host:port/ydbc/cloud-prod/ + optional string YdbMvpEndpoint = 7; + repeated TAttr DefaultSettings = 6; reserved 1, 2; @@ -618,7 +624,9 @@ message TGenericGatewayConfig { /////////////////////////////// Db Resolver /////////////////////////////////// message TDbResolverConfig { - // Ydb / Yds mvp endpoint + // Ydb / Yds MVP endpoint. + // Expected format: + // [http|https]://host:port/ydbc/cloud-prod/ optional string YdbMvpEndpoint = 2; } diff --git a/ydb/library/yql/providers/dq/common/ya.make b/ydb/library/yql/providers/dq/common/ya.make index 82704ed75da4..b5a953a629ca 100644 --- a/ydb/library/yql/providers/dq/common/ya.make +++ b/ydb/library/yql/providers/dq/common/ya.make @@ -8,6 +8,7 @@ PEERDIR( ydb/library/yql/utils/log ydb/library/yql/dq/actors ydb/library/yql/dq/proto + ydb/library/yql/dq/integration ) GENERATE_ENUM_SERIALIZATION(yql_dq_settings.h) diff --git a/ydb/library/yql/providers/dq/counters/counters.h b/ydb/library/yql/providers/dq/counters/counters.h index d99722fae175..c7edf1cfa038 100644 --- a/ydb/library/yql/providers/dq/counters/counters.h +++ b/ydb/library/yql/providers/dq/counters/counters.h @@ -61,6 +61,10 @@ struct TCounters { Counters[name] = TEntry(value); } + void SetTimeCounter(const TString& name, i64 value) const { + SetCounter(name, value * 1000); // ms => us + } + THashMap& GetHistogram(const TString& name) { return Histograms[name]; } diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp 
b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp index b294d57a6180..f36fe4333310 100644 --- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -23,33 +24,6 @@ using namespace NYql; using namespace NYql::NDq; using namespace NYql::NNodes; -namespace { - - -TString BuildColumnName(const TExprBase column) { - if (const auto columnName = column.Maybe()) { - return columnName.Cast().StringValue(); - } - - if (const auto columnNames = column.Maybe()) { - TStringBuilder columnNameBuilder; - for (const auto columnName : columnNames.Cast()) { - columnNameBuilder.append(columnName.StringValue()); - columnNameBuilder.append("_"); - } - return columnNameBuilder; - } - - YQL_ENSURE(false, "Invalid node. Expected Atom or AtomList, but received: " - << column.Ptr()->Dump()); -} - -bool IsLegacyHopping(const TExprNode::TPtr& hoppingSetting) { - return !hoppingSetting->Child(1)->IsList(); -} - -} - class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { public: TDqsLogicalOptProposalTransformer(TTypeAnnotationContext* typeCtx, const TDqConfiguration::TPtr& config) @@ -336,7 +310,13 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); if (input) { if (hopSetting) { - return RewriteAsHoppingWindow(node, ctx, input.Cast()); + bool analyticsHopping = Config->AnalyticsHopping.Get().GetOrElse(false); + const auto lateArrivalDelay = TDuration::MilliSeconds(Config->WatermarksLateArrivalDelayMs + .Get() + .GetOrElse(TDqSettings::TDefault::WatermarksLateArrivalDelayMs)); + bool defaultWatermarksMode = Config->WatermarksMode.Get() == "default"; + bool syncActor = Config->ComputeActorType.Get() != "async"; + return NHopping::RewriteAsHoppingWindow(node, ctx, input.Cast(), analyticsHopping, lateArrivalDelay, 
defaultWatermarksMode, syncActor); } else { return DqRewriteAggregate(node, ctx, TypesCtx, true, Config->UseAggPhases.Get().GetOrElse(false), Config->UseFinalizeByKey.Get().GetOrElse(false)); } @@ -508,382 +488,6 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { } private: - TMaybeNode RewriteAsHoppingWindow(const TExprBase node, TExprContext& ctx, const TDqConnection& input) { - const auto aggregate = node.Cast(); - const auto pos = aggregate.Pos(); - - YQL_CLOG(DEBUG, ProviderDq) << "OptimizeStreamingAggregate"; - - EnsureNotDistinct(aggregate); - - const auto maybeHopTraits = ExtractHopTraits(aggregate, ctx); - if (!maybeHopTraits) { - return nullptr; - } - const auto hopTraits = *maybeHopTraits; - - const auto aggregateInputType = GetSeqItemType(*node.Ptr()->Head().GetTypeAnn()).Cast(); - TKeysDescription keysDescription(*aggregateInputType, aggregate.Keys(), hopTraits.Column); - - if (keysDescription.NeedPickle()) { - return Build(ctx, pos) - .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) - .Input() - .InitFrom(aggregate) - .Input() - .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) - .Input(input) - .Build() - .Build() - .Done(); - } - - const auto keyLambda = keysDescription.GetKeySelector(ctx, pos, aggregateInputType); - const auto timeExtractorLambda = BuildTimeExtractor(hopTraits.Traits, ctx); - const auto initLambda = BuildInitHopLambda(aggregate, ctx); - const auto updateLambda = BuildUpdateHopLambda(aggregate, ctx); - const auto saveLambda = BuildSaveHopLambda(aggregate, ctx); - const auto loadLambda = BuildLoadHopLambda(aggregate, ctx); - const auto mergeLambda = BuildMergeHopLambda(aggregate, ctx); - const auto finishLambda = BuildFinishHopLambda(aggregate, keysDescription.GetActualGroupKeys(), hopTraits.Column, ctx); - const auto enableWatermarks = BuildWatermarkMode(aggregate, hopTraits.Traits, ctx); - if (!enableWatermarks) { - return nullptr; - } - - const auto streamArg = Build(ctx, 
pos).Name("stream").Done(); - auto multiHoppingCoreBuilder = Build(ctx, pos) - .KeyExtractor(keyLambda) - .TimeExtractor(timeExtractorLambda) - .Hop(hopTraits.Traits.Hop()) - .Interval(hopTraits.Traits.Interval()) - .DataWatermarks(hopTraits.Traits.DataWatermarks()) - .InitHandler(initLambda) - .UpdateHandler(updateLambda) - .MergeHandler(mergeLambda) - .FinishHandler(finishLambda) - .SaveHandler(saveLambda) - .LoadHandler(loadLambda) - .WatermarkMode().Build(ToString(*enableWatermarks)); - - if (*enableWatermarks) { - const auto hop = TDuration::MicroSeconds(hopTraits.Hop); - const auto lateArrivalDelay = TDuration::MilliSeconds(Config->WatermarksLateArrivalDelayMs - .Get() - .GetOrElse(TDqSettings::TDefault::WatermarksLateArrivalDelayMs)); - - multiHoppingCoreBuilder.Delay() - .Literal().Build(ToString(Max(hop, lateArrivalDelay).MicroSeconds())) - .Build(); - } else { - multiHoppingCoreBuilder.Delay(hopTraits.Traits.Delay()); - } - - if (Config->AnalyticsHopping.Get().GetOrElse(false)) { - return Build(ctx, node.Pos()) - .Input(input.Ptr()) - .KeySelectorLambda(keyLambda) - .SortDirections() - .Literal() - .Value("true") - .Build() - .Build() - .SortKeySelectorLambda(timeExtractorLambda) - .ListHandlerLambda() - .Args(streamArg) - .Body() - .Stream(multiHoppingCoreBuilder - .Input() - .List(streamArg) - .Build() - .Done()) - .Build() - .Build() - .Done(); - } else { - auto wrappedInput = input.Ptr(); - if (!keysDescription.MemberKeys.empty()) { - // Shuffle input connection by keys - wrappedInput = WrapToShuffle(keysDescription, aggregate, input, ctx); - if (!wrappedInput) { - return nullptr; - } - } - - const auto stage = Build(ctx, node.Pos()) - .Inputs() - .Add(wrappedInput) - .Build() - .Program() - .Args(streamArg) - .Body() - .Input(multiHoppingCoreBuilder - .Input() - .Input(streamArg) - .Build() - .Done()) - .Lambda(keysDescription.BuildUnpickleLambda(ctx, pos, *aggregateInputType)) - .Build() - .Build() - .Settings(TDqStageSettings().BuildNode(ctx, 
node.Pos())) - .Done(); - - return Build(ctx, node.Pos()) - .Output() - .Stage(stage) - .Index().Build(0) - .Build() - .Done(); - } - } - - struct THoppingTraits { - TString Column; - TCoHoppingTraits Traits; - ui64 Hop; - ui64 Interval; - ui64 Delay; - }; - - TMaybe ExtractHopTraits(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - - const auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); - if (!hopSetting) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Aggregate over stream must have 'hopping' setting")); - return Nothing(); - } - - const auto hoppingColumn = IsLegacyHopping(hopSetting) - ? "_yql_time" - : TString(hopSetting->Child(1)->Child(0)->Content()); - - const auto traitsNode = IsLegacyHopping(hopSetting) - ? hopSetting->Child(1) - : hopSetting->Child(1)->Child(1); - - const auto maybeTraits = TMaybeNode(traitsNode); - if (!maybeTraits) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Invalid 'hopping' setting in Aggregate")); - return Nothing(); - } - - const auto traits = maybeTraits.Cast(); - - const auto checkIntervalParam = [&] (TExprBase param) -> ui64 { - if (param.Maybe()) { - param = param.Cast().Input(); - } - if (!param.Maybe()) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Not an interval data ctor")); - return 0; - } - auto value = FromString(param.Cast().Literal().Value()); - if (value <= 0) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval value must be positive")); - return 0; - } - return (ui64)value; - }; - - const auto hop = checkIntervalParam(traits.Hop()); - if (!hop) { - return Nothing(); - } - const auto interval = checkIntervalParam(traits.Interval()); - if (!interval) { - return Nothing(); - } - const auto delay = checkIntervalParam(traits.Delay()); - if (!delay) { - return Nothing(); - } - - if (interval < hop) { - ctx.AddError(TIssue(ctx.GetPosition(pos), "Interval must be greater or equal then hop")); - return Nothing(); - } - if (delay < hop) { - 
ctx.AddError(TIssue(ctx.GetPosition(pos), "Delay must be greater or equal then hop")); - return Nothing(); - } - - const auto newTraits = Build(ctx, aggregate.Pos()) - .InitFrom(traits) - .DataWatermarks(Config->AnalyticsHopping.Get().GetOrElse(false) - ? ctx.NewAtom(aggregate.Pos(), "false") - : traits.DataWatermarks().Ptr()) - .Done(); - - return THoppingTraits { - hoppingColumn, - newTraits, - hop, - interval, - delay - }; - } - - struct TKeysDescription { - TVector PickleKeys; - TVector MemberKeys; - TVector FakeKeys; - - explicit TKeysDescription(const TStructExprType& rowType, const TCoAtomList& keys, const TString& hoppingColumn) { - for (const auto& key : keys) { - if (key.StringValue() == hoppingColumn) { - FakeKeys.emplace_back(key.StringValue()); - continue; - } - - const auto index = rowType.FindItem(key.StringValue()); - Y_ENSURE(index); - - auto itemType = rowType.GetItems()[*index]->GetItemType(); - if (RemoveOptionalType(itemType)->GetKind() == ETypeAnnotationKind::Data) { - MemberKeys.emplace_back(key.StringValue()); - continue; - } - - PickleKeys.emplace_back(key.StringValue()); - } - } - - TExprNode::TPtr BuildPickleLambda(TExprContext& ctx, TPositionHandle pos) const { - TCoArgument arg = Build(ctx, pos) - .Name("item") - .Done(); - - TExprBase body = arg; - - for (const auto& key : PickleKeys) { - const auto member = Build(ctx, pos) - .Name().Build(key) - .Struct(arg) - .Done() - .Ptr(); - - body = Build(ctx, pos) - .Struct(body) - .Name().Build(key) - .Item(ctx.NewCallable(pos, "StablePickle", { member })) - .Done(); - } - - return Build(ctx, pos) - .Args({arg}) - .Body(body) - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildUnpickleLambda(TExprContext& ctx, TPositionHandle pos, const TStructExprType& rowType) { - TCoArgument arg = Build(ctx, pos) - .Name("item") - .Done(); - - TExprBase body = arg; - - for (const auto& key : PickleKeys) { - const auto index = rowType.FindItem(key); - Y_ENSURE(index); - - auto itemType = 
rowType.GetItems().at(*index)->GetItemType(); - const auto member = Build(ctx, pos) - .Name().Build(key) - .Struct(arg) - .Done() - .Ptr(); - - body = Build(ctx, pos) - .Struct(body) - .Name().Build(key) - .Item(ctx.NewCallable(pos, "Unpickle", { ExpandType(pos, *itemType, ctx), member })) - .Done(); - } - - return Build(ctx, pos) - .Args({arg}) - .Body(body) - .Done() - .Ptr(); - } - - TVector GetKeysList(TExprContext& ctx, TPositionHandle pos) const { - TVector res; - res.reserve(PickleKeys.size() + MemberKeys.size()); - - for (const auto& pickleKey : PickleKeys) { - res.emplace_back(Build(ctx, pos).Value(pickleKey).Done()); - } - for (const auto& memberKey : MemberKeys) { - res.emplace_back(Build(ctx, pos).Value(memberKey).Done()); - } - return res; - } - - TVector GetActualGroupKeys() { - TVector result; - result.reserve(PickleKeys.size() + MemberKeys.size()); - result.insert(result.end(), PickleKeys.begin(), PickleKeys.end()); - result.insert(result.end(), MemberKeys.begin(), MemberKeys.end()); - return result; - } - - bool NeedPickle() const { - return !PickleKeys.empty(); - } - - TExprNode::TPtr GetKeySelector(TExprContext& ctx, TPositionHandle pos, const TStructExprType* rowType) { - auto builder = Build(ctx, pos); - for (auto key : GetKeysList(ctx, pos)) { - builder.Add(std::move(key)); - } - return BuildKeySelector(pos, *rowType, builder.Build().Value().Ptr(), ctx); - } - }; - - TExprNode::TPtr WrapToShuffle( - const TKeysDescription& keysDescription, - const TCoAggregate& aggregate, - const TDqConnection& input, - TExprContext& ctx) - { - auto pos = aggregate.Pos(); - - TDqStageBase mappedInput = input.Output().Stage(); - if (keysDescription.NeedPickle()) { - mappedInput = Build(ctx, pos) - .Inputs() - .Add() - .Output() - .Stage(input.Output().Stage()) - .Index(input.Output().Index()) - .Build() - .Build() - .Build() - .Program() - .Args({"stream"}) - .Body() - .Input("stream") - .Lambda(keysDescription.BuildPickleLambda(ctx, pos)) - .Build() - .Build() 
- .Settings(TDqStageSettings().BuildNode(ctx, pos)) - .Done(); - } - - return Build(ctx, pos) - .Output() - .Stage(mappedInput) - .Index().Value("0").Build() - .Build() - .KeyColumns() - .Add(keysDescription.GetKeysList(ctx, pos)) - .Build() - .Done() - .Ptr(); - } void EnsureNotDistinct(const TCoAggregate& aggregate) { const auto& aggregateHandlers = aggregate.Handlers(); @@ -893,360 +497,6 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { "Distinct is not supported for aggregation with hop"); } - TExprNode::TPtr BuildTimeExtractor(const TCoHoppingTraits& hoppingTraits, TExprContext& ctx) { - const auto pos = hoppingTraits.Pos(); - - if (hoppingTraits.ItemType().Ref().GetTypeAnn()->Cast()->GetType()->Cast()->GetSize() == 0) { - // The case when no fields are used in lambda. F.e. when it has only DependsOn. - return ctx.DeepCopyLambda(hoppingTraits.TimeExtractor().Ref()); - } - - return Build(ctx, pos) - .Args({"item"}) - .Body() - .Apply(hoppingTraits.TimeExtractor()) - .With(0) - .Type(hoppingTraits.ItemType()) - .Value("item") - .Build() - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildInitHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto& aggregateHandlers = aggregate.Handlers(); - - const auto initItemArg = Build(ctx, pos).Name("item").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - ui32 index = 0; - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - - TMaybeNode applier; - if (tuple.Trait().Cast().InitHandler().Args().Size() == 1) { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().InitHandler()) - .With(0, initItemArg) - .Done(); - } else { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().InitHandler()) - .With(0, initItemArg) - .With(1) - .Literal().Build(ToString(index)) - .Build() - .Done(); - } - - structItems.push_back(Build(ctx, pos) - 
.Name().Build(BuildColumnName(tuple.ColumnName())) - .Value(applier) - .Done()); - ++index; - } - - return Build(ctx, pos) - .Args({initItemArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildUpdateHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto aggregateHandlers = aggregate.Handlers(); - - const auto updateItemArg = Build(ctx, pos).Name("item").Done(); - const auto updateStateArg = Build(ctx, pos).Name("state").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - i32 index = 0; - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(updateStateArg) - .Name().Build(columnName) - .Done(); - - TMaybeNode applier; - if (tuple.Trait().Cast().UpdateHandler().Args().Size() == 2) { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().UpdateHandler()) - .With(0, updateItemArg) - .With(1, member) - .Done(); - } else { - applier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().UpdateHandler()) - .With(0, updateItemArg) - .With(1, member) - .With(2) - .Literal().Build(ToString(index)) - .Build() - .Done(); - } - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value(applier) - .Done()); - ++index; - } - - return Build(ctx, pos) - .Args({updateItemArg, updateStateArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildMergeHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto& aggregateHandlers = aggregate.Handlers(); - - const auto mergeState1Arg = Build(ctx, pos).Name("state1").Done(); - const auto mergeState2Arg = Build(ctx, pos).Name("state2").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - for (const auto& handler : 
aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member1 = Build(ctx, pos) - .Struct(mergeState1Arg) - .Name().Build(columnName) - .Done(); - const auto member2 = Build(ctx, pos) - .Struct(mergeState2Arg) - .Name().Build(columnName) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value() - .Apply(tuple.Trait().Cast().MergeHandler()) - .With(0, member1) - .With(1, member2) - .Build() - .Done()); - } - - return Build(ctx, pos) - .Args({mergeState1Arg, mergeState2Arg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildFinishHopLambda( - const TCoAggregate& aggregate, - const TVector& actualGroupKeys, - const TString& hoppingColumn, - TExprContext& ctx) - { - const auto pos = aggregate.Pos(); - const auto aggregateHandlers = aggregate.Handlers(); - - const auto finishKeyArg = Build(ctx, pos).Name("key").Done(); - const auto finishStateArg = Build(ctx, pos).Name("state").Done(); - const auto finishTimeArg = Build(ctx, pos).Name("time").Done(); - - TVector structItems; - structItems.reserve(actualGroupKeys.size() + aggregateHandlers.Size() + 1); - - if (actualGroupKeys.size() == 1) { - structItems.push_back(Build(ctx, pos) - .Name().Build(actualGroupKeys[0]) - .Value(finishKeyArg) - .Done()); - } else { - for (size_t i = 0; i < actualGroupKeys.size(); ++i) { - structItems.push_back(Build(ctx, pos) - .Name().Build(actualGroupKeys[i]) - .Value() - .Tuple(finishKeyArg) - .Index() - .Value(ToString(i)) - .Build() - .Build() - .Done()); - } - } - - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString compoundColumnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(finishStateArg) - .Name().Build(compoundColumnName) - .Done(); - - if (tuple.ColumnName().Maybe()) { - structItems.push_back(Build(ctx, pos) - 
.Name().Build(compoundColumnName) - .Value() - .Apply(tuple.Trait().Cast().FinishHandler()) - .With(0, member) - .Build() - .Done()); - - continue; - } - - if (const auto namesList = tuple.ColumnName().Maybe()) { - const auto expApplier = Build(ctx, pos) - .Apply(tuple.Trait().Cast().FinishHandler()) - .With(0, member) - .Done(); - - int index = 0; - for (const auto columnName : namesList.Cast()) { - const auto extracter = Build(ctx, pos) - .Tuple(expApplier) - .Index().Build(index++) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name(columnName) - .Value(extracter) - .Done()); - } - - continue; - } - - YQL_ENSURE(false, "Invalid node. Expected Atom or AtomList, but received: " - << tuple.ColumnName().Ptr()->Dump()); - } - - structItems.push_back(Build(ctx, pos) - .Name().Build(hoppingColumn) - .Value(finishTimeArg) - .Done()); - - return Build(ctx, pos) - .Args({finishKeyArg, finishStateArg, finishTimeArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildSaveHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - const auto aggregateHandlers = aggregate.Handlers(); - - const auto saveStateArg = Build(ctx, pos).Name("state").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(saveStateArg) - .Name().Build(columnName) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value() - .Apply(tuple.Trait().Cast().SaveHandler()) - .With(0, member) - .Build() - .Done()); - } - - return Build(ctx, pos) - .Args({saveStateArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TExprNode::TPtr BuildLoadHopLambda(const TCoAggregate& aggregate, TExprContext& ctx) { - const auto pos = aggregate.Pos(); - 
const auto aggregateHandlers = aggregate.Handlers(); - - TCoArgument loadStateArg = Build(ctx, pos).Name("state").Done(); - - TVector structItems; - structItems.reserve(aggregateHandlers.Size()); - - for (const auto& handler : aggregateHandlers) { - const auto tuple = handler.Cast(); - const TString columnName = BuildColumnName(tuple.ColumnName()); - - const auto member = Build(ctx, pos) - .Struct(loadStateArg) - .Name().Build(columnName) - .Done(); - - structItems.push_back(Build(ctx, pos) - .Name().Build(columnName) - .Value() - .Apply(tuple.Trait().Cast().LoadHandler()) - .With(0, member) - .Build() - .Done()); - } - - return Build(ctx, pos) - .Args({loadStateArg}) - .Body() - .Add(structItems) - .Build() - .Done() - .Ptr(); - } - - TMaybe BuildWatermarkMode( - const TCoAggregate& aggregate, - const TCoHoppingTraits& hoppingTraits, - TExprContext& ctx) - { - const auto analyticsMode = Config->AnalyticsHopping.Get().GetOrElse(false); - const bool enableWatermarks = !analyticsMode && - Config->WatermarksMode.Get() == "default" && - hoppingTraits.Version().Cast().StringValue() == "v2"; - if (enableWatermarks && Config->ComputeActorType.Get() != "async") { - ctx.AddError(TIssue(ctx.GetPosition(aggregate.Pos()), "Watermarks should be used only with async compute actor")); - return Nothing(); - } - - if (hoppingTraits.Version().Cast().StringValue() == "v2" && !enableWatermarks) { - ctx.AddError(TIssue( - ctx.GetPosition(aggregate.Pos()), - "HoppingWindow requires watermarks to be enabled. 
If you don't want to do that, you can use HOP instead.")); - return Nothing(); - } - - return enableWatermarks; - } - IDqOptimization* GetDqOptCallback(const TExprBase& providerRead) const { if (providerRead.Ref().ChildrenSize() > 1 && TCoDataSource::Match(providerRead.Ref().Child(1))) { auto dataSourceName = providerRead.Ref().Child(1)->Child(0)->Content(); diff --git a/ydb/library/yql/providers/dq/planner/execution_planner.cpp b/ydb/library/yql/providers/dq/planner/execution_planner.cpp index b265d529d904..e1b40aa26fdf 100644 --- a/ydb/library/yql/providers/dq/planner/execution_planner.cpp +++ b/ydb/library/yql/providers/dq/planner/execution_planner.cpp @@ -558,7 +558,7 @@ namespace NYql::NDqs { TString sourceType; if (dqSource) { sourceSettings.ConstructInPlace(); - dqIntegration->FillSourceSettings(*read, *sourceSettings, sourceType); + dqIntegration->FillSourceSettings(*read, *sourceSettings, sourceType, maxPartitions); YQL_ENSURE(!sourceSettings->type_url().empty(), "Data source provider \"" << dataSourceName << "\" did't fill dq source settings for its dq source node"); YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceName << "\" did't fill dq source settings type for its dq source node"); } diff --git a/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp b/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp index e92dc952a3d9..37d899baae6d 100644 --- a/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp +++ b/ydb/library/yql/providers/dq/runtime/task_command_executor.cpp @@ -127,13 +127,11 @@ class TTaskCommandExecutor { "TaskRunner", labels, name); - auto& old = CurrentJobStats[counterName]; if (name.EndsWith("Time")) { - QueryStat.AddTimeCounter(counterName, value - old); + QueryStat.SetTimeCounter(counterName, value); } else { - QueryStat.AddCounter(counterName, value - old); + QueryStat.SetCounter(counterName, value); } - old = value; } }); } @@ -766,7 +764,6 @@ class TTaskCommandExecutor { std::unique_ptr 
Alloc; NKikimr::NMiniKQL::TComputationNodeFactory ComputationFactory; TTaskTransformFactory TaskTransformFactory; - THashMap CurrentJobStats; NKikimr::NMiniKQL::IStatsRegistry* JobStats; bool TerminateOnError; TIntrusivePtr Runner; diff --git a/ydb/library/yql/providers/generic/actors/ya.make b/ydb/library/yql/providers/generic/actors/ya.make index 31ec4480c9ef..53f40afdca7c 100644 --- a/ydb/library/yql/providers/generic/actors/ya.make +++ b/ydb/library/yql/providers/generic/actors/ya.make @@ -3,15 +3,18 @@ LIBRARY() SRCS( yql_generic_read_actor.cpp yql_generic_source_factory.cpp + yql_generic_token_provider.cpp ) PEERDIR( ydb/library/yql/dq/actors/compute ydb/library/yql/minikql/computation + ydb/library/yql/providers/common/structured_token ydb/library/yql/providers/common/token_accessor/client ydb/library/yql/providers/generic/proto ydb/library/yql/public/types ydb/library/yql/providers/generic/connector/libcpp + ydb/public/sdk/cpp/client/ydb_types/credentials ) YQL_LAST_ABI_VERSION() diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp b/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp index 7de4f0a04ea1..51c02bb40456 100644 --- a/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp +++ b/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp @@ -1,4 +1,5 @@ #include "yql_generic_read_actor.h" +#include "yql_generic_token_provider.h" #include #include @@ -12,10 +13,10 @@ #include #include #include -#include #include #include #include +#include namespace NYql::NDq { @@ -102,16 +103,16 @@ namespace NYql::NDq { ui64 inputIndex, TCollectStatsLevel statsLevel, NConnector::IClient::TPtr client, - const NConnector::NApi::TSelect& select, - const NConnector::NApi::TDataSourceInstance& dataSourceInstance, + TGenericTokenProvider::TPtr tokenProvider, + Generic::TSource&& source, const NActors::TActorId& computeActorId, const NKikimr::NMiniKQL::THolderFactory& holderFactory) : 
InputIndex_(inputIndex) , ComputeActorId_(computeActorId) , Client_(std::move(client)) + , TokenProvider_(std::move(tokenProvider)) , HolderFactory_(holderFactory) - , Select_(select) - , DataSourceInstance_(dataSourceInstance) + , Source_(source) { IngressStats_.Level = statsLevel; } @@ -143,7 +144,9 @@ namespace NYql::NDq { // Prepare request NConnector::NApi::TListSplitsRequest request; - *request.mutable_selects()->Add() = Select_; + NConnector::NApi::TSelect select = Source_.select(); // copy TSelect from source + TokenProvider_->MaybeFillToken(*select.mutable_data_source_instance()); + *request.mutable_selects()->Add() = std::move(select); // Initialize stream Client_->ListSplits(request).Subscribe( @@ -236,8 +239,11 @@ namespace NYql::NDq { std::for_each( Splits_.cbegin(), Splits_.cend(), - [&](const NConnector::NApi::TSplit& split) { request.mutable_splits()->Add()->CopyFrom(split); }); - request.mutable_data_source_instance()->CopyFrom(DataSourceInstance_); + [&](const NConnector::NApi::TSplit& split) { + NConnector::NApi::TSplit splitCopy = split; + TokenProvider_->MaybeFillToken(*splitCopy.mutable_select()->mutable_data_source_instance()); + *request.mutable_splits()->Add() = std::move(split); + }); // Start streaming Client_->ReadSplits(request).Subscribe( @@ -403,8 +409,8 @@ namespace NYql::NDq { // It's very important to fill UV columns in the alphabet order, // paying attention to the scalar field containing block length. 
TVector fieldNames; - std::transform(Select_.what().items().cbegin(), - Select_.what().items().cend(), + std::transform(Source_.select().what().items().cbegin(), + Source_.select().what().items().cend(), std::back_inserter(fieldNames), [](const auto& item) { return item.column().name(); }); @@ -484,6 +490,7 @@ namespace NYql::NDq { const NActors::TActorId ComputeActorId_; NConnector::IClient::TPtr Client_; + TGenericTokenProvider::TPtr TokenProvider_; NConnector::IListSplitsStreamIterator::TPtr ListSplitsIterator_; TVector Splits_; // accumulated list of table splits NConnector::IReadSplitsStreamIterator::TPtr ReadSplitsIterator_; @@ -492,22 +499,21 @@ namespace NYql::NDq { NKikimr::NMiniKQL::TPlainContainerCache ArrowRowContainerCache_; const NKikimr::NMiniKQL::THolderFactory& HolderFactory_; - const NYql::NConnector::NApi::TSelect Select_; - const NYql::NConnector::NApi::TDataSourceInstance DataSourceInstance_; + Generic::TSource Source_; }; std::pair CreateGenericReadActor(NConnector::IClient::TPtr genericClient, - Generic::TSource&& params, + Generic::TSource&& source, ui64 inputIndex, TCollectStatsLevel statsLevel, const THashMap& /*secureParams*/, const THashMap& /*taskParams*/, const NActors::TActorId& computeActorId, - ISecuredServiceAccountCredentialsFactory::TPtr /*credentialsFactory*/, + ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory, const NKikimr::NMiniKQL::THolderFactory& holderFactory) { - const auto dsi = params.select().data_source_instance(); + const auto dsi = source.select().data_source_instance(); YQL_CLOG(INFO, ProviderGeneric) << "Creating read actor with params:" << " kind=" << NYql::NConnector::NApi::EDataSourceKind_Name(dsi.kind()) << ", endpoint=" << dsi.endpoint().ShortDebugString() @@ -526,6 +532,7 @@ namespace NYql::NDq { YQL_ENSURE(one != TString::npos && two != TString::npos && one < two, "Bad token format:" << token); */ + // Obtain token to access remote data source if necessary // TODO: partitioning is not 
implemented now, but this code will be useful for the further research: /* TStringBuilder part; @@ -539,12 +546,14 @@ namespace NYql::NDq { part << ';'; */ + auto tokenProvider = CreateGenericTokenProvider(source, credentialsFactory); + const auto actor = new TGenericReadActor( inputIndex, statsLevel, genericClient, - params.select(), - dsi, + std::move(tokenProvider), + std::move(source), computeActorId, holderFactory); diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp new file mode 100644 index 000000000000..e8430b87e9ec --- /dev/null +++ b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.cpp @@ -0,0 +1,67 @@ +#include "yql_generic_token_provider.h" + +#include + +namespace NYql::NDq { + TGenericTokenProvider::TGenericTokenProvider( + const NYql::Generic::TSource& source, const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory) + : Source_(source) + , StaticIAMToken_(source.GetToken()) + , CredentialsProvider_(nullptr) + { + // 1. User has provided IAM-token itself. + // This token will be used during the whole lifetime of a read actor. + if (!StaticIAMToken_.empty()) { + return; + } + + // 2. User has provided service account creds. + // We create token accessor client that will renew token accessor by demand. 
+ if (source.GetServiceAccountId() && source.GetServiceAccountIdSignature()) { + Y_ENSURE(credentialsFactory, "CredentialsFactory is not initialized"); + + auto structuredTokenJSON = + TStructuredTokenBuilder() + .SetServiceAccountIdAuth(source.GetServiceAccountId(), source.GetServiceAccountIdSignature()) + .ToJson(); + + // If service account is provided, obtain IAM-token + Y_ENSURE(structuredTokenJSON, "empty structured token"); + + auto credentialsProviderFactory = + CreateCredentialsProviderFactoryForStructuredToken(credentialsFactory, structuredTokenJSON, false); + CredentialsProvider_ = credentialsProviderFactory->CreateProvider(); + } + + // 3. If we reached this point, it means that user doesn't need token auth. + } + + void TGenericTokenProvider::MaybeFillToken(NConnector::NApi::TDataSourceInstance& dsi) const { + // 1. Don't need tokens if basic auth is set + if (dsi.credentials().has_basic()) { + return; + } + + *dsi.mutable_credentials()->mutable_token()->mutable_type() = "IAM"; + + // 2. If static IAM-token has been provided, use it + if (!StaticIAMToken_.empty()) { + *dsi.mutable_credentials()->mutable_token()->mutable_value() = StaticIAMToken_; + return; + } + + // 3. 
Otherwise use credentials provider to get token + Y_ENSURE(CredentialsProvider_, "CredentialsProvider is not initialized"); + + auto iamToken = CredentialsProvider_->GetAuthInfo(); + Y_ENSURE(iamToken, "CredentialsProvider returned empty IAM token"); + + *dsi.mutable_credentials()->mutable_token()->mutable_value() = std::move(iamToken); + } + + TGenericTokenProvider::TPtr + CreateGenericTokenProvider(const NYql::Generic::TSource& source, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory) { + return std::make_unique(source, credentialsFactory); + } +} //namespace NYql::NDq diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h new file mode 100644 index 000000000000..495a44c15e57 --- /dev/null +++ b/ydb/library/yql/providers/generic/actors/yql_generic_token_provider.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +namespace NYql::NDq { + // When accessing external data sources using authentication via tokens, + // there are two options: + // 1. Use static IAM-token provided by user (especially useful during debugging); + // 2. Use service account credentials in order to get (and refresh) IAM-token by demand. 
+ class TGenericTokenProvider { + public: + using TPtr = std::unique_ptr; + + TGenericTokenProvider(const NYql::Generic::TSource& source, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory); + + void MaybeFillToken(NConnector::NApi::TDataSourceInstance& dsi) const; + + private: + NYql::Generic::TSource Source_; + TString StaticIAMToken_; + NYdb::TCredentialsProviderPtr CredentialsProvider_; + }; + + TGenericTokenProvider::TPtr + CreateGenericTokenProvider(const NYql::Generic::TSource& source, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory); +} //namespace NYql::NDq diff --git a/ydb/library/yql/providers/generic/connector/api/common/data_source.proto b/ydb/library/yql/providers/generic/connector/api/common/data_source.proto index ecfb64665c9e..d0f700280c09 100644 --- a/ydb/library/yql/providers/generic/connector/api/common/data_source.proto +++ b/ydb/library/yql/providers/generic/connector/api/common/data_source.proto @@ -32,6 +32,8 @@ enum EDataSourceKind { POSTGRESQL = 2; S3 = 3; YDB = 4; + MYSQL = 5; + MS_SQL_SERVER = 6; } // EProtocol generalizes various kinds of network protocols supported by different databases. diff --git a/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto b/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto index 67cd9588547c..7004f2686136 100644 --- a/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto +++ b/ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto @@ -197,8 +197,10 @@ message TSplit { // ReadDataRequest reads the data associated with a particular table split. message TReadSplitsRequest { - // Data source instance to connect - TDataSourceInstance data_source_instance = 1; + // Data source instance to connect. + // Deprecated field: server implementations must rely on + // TDataSourceInstance provided in each TSelect. 
+ TDataSourceInstance data_source_instance = 1 [deprecated = true]; // Splits that YQ engine would like to read. repeated TSplit splits = 2; diff --git a/ydb/library/yql/providers/generic/connector/libcpp/client.cpp b/ydb/library/yql/providers/generic/connector/libcpp/client.cpp index 9d6237808377..8280a4e36886 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/client.cpp +++ b/ydb/library/yql/providers/generic/connector/libcpp/client.cpp @@ -1,3 +1,5 @@ +#include + #include "client.h" namespace NYql::NConnector { @@ -21,10 +23,22 @@ namespace NYql::NConnector { public: TClientGRPC() = delete; TClientGRPC(const TGenericConnectorConfig& config) { - TString endpoint = TStringBuilder() << config.GetEndpoint().host() << ":" << ToString(config.GetEndpoint().port()); - GrpcConfig_ = NYdbGrpc::TGRpcClientConfig(endpoint); + GrpcConfig_ = NYdbGrpc::TGRpcClientConfig(); + + Y_ENSURE(config.GetEndpoint().host(), TStringBuilder() << "Empty host in TGenericConnectorConfig: " << config.DebugString()); + Y_ENSURE(config.GetEndpoint().port(), TStringBuilder() << "Empty port in TGenericConnectorConfig: " << config.DebugString()); + GrpcConfig_.Locator = TStringBuilder() << config.GetEndpoint().host() << ":" << config.GetEndpoint().port(); + GrpcConfig_.EnableSsl = config.GetUseSsl(); + // Read content of CA cert + TString rootCertData; + if (config.GetSslCaCrt()) { + rootCertData = TFileInput(config.GetSslCaCrt()).ReadAll(); + } + + GrpcConfig_.SslCredentials = grpc::SslCredentialsOptions{.pem_root_certs = rootCertData, .pem_private_key = "", .pem_cert_chain = ""}; + GrpcClient_ = std::make_unique(); // FIXME: is it OK to use single connection during the client lifetime? 
diff --git a/ydb/library/yql/providers/generic/connector/libcpp/client.h b/ydb/library/yql/providers/generic/connector/libcpp/client.h index 1d066e31b72a..7a2250798eb8 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/client.h +++ b/ydb/library/yql/providers/generic/connector/libcpp/client.h @@ -17,8 +17,6 @@ namespace NYql::NConnector { using TAsyncResult = NThreading::TFuture>; using TDescribeTableAsyncResult = TAsyncResult; - // using TListSplitsAsyncResult = TAsyncResult; - // using TReadSplitsAsyncResult = TAsyncResult; template class TStreamer { diff --git a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp index 09fbd4f8c599..7eed47039379 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp +++ b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.cpp @@ -18,6 +18,5 @@ namespace NYql::NConnector::NTest { extern const TString DEFAULT_CH_SERVICE_ACCOUNT_ID_SIGNATURE = "sa_signature"; extern const TString DEFAULT_YDB_HOST = "localhost"; - extern const TString DEFAULT_YDB_DATABASE = "local"; extern const TString DEFAULT_YDB_ENDPOINT = TStringBuilder() << DEFAULT_YDB_HOST << ':' << DEFAULT_YDB_PORT; } // namespace NYql::NConnector::NTest diff --git a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h index f5e28b3e9f37..bbca9127a4bd 100644 --- a/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h +++ b/ydb/library/yql/providers/generic/connector/libcpp/ut_helpers/defaults.h @@ -26,7 +26,6 @@ namespace NYql::NConnector::NTest { extern const TString DEFAULT_CH_SERVICE_ACCOUNT_ID; extern const TString DEFAULT_CH_SERVICE_ACCOUNT_ID_SIGNATURE; - extern const TString DEFAULT_YDB_DATABASE; extern const TString DEFAULT_YDB_HOST; constexpr int DEFAULT_YDB_PORT = 2136; extern const TString 
DEFAULT_YDB_ENDPOINT; diff --git a/ydb/library/yql/providers/generic/connector/tests/README.md new file mode 100644 index 000000000000..a474c36e673d --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/README.md @@ -0,0 +1,25 @@ +# Contribution guide + +When extending the YDB Federated Query list of supported external datasources with a new database / storage / whatever, +it's crucial to write integration tests. There's a kind of template for these tests consisting of: + +* Test scenario (`CREATE TABLE` / `INSERT` / `SELECT` and so on). +* Test cases parametrizing the scenario. +* Infrastructure code responsible for deploying the external datasource as the dockerized service. + +The basic scenario typically implies the following steps: +1. Deploy datasource and connector services using `docker-compose`. +1. Initialize the datasource in any way you like (either with predefined `*.sql` files mounted into the container or programmatically from the test app). +1. Prepare a `YQL` script to query data from the datasource. +1. Execute the script with YDB-based CLI tools: `dqrun` and `kqprun`. +1. Validate the output. + +## Directory structure + +* `common_test_cases` keeps basic test cases that can be used for testing any data source. +* `datasource` contains subfolders (`datasource/clickhouse`, `datasource/postgresql`, etc) with datasource-specific test scenarios, test cases and the `docker-compose.yml` file that is required to set up the test environment. +* `join` contains tests checking cross-datasource scenarios. +* `utils` contains building blocks for tests: + * `utils/clients` stores code performing network IO; + * `utils/scenario` describes the typical scenarios of the data source usage (e.g. creating a table, filling it with test data, etc.); + * `utils/types` describes the external data source's type system. 
diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/base.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/base.py similarity index 76% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/base.py rename to ydb/library/yql/providers/generic/connector/tests/common_test_cases/base.py index 4e8902a0372e..fb3c11e84f1f 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/base.py +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/base.py @@ -7,7 +7,6 @@ from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat from ydb.library.yql.providers.generic.connector.tests.utils.database import Database -from ydb.library.yql.providers.generic.connector.tests.utils.data_source_kind import data_source_kind_alias from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings @@ -20,7 +19,7 @@ class BaseTestCase: @property def name(self) -> str: - return f'{self.name_}_{data_source_kind_alias(self.data_source_kind)}_{EProtocol.Name(self.protocol)}' + return f'{self.name_}_{EProtocol.Name(self.protocol)}' @property def database(self) -> Database: @@ -35,7 +34,13 @@ def _table_name(self) -> str: In general, we cannot use test case name as table name because of special symbols, so we provide a random table name instead. 
''' - return 't' + hashlib.sha256(str(random.randint(0, 65536)).encode('ascii')).hexdigest()[:8] + match self.data_source_kind: + case EDataSourceKind.POSTGRESQL: + return 't' + hashlib.sha256(str(random.randint(0, 65536)).encode('ascii')).hexdigest()[:8] + case EDataSourceKind.CLICKHOUSE: + return 't' + hashlib.sha256(str(random.randint(0, 65536)).encode('ascii')).hexdigest()[:8] + case EDataSourceKind.YDB: + return self.name @property def sql_table_name(self) -> str: @@ -70,5 +75,10 @@ def generic_settings(self) -> GenericSettings: clickhouse_clusters=[], postgresql_clusters=[GenericSettings.PostgreSQLCluster(database=self.database.name, schema=None)], ) + case EDataSourceKind.YDB: + return GenericSettings( + date_time_format=EDateTimeFormat.YQL_FORMAT, + ydb_clusters=[GenericSettings.YdbCluster(database=self.database.name)], + ) case _: raise Exception(f'invalid data source: {self.data_source_kind}') diff --git a/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py new file mode 100644 index 000000000000..ec47a556c42c --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_database.py @@ -0,0 +1,25 @@ +from typing import List + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase + + +TestCase = BaseTestCase + + +class Factory: + def make_test_cases(self, data_source_kind: EDataSourceKind) -> List[TestCase]: + test_cases = [] + + test_case_name = 'missing_database' + + test_case = TestCase( + name_=test_case_name, + data_source_kind=data_source_kind, + protocol=EProtocol.NATIVE, + pragmas=dict(), + ) + + test_cases.append(test_case) + + return test_cases diff --git 
a/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py new file mode 100644 index 000000000000..3a2501e35480 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_missing_table.py @@ -0,0 +1,25 @@ +from typing import List + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase + + +TestCase = BaseTestCase + + +class Factory: + def make_test_cases(self, data_source_kind: EDataSourceKind) -> List[TestCase]: + test_cases = [] + + test_case_name = 'missing_table' + + test_case = TestCase( + name_=test_case_name, + data_source_kind=data_source_kind, + protocol=EProtocol.NATIVE, + pragmas=dict(), + ) + + test_cases.append(test_case) + + return test_cases diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_common.py b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_positive_common.py similarity index 90% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_common.py rename to ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_positive_common.py index 714316837b4d..fff27b4cb4aa 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_common.py +++ b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/select_positive_common.py @@ -7,8 +7,9 @@ from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings from ydb.library.yql.providers.generic.connector.tests.utils.generate import generate_table_data -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import 
ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.ydb as Ydb from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -18,7 +19,7 @@ SelectWhere, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings @@ -68,12 +69,12 @@ def _column_selection(self) -> Sequence[TestCase]: Column( name='COL1', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4(), ydb=Ydb.Int32()), ), Column( name='col2', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4(), ydb=Ydb.Int32()), ), ) ) @@ -90,6 +91,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL, + EDataSourceKind.YDB, ), ), # SELECT COL1 FROM table @@ -102,6 +104,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, # NOTE: YQ-2264: doesn't work for PostgreSQL because of implicit cast to lowercase (COL1 -> col1) + EDataSourceKind.YDB, ), ), # SELECT col1 FROM table @@ -123,6 +126,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL, + EDataSourceKind.YDB, ), ), # SELECT col2, COL1 FROM table @@ -135,6 +139,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( 
EDataSourceKind.CLICKHOUSE, # NOTE: YQ-2264: doesn't work for PostgreSQL because of implicit cast to lowercase (COL1 -> col1) + EDataSourceKind.YDB, ), ), # SELECT col2, col1 FROM table @@ -157,6 +162,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, # NOTE: YQ-2264: doesn't work for PostgreSQL because of implicit cast to lowercase (COL1 -> col1) + EDataSourceKind.YDB, ), ), # Select the same column multiple times with different aliases @@ -176,6 +182,7 @@ def _column_selection(self) -> Sequence[TestCase]: ( EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL, + EDataSourceKind.YDB, ), ), ) @@ -235,7 +242,6 @@ def _large_table(self) -> Sequence[TestCase]: ) data_in = generate_table_data(schema=schema, bytes_soft_limit=table_size) - print("BIRD", data_in) # Assuming that request will look something like: # @@ -272,14 +278,20 @@ def make_test_cases(self, data_source_kind: EDataSourceKind) -> Sequence[TestCas protocols = { EDataSourceKind.CLICKHOUSE: [EProtocol.NATIVE, EProtocol.HTTP], EDataSourceKind.POSTGRESQL: [EProtocol.NATIVE], + EDataSourceKind.YDB: [EProtocol.NATIVE], } - base_test_cases = list( - itertools.chain( - self._column_selection(), - self._large_table(), + base_test_cases = None + + if data_source_kind == EDataSourceKind.YDB: + base_test_cases = self._column_selection() + else: + base_test_cases = list( + itertools.chain( + self._column_selection(), + self._large_table(), + ) ) - ) test_cases = [] for base_tc in base_test_cases: diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/ya.make b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/ya.make similarity index 66% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/ya.make rename to ydb/library/yql/providers/generic/connector/tests/common_test_cases/ya.make index 211106748c43..49991fd287f1 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/ya.make +++ 
b/ydb/library/yql/providers/generic/connector/tests/common_test_cases/ya.make @@ -1,18 +1,10 @@ PY3_LIBRARY() -STYLE_PYTHON() - PY_SRCS( base.py - collection.py - join.py - select_datetime.py select_missing_database.py select_missing_table.py - select_positive_clickhouse.py select_positive_common.py - select_positive_postgresql.py - select_positive_postgresql_schema.py ) PEERDIR( diff --git a/ydb/library/yql/providers/generic/connector/tests/conftest.py b/ydb/library/yql/providers/generic/connector/tests/conftest.py deleted file mode 100644 index e89287fd8ccf..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/conftest.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import TypeAlias - -import grpc -import pytest - -import ydb.library.yql.providers.generic.connector.api.service.connector_pb2_grpc as api -import yatest.common as yat - -from utils.settings import Settings -import utils.clickhouse -from utils.dqrun import DqRunner -from utils.kqprun import KqpRunner -from utils.runner import Runner -import utils.postgresql - - -@pytest.fixture -def settings() -> Settings: - return Settings.from_env() - - -@pytest.fixture -def clickhouse_client(settings) -> utils.clickhouse.Client: - client = utils.clickhouse.make_client(settings.clickhouse) - yield client - client.close() - - -@pytest.fixture -def postgresql_client(settings) -> utils.postgresql.Client: - return utils.postgresql.Client(settings.postgresql) - - -ConnectorClient: TypeAlias = api.ConnectorStub - - -@pytest.fixture -def connector_client(settings) -> ConnectorClient: - s = settings.connector - - channel = grpc.insecure_channel(f'{s.host}:{s.port}') - stub = ConnectorClient(channel) - return stub - - -def configure_runner(runner, settings) -> Runner: - if runner is DqRunner: - return DqRunner(dqrun_path=yat.build_path("ydb/library/yql/tools/dqrun/dqrun"), settings=settings) - elif runner is KqpRunner: - return KqpRunner(kqprun_path=yat.build_path("ydb/tests/tools/kqprun/kqprun"), 
settings=settings) - return None diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py new file mode 100644 index 000000000000..b8f4d5d442f2 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/collection.py @@ -0,0 +1,37 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind + +# test cases +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import select_positive +import select_datetime + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + 'select_missing_database': select_missing_database.Factory().make_test_cases(EDataSourceKind.CLICKHOUSE), + 'select_missing_table': select_missing_table.Factory().make_test_cases(EDataSourceKind.CLICKHOUSE), + 'select_positive': select_positive.Factory().make_test_cases() + + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.CLICKHOUSE), + 'select_datetime': select_datetime.Factory().make_test_cases(), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, self._test_cases[key]) diff --git 
a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py new file mode 100644 index 000000000000..6f31ef8d80cd --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/conftest.py @@ -0,0 +1,22 @@ +from typing import Final +import pathlib + +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client, make_client + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse") + + +@pytest.fixture +def settings() -> Settings: + return Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.CLICKHOUSE]) + + +@pytest.fixture +def clickhouse_client(settings) -> Client: + cl = make_client(settings.clickhouse) + yield cl + cl.close() diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml new file mode 100644 index 000000000000..16f0eb3ee86b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml @@ -0,0 +1,20 @@ +version: '3.4' +services: + clickhouse: + image: clickhouse/clickhouse-server:23-alpine@sha256:b078c1cd294632afa2aeba3530e7ba2e568513da23304354f455a25fab575c06 + container_name: fq-tests-ch-clickhouse + environment: + CLICKHOUSE_DB: db + CLICKHOUSE_USER: user + CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 + CLICKHOUSE_PASSWORD: password + ports: + - 9000 + - 8123 + fq-connector-go: + container_name: fq-tests-ch-fq-connector-go + image: 
ghcr.io/ydb-platform/fq-connector-go:v0.2.5@sha256:7f086ce3869b84a59fd76a10a9de8125c0d382915e956d34832105e03829a61b + volumes: + - ../../fq-connector-go/:/opt/ydb/cfg/ + ports: + - 2130 diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_datetime.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_datetime.py similarity index 70% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_datetime.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_datetime.py index 22165fa715bb..95c390cf2507 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_datetime.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_datetime.py @@ -6,9 +6,8 @@ from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common as select_positive_common -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -173,68 +172,6 @@ def _make_test_yql_clickhouse(self) -> TestCase: pragmas=dict(), ) - def _make_test_yql_postgresql(self) -> TestCase: - schema = Schema( - columns=ColumnList( - Column( - name='col_0_id', - ydb_type=Type.INT32, - data_source_type=DataSourceType(pg=postgresql.Int4()), - ), - # TODO: timestamp - Column( - name='col_1_datetime64', - ydb_type=Type.TIMESTAMP, - 
data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), - ), - ), - ) - data_in = [ - # Date is OK for CH, but too early for YQL - [ - 1, - datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), - ], - # Date is OK for both CH and YQL - [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], - # Date is OK for CH, but too late for YQL - [ - 3, - datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), - ], - ] - - data_out = [ - [ - 1, - None, - ], - [ - 2, - # datetime.datetime(1988, 11, 20, 12, 23, 45, 678000).astimezone(ZoneInfo('UTC')).replace(tzinfo=None), - datetime.datetime(1988, 11, 20, 12, 23, 45, 678910), - ], - [ - 3, - None, - ], - ] - - test_case_name = self._name + '_YQL' - - return TestCase( - name_=test_case_name, - date_time_format=EDateTimeFormat.YQL_FORMAT, - data_in=data_in, - data_out_=data_out, - select_what=SelectWhat.asterisk(schema.columns), - select_where=None, - data_source_kind=EDataSourceKind.POSTGRESQL, - protocol=EProtocol.NATIVE, - schema=schema, - pragmas=dict(), - ) - def _make_test_string_clickhouse(self) -> TestCase: schema = Schema( columns=ColumnList( @@ -344,68 +281,8 @@ def _make_test_string_clickhouse(self) -> TestCase: pragmas=dict(), ) - def _make_test_string_postgresql(self) -> TestCase: - schema = Schema( - columns=ColumnList( - Column( - name='col_0_id', - ydb_type=Type.INT32, - data_source_type=DataSourceType(pg=postgresql.Int4()), - ), - # TODO: timestamp - Column( - name='col_1_datetime64', - ydb_type=Type.TIMESTAMP, - data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), - ), - ), - ) - data_in = [ - [ - 1, - datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), - ], - [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], - [ - 3, - datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), - ], - ] - - data_out = [ - [ - 1, - '1950-05-27T12:23:45.67891Z', - ], - [ - 2, - '1988-11-20T12:23:45.67891Z', - ], - [ - 3, - '2108-01-01T12:23:45.67891Z', - ], - ] - - test_case_name = self._name + 
'_string' - - return TestCase( - name_=test_case_name, - date_time_format=EDateTimeFormat.STRING_FORMAT, - data_in=data_in, - data_out_=data_out, - select_what=SelectWhat.asterisk(schema.columns), - select_where=None, - data_source_kind=EDataSourceKind.POSTGRESQL, - protocol=EProtocol.NATIVE, - schema=schema, - pragmas=dict(), - ) - def make_test_cases(self) -> Sequence[TestCase]: return [ self._make_test_yql_clickhouse(), - self._make_test_yql_postgresql(), self._make_test_string_clickhouse(), - self._make_test_string_postgresql(), ] diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_positive.py similarity index 98% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_clickhouse.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_positive.py index 19595cd96e84..de6af3d7cc51 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_clickhouse.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/select_positive.py @@ -6,7 +6,7 @@ from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -19,7 +19,7 @@ makeOptionalYdbTypeFromYdbType, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common import TestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common import TestCase class Factory: diff --git 
a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py new file mode 100644 index 000000000000..dad7d53bc4e3 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/test.py @@ -0,0 +1,104 @@ +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.clickhouse as scenario + +from conftest import docker_compose_dir +from collection import Collection + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + + +# Global collection of test cases dependent on environment +tc_collection = Collection( + Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.CLICKHOUSE]) +) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_positive'), ids=tc_collection.ids('select_positive')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_positive( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, 
settings=settings) + scenario.select_positive( + test_name=request.node.name, settings=settings, runner=runner, client=clickhouse_client, test_case=test_case + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_database'), ids=tc_collection.ids('select_missing_database') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_missing_database( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_missing_database.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + settings=settings, + runner=runner, + client=clickhouse_client, + test_case=test_case, + test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_table'), ids=tc_collection.ids('select_missing_table') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_missing_table( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_missing_table.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + test_name=request.node.name, + settings=settings, + runner=runner, + client=clickhouse_client, + test_case=test_case, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_datetime'), ids=tc_collection.ids('select_datetime')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clickhouse_client") +def test_select_datetime( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clickhouse_client: Client, + test_case: select_positive_common.TestCase, 
+): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_positive( + test_name=request.node.name, + test_case=test_case, + settings=settings, + runner=runner, + client=clickhouse_client, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make new file mode 100644 index 000000000000..87d435816814 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/ya.make @@ -0,0 +1,66 @@ +PY3TEST() + +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=clickhouse) + +IF (AUTOCHECK) + # Split tests to chunks only when they're running on different machines with distbuild, + # otherwise this directive will slow down local test execution. + # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. + FORK_SUBTESTS() + + # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose + TAG( + ya:external + ya:force_sandbox + ya:fat + ) + + REQUIREMENTS( + container:4467981730 + cpu:all + dns:dns64 + ) +ENDIF() + +INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) + +# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, +# which makes it impossible to run them during precommit checks on Github CI. +# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
+ +IF (OPENSOURCE) + SIZE(MEDIUM) + SET(TEST_TAGS_VALUE) + SET(TEST_REQUIREMENTS_VALUE) +ENDIF() + +TEST_SRCS( + collection.py + conftest.py + select_datetime.py + select_positive.py + test.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/api/service/protos + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py new file mode 100644 index 000000000000..da6a46144b77 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/collection.py @@ -0,0 +1,37 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import select_datetime +import select_positive +import select_positive_with_schema + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + 
'select_missing_database': select_missing_database.Factory().make_test_cases(EDataSourceKind.POSTGRESQL), + 'select_missing_table': select_missing_table.Factory().make_test_cases(EDataSourceKind.POSTGRESQL), + 'select_positive': select_positive.Factory().make_test_cases() + + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.POSTGRESQL), + 'select_positive_with_schema': select_positive_with_schema.Factory().make_test_cases(), + 'select_datetime': select_datetime.Factory().make_test_cases(), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py new file mode 100644 index 000000000000..e2e36087059d --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/conftest.py @@ -0,0 +1,22 @@ +from typing import Final +import pathlib + +import pytest + + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client + + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/postgresql") + + +@pytest.fixture +def settings() -> Settings: + return Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.POSTGRESQL]) + + +@pytest.fixture +def postgresql_client(settings) -> Client: + return Client(settings.postgresql) diff --git 
a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml new file mode 100644 index 000000000000..5808d9191470 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml @@ -0,0 +1,19 @@ +version: '3.4' +services: + postgresql: + image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 + container_name: fq-tests-pg-postgresql + environment: + POSTGRES_DB: db + POSTGRES_USER: user + POSTGRES_PASSWORD: password + command: ["postgres", "-c", "log_statement=all", "-c", "log_connections=on", "-c", "log_disconnections=on"] + ports: + - 5432 + fq-connector-go: + container_name: fq-tests-pg-fq-connector-go + image: ghcr.io/ydb-platform/fq-connector-go:v0.2.5@sha256:7f086ce3869b84a59fd76a10a9de8125c0d382915e956d34832105e03829a61b + volumes: + - ../../fq-connector-go/:/opt/ydb/cfg/ + ports: + - 2130 diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py new file mode 100644 index 000000000000..7c396bd5013a --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_datetime.py @@ -0,0 +1,163 @@ +from dataclasses import dataclass +import datetime +from typing import Sequence + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol +from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat +from ydb.public.api.protos.ydb_value_pb2 import Type + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql +from 
ydb.library.yql.providers.generic.connector.tests.utils.schema import ( + Schema, + Column, + ColumnList, + DataSourceType, + SelectWhat, +) +from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings + + +@dataclass +class TestCase(select_positive_common.TestCase): + date_time_format: EDateTimeFormat = EDateTimeFormat.YQL_FORMAT + + @property + def data_out(self) -> Sequence: + return self.data_out_ if self.data_out_ else self.data_in + + @property + def generic_settings(self) -> GenericSettings: + gs = super().generic_settings + gs.date_time_format = self.date_time_format + return gs + + +class Factory: + _name = 'datetime' + + def _make_test_yql_postgresql(self) -> TestCase: + schema = Schema( + columns=ColumnList( + Column( + name='col_0_id', + ydb_type=Type.INT32, + data_source_type=DataSourceType(pg=postgresql.Int4()), + ), + # TODO: timestamp + Column( + name='col_1_datetime64', + ydb_type=Type.TIMESTAMP, + data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), + ), + ), + ) + data_in = [ + # Date is OK for CH, but too early for YQL + [ + 1, + datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), + ], + # Date is OK for both CH and YQL + [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], + # Date is OK for CH, but too late for YQL + [ + 3, + datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), + ], + ] + + data_out = [ + [ + 1, + None, + ], + [ + 2, + # datetime.datetime(1988, 11, 20, 12, 23, 45, 678000).astimezone(ZoneInfo('UTC')).replace(tzinfo=None), + datetime.datetime(1988, 11, 20, 12, 23, 45, 678910), + ], + [ + 3, + None, + ], + ] + + test_case_name = self._name + '_YQL' + + return TestCase( + name_=test_case_name, + date_time_format=EDateTimeFormat.YQL_FORMAT, + data_in=data_in, + data_out_=data_out, + select_what=SelectWhat.asterisk(schema.columns), + select_where=None, + data_source_kind=EDataSourceKind.POSTGRESQL, + protocol=EProtocol.NATIVE, + schema=schema, + pragmas=dict(), + ) + + def 
_make_test_string_postgresql(self) -> TestCase: + schema = Schema( + columns=ColumnList( + Column( + name='col_0_id', + ydb_type=Type.INT32, + data_source_type=DataSourceType(pg=postgresql.Int4()), + ), + # TODO: timestamp + Column( + name='col_1_datetime64', + ydb_type=Type.TIMESTAMP, + data_source_type=DataSourceType(pg=postgresql.TimestampWithoutTimeZone()), + ), + ), + ) + data_in = [ + [ + 1, + datetime.datetime(1950, 5, 27, 12, 23, 45, 678910), + ], + [2, datetime.datetime(1988, 11, 20, 12, 23, 45, 678910)], + [ + 3, + datetime.datetime(2108, 1, 1, 12, 23, 45, 678910), + ], + ] + + data_out = [ + [ + 1, + '1950-05-27T12:23:45.67891Z', + ], + [ + 2, + '1988-11-20T12:23:45.67891Z', + ], + [ + 3, + '2108-01-01T12:23:45.67891Z', + ], + ] + + test_case_name = self._name + '_string' + + return TestCase( + name_=test_case_name, + date_time_format=EDateTimeFormat.STRING_FORMAT, + data_in=data_in, + data_out_=data_out, + select_what=SelectWhat.asterisk(schema.columns), + select_where=None, + data_source_kind=EDataSourceKind.POSTGRESQL, + protocol=EProtocol.NATIVE, + schema=schema, + pragmas=dict(), + ) + + def make_test_cases(self) -> Sequence[TestCase]: + return [ + self._make_test_yql_postgresql(), + self._make_test_string_postgresql(), + ] diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive.py similarity index 98% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive.py index 6a213d01809b..ac605f6576ff 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive.py @@ -5,7 +5,7 @@ from 
ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -16,7 +16,7 @@ makeOptionalYdbTypeFromTypeID, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common import TestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common import TestCase class Factory: diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql_schema.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive_with_schema.py similarity index 79% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql_schema.py rename to ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive_with_schema.py index b39192fdc76d..b2cb9bf457cf 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_positive_postgresql_schema.py +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/select_positive_with_schema.py @@ -4,13 +4,12 @@ from random import choice from string import ascii_lowercase, digits -from utils.settings import GenericSettings from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol from ydb.public.api.protos.ydb_value_pb2 import Type -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +from ydb.library.yql.providers.generic.connector.tests.utils.settings import GenericSettings 
+import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( Schema, Column, @@ -18,7 +17,7 @@ DataSourceType, SelectWhat, ) -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase +from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase @dataclass @@ -51,19 +50,19 @@ def _select_with_pg_schema(self) -> Sequence[TestCase]: Column( name='COL1', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(pg=postgresql.Int4()), ), Column( name='col2', ydb_type=Type.INT32, - data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int4()), + data_source_type=DataSourceType(pg=postgresql.Int4()), ), ) ) select_what = SelectWhat.asterisk(column_list=schema.columns) - test_case_name = f'select_with_schema_{select_what}' + test_case_name = f'select_positive_with_schema_{select_what}' test_case = TestCase( name_=test_case_name, diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py new file mode 100644 index 000000000000..4483d6201d2f --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/test.py @@ -0,0 +1,133 @@ +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.postgresql as scenario + +from conftest import 
docker_compose_dir +from collection import Collection + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + + +# Global collection of test cases dependent on environment +tc_collection = Collection( + Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.POSTGRESQL]) +) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_positive'), ids=tc_collection.ids('select_positive')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_positive( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_positive( + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_database'), ids=tc_collection.ids('select_missing_database') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_missing_database( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_missing_database.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + 
test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", tc_collection.get('select_missing_table'), ids=tc_collection.ids('select_missing_table') +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_missing_table( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_missing_table.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_missing_table( + test_name=request.node.name, + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_datetime'), ids=tc_collection.ids('select_datetime')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_datetime( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_positive( + settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + test_name=request.node.name, + ) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize( + "test_case", + tc_collection.get('select_positive_with_schema'), + ids=tc_collection.ids('select_positive_with_schema'), +) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("postgresql_client") +def test_select_positive_with_schema( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + postgresql_client: Client, + test_case: select_positive_common.TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.select_pg_schema( + 
settings=settings, + runner=runner, + client=postgresql_client, + test_case=test_case, + test_name=request.node.name, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make new file mode 100644 index 000000000000..9c622fcf3b76 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/ya.make @@ -0,0 +1,66 @@ +PY3TEST() + +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/datasource/postgresql/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=postgresql) + +IF (AUTOCHECK) + # Split tests to chunks only when they're running on different machines with distbuild, + # otherwise this directive will slow down local test execution. + # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. + FORK_SUBTESTS() + + # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose + TAG( + ya:external + ya:force_sandbox + ya:fat + ) + + REQUIREMENTS( + container:4467981730 + cpu:all + dns:dns64 + ) +ENDIF() + +INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) + +# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, +# which makes it impossible to run them during precommit checks on Github CI. +# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
+ +IF (OPENSOURCE) + SIZE(MEDIUM) + SET(TEST_TAGS_VALUE) + SET(TEST_REQUIREMENTS_VALUE) +ENDIF() + +TEST_SRCS( + collection.py + conftest.py + select_datetime.py + select_positive.py + select_positive_with_schema.py + test.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/ya.make new file mode 100644 index 000000000000..3f0f2c8f0117 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ya.make @@ -0,0 +1,5 @@ +RECURSE_FOR_TESTS( + clickhouse + postgresql + ydb +) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py new file mode 100644 index 000000000000..3c747d5fc9e2 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/collection.py @@ -0,0 +1,32 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind + +# import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as select_missing_database +# import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + 
+from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + # 'select_missing_database': select_missing_database.Factory().make_test_cases(EDataSourceKind.YDB), + # 'select_missing_table': select_missing_table.Factory().make_test_cases(EDataSourceKind.YDB), + 'select_positive': select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.YDB), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py new file mode 100644 index 000000000000..5e12b074140b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/conftest.py @@ -0,0 +1,4 @@ +from typing import Final +import pathlib + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/ydb") diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml new file mode 100644 index 000000000000..93a54ce2ffc8 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml @@ -0,0 +1,25 @@ +version: '3.4' +services: + ydb: + image: cr.yandex/yc/yandex-docker-local-ydb:23.3.17@sha256:bf9001c849cc6c4c9b56f32f5440a6e8390c4e841937c9f9caf929fd70a689c8 + container_name: fq-tests-ydb-ydb + hostname: fq-tests-ydb-ydb + environment: + YDB_DEFAULT_LOG_LEVEL: INFO + POSTGRES_USER: user + 
POSTGRES_PASSWORD: password + volumes: + - ./init/init_ydb:/init_ydb + - ./init/01_basic.sh:/01_basic.sh + + fq-connector-go: + image: ghcr.io/ydb-platform/fq-connector-go:v0.2.12@sha256:dd2483ba061e25e8ee645bcc64cae8b8a0a93dba6772eb4b8ab0a0aab4b8dd48 + container_name: fq-tests-ydb-fq-connector-go + volumes: + - ../../fq-connector-go/:/opt/ydb/cfg/ + ports: + - 2130 + command: > + sh -c " + echo \"$$(dig fq-tests-ydb-ydb +short) fq-tests-ydb-ydb\" >> /etc/hosts; cat /etc/hosts; + /opt/ydb/bin/fq-connector-go server -c /opt/ydb/cfg/fq-connector-go.yaml" diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh new file mode 100755 index 000000000000..28daf80535da --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/init/01_basic.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +/ydb -p tests-ydb-client yql -s ' + CREATE TABLE column_selection_A_b_C_d_E_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_A_b_C_d_E_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_COL1_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_COL1_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_asterisk_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_asterisk_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_col2_COL1_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_col2_COL1_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE column_selection_col2_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_col2_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + + CREATE TABLE 
column_selection_col3_NATIVE (COL1 Int32, col2 Int32, PRIMARY KEY (COL1)); + COMMIT; + INSERT INTO column_selection_col3_NATIVE (COL1, col2) VALUES + (1, 2), + (10, 20); + COMMIT; + ' + +echo $(date +"%T.%6N") "SUCCESS" diff --git a/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py new file mode 100644 index 000000000000..288782268cdb --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/test.py @@ -0,0 +1,50 @@ +import pytest +import time + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.ydb as scenario + +from conftest import docker_compose_dir +from collection import Collection + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as select_positive_common + + +class OneTimeWaiter: + __launched: bool = False + + def wait(self): + if self.__launched: + return + + # This should be enough for tables to initialize + time.sleep(3) + self.__launched = True + + +one_time_waiter = OneTimeWaiter() + +settings = Settings.from_env(docker_compose_dir=docker_compose_dir, data_source_kinds=[EDataSourceKind.YDB]) +tc_collection = Collection(settings) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('select_positive'), ids=tc_collection.ids('select_positive')) +def test_select_positive( + request: pytest.FixtureRequest, + runner_type: str, + test_case: select_positive_common.TestCase, +): + # Let YDB container initialize tables + one_time_waiter.wait() + + runner = configure_runner(runner_type=runner_type, 
settings=settings) + scenario.select_positive( + settings=settings, + runner=runner, + test_case=test_case, + test_name=request.node.name, + ) diff --git a/ydb/core/kqp/ut/federated_query/generic/ya.make b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/ya.make similarity index 54% rename from ydb/core/kqp/ut/federated_query/generic/ya.make rename to ydb/library/yql/providers/generic/connector/tests/datasource/ydb/ya.make index 407624f56540..e79c76b3f4a9 100644 --- a/ydb/core/kqp/ut/federated_query/generic/ya.make +++ b/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/ya.make @@ -1,4 +1,11 @@ -UNITTEST_FOR(ydb/core/kqp) +PY3TEST() + +STYLE_PYTHON() +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/datasource/ydb/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=ydb) IF (AUTOCHECK) # Split tests to chunks only when they're running on different machines with distbuild, @@ -20,24 +27,6 @@ IF (AUTOCHECK) ) ENDIF() -SRCS( - ch_recipe_ut_helpers.cpp - connector_recipe_ut_helpers.cpp - kqp_generic_plan_ut.cpp - kqp_generic_provider_join_ut.cpp - pg_recipe_ut_helpers.cpp -) - -PEERDIR( - contrib/libs/fmt - contrib/libs/libpqxx - library/cpp/clickhouse/client - ydb/core/kqp/ut/common - ydb/core/kqp/ut/federated_query/common - ydb/library/yql/providers/generic/connector/libcpp - ydb/library/yql/sql/pg_dummy -) - INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) # Including of docker_compose/recipe.inc automatically converts these tests into LARGE, @@ -50,6 +39,26 @@ IF (OPENSOURCE) SET(TEST_REQUIREMENTS_VALUE) ENDIF() -YQL_LAST_ABI_VERSION() +TEST_SRCS( + collection.py + conftest.py + test.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + 
ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) END() diff --git a/ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml b/ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml new file mode 100644 index 000000000000..02c5903a5a4f --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/fq-connector-go/fq-connector-go.yaml @@ -0,0 +1,25 @@ +connector_server: + endpoint: + host: "0.0.0.0" + port: 2130 + +logger: + log_level: DEBUG + enable_sql_query_logging: true + +metrics_server: + endpoint: + host: "0.0.0.0" + port: 8766 + +pprof_server: + endpoint: + host: "0.0.0.0" + port: 6060 + +paging: + bytes_per_page: 4194304 + prefetch_queue_capacity: 2 + +conversion: + use_unsafe_converters: true diff --git a/ydb/library/yql/providers/generic/connector/tests/join/collection.py b/ydb/library/yql/providers/generic/connector/tests/join/collection.py new file mode 100644 index 000000000000..ad61792d2cf7 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/collection.py @@ -0,0 +1,26 @@ +from typing import Sequence, Mapping + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +import test_case + + +class Collection(object): + _test_cases: Mapping[str, Sequence] + + def __init__(self, ss: Settings): + self._test_cases = { + 'join': test_case.Factory().make_test_cases(), + } + + def get(self, key: str) -> Sequence: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return self._test_cases[key] + + def ids(self, key: str) -> Sequence[str]: + if key not in self._test_cases: + raise ValueError(f'no such test: {key}') + + return map(lambda tc: tc.name, 
self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/join/conftest.py b/ydb/library/yql/providers/generic/connector/tests/join/conftest.py new file mode 100644 index 000000000000..19caea7afe9f --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/conftest.py @@ -0,0 +1,38 @@ +from typing import Final +import dataclasses +import pathlib + +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import ( + make_client as make_clickhouse_client, + Client as ClickHouseClient, +) +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client as PostgreSQLClient +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/join") + + +@pytest.fixture +def settings() -> Settings: + return Settings.from_env( + docker_compose_dir=docker_compose_dir, + data_source_kinds=[EDataSourceKind.POSTGRESQL, EDataSourceKind.CLICKHOUSE], + ) + + +@dataclasses.dataclass +class Clients: + ClickHouse: ClickHouseClient + PostgreSQL: PostgreSQLClient + + +@pytest.fixture +def clients(settings): + return Clients( + ClickHouse=make_clickhouse_client(settings=settings.clickhouse), + PostgreSQL=PostgreSQLClient(settings=settings.postgresql), + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/docker-compose.yml b/ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml similarity index 58% rename from ydb/library/yql/providers/generic/connector/tests/docker-compose.yml rename to ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml index a8bfa53fb1ff..609c4a4942e6 100644 --- a/ydb/library/yql/providers/generic/connector/tests/docker-compose.yml +++ 
b/ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml @@ -1,17 +1,8 @@ version: '3.4' services: - postgresql: - image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 - container_name: ${USER}_connector-integration-tests-postgresql - environment: - POSTGRES_DB: db - POSTGRES_USER: user - POSTGRES_PASSWORD: password - ports: - - 5432 clickhouse: image: clickhouse/clickhouse-server:23-alpine@sha256:b078c1cd294632afa2aeba3530e7ba2e568513da23304354f455a25fab575c06 - container_name: ${USER}_connector-integration-tests-clickhouse + container_name: fq-tests-join-clickhouse environment: CLICKHOUSE_DB: db CLICKHOUSE_USER: user @@ -20,8 +11,20 @@ services: ports: - 9000 - 8123 + postgresql: + image: postgres:15-bullseye@sha256:3411b9f2e5239cd7867f34fcf22fe964230f7d447a71d63c283e3593d3f84085 + container_name: fq-tests-join-postgresql + environment: + POSTGRES_DB: db + POSTGRES_USER: user + POSTGRES_PASSWORD: password + command: ["postgres", "-c", "log_statement=all", "-c", "log_connections=on", "-c", "log_disconnections=on"] + ports: + - 5432 fq-connector-go: - container_name: ${USER}_connector-integration-tests-fq-connector-go - image: ghcr.io/ydb-platform/fq-connector-go:v0.1.1@sha256:47e24df143aee31a83d4a4cd0acc20b4cab8c03a9c63e81a6e99cb017a31f916 + container_name: fq-tests-join-fq-connector-go + image: ghcr.io/ydb-platform/fq-connector-go:v0.2.5@sha256:7f086ce3869b84a59fd76a10a9de8125c0d382915e956d34832105e03829a61b + volumes: + - ../fq-connector-go/:/opt/ydb/cfg/ ports: - - 50051 + - 2130 diff --git a/ydb/library/yql/providers/generic/connector/tests/join.py b/ydb/library/yql/providers/generic/connector/tests/join/scenario.py similarity index 60% rename from ydb/library/yql/providers/generic/connector/tests/join.py rename to ydb/library/yql/providers/generic/connector/tests/join/scenario.py index beecd8dac96b..3eff1950d8c6 100644 --- a/ydb/library/yql/providers/generic/connector/tests/join.py +++ 
b/ydb/library/yql/providers/generic/connector/tests/join/scenario.py @@ -1,33 +1,32 @@ -from pathlib import Path -import utils.postgresql - from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner -from utils.comparator import data_outs_equal -from utils.log import make_logger -from utils.runner import Runner -from utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client as ClickHouseClient +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.clickhouse as clickhouse_scenario +from ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client as PostgreSQLClient +import ydb.library.yql.providers.generic.connector.tests.utils.scenario.postgresql as postgresql_scenario -import clickhouse -import postgresql -import test_cases.join +from test_case import TestCase LOGGER = make_logger(__name__) def join( test_name: str, - test_case: test_cases.join.TestCase, + test_case: TestCase, settings: Settings, runner: Runner, - clickhouse_client: clickhouse.Client, - postgresql_client: utils.postgresql.Client, + clickhouse_client: ClickHouseClient, + postgresql_client: PostgreSQLClient, ): # prepare tables for data_source in test_case.data_sources: match data_source.kind: case EDataSourceKind.CLICKHOUSE: - clickhouse.prepare_table( + clickhouse_scenario.prepare_table( test_name=test_name, client=clickhouse_client, database=data_source.database, @@ -36,7 +35,7 @@ def join( schema=data_source.table.schema, ) case EDataSourceKind.POSTGRESQL: - postgresql.prepare_table( + 
postgresql_scenario.prepare_table( test_name=test_name, client=postgresql_client, database=data_source.database, diff --git a/ydb/library/yql/providers/generic/connector/tests/join/test.py b/ydb/library/yql/providers/generic/connector/tests/join/test.py new file mode 100644 index 000000000000..8af6b76f5a2a --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/test.py @@ -0,0 +1,40 @@ +import pytest + +from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.runners import runner_types, configure_runner + +import conftest +import scenario +from collection import Collection +from test_case import TestCase + +# Global collection of test cases dependent on environment +tc_collection = Collection( + Settings.from_env( + docker_compose_dir=conftest.docker_compose_dir, + data_source_kinds=[EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL], + ) +) + + +@pytest.mark.parametrize("runner_type", runner_types) +@pytest.mark.parametrize("test_case", tc_collection.get('join'), ids=tc_collection.ids('join')) +@pytest.mark.usefixtures("settings") +@pytest.mark.usefixtures("clients") +def test_join( + request: pytest.FixtureRequest, + settings: Settings, + runner_type: str, + clients: conftest.Clients, + test_case: TestCase, +): + runner = configure_runner(runner_type=runner_type, settings=settings) + scenario.join( + test_name=request.node.name, + clickhouse_client=clients.ClickHouse, + postgresql_client=clients.PostgreSQL, + runner=runner, + settings=settings, + test_case=test_case, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/join.py b/ydb/library/yql/providers/generic/connector/tests/join/test_case.py similarity index 94% rename from ydb/library/yql/providers/generic/connector/tests/test_cases/join.py rename to 
ydb/library/yql/providers/generic/connector/tests/join/test_case.py index 3d46473a652a..e10bad53372c 100644 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/join.py +++ b/ydb/library/yql/providers/generic/connector/tests/join/test_case.py @@ -6,9 +6,9 @@ from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat from ydb.public.api.protos.ydb_value_pb2 import Type -from utils.settings import Settings -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql from ydb.library.yql.providers.generic.connector.tests.utils.database import Database from ydb.library.yql.providers.generic.connector.tests.utils.data_source_kind import data_source_kind_alias from ydb.library.yql.providers.generic.connector.tests.utils.schema import ( @@ -138,7 +138,7 @@ class Factory: ), ] - def make_simple_test_cases(self) -> Sequence[TestCase]: + def __make_simple_test_cases(self) -> Sequence[TestCase]: tables: Sequence[TestCase] = [ Table( name='example_1', @@ -215,7 +215,7 @@ def make_simple_test_cases(self) -> Sequence[TestCase]: return test_cases - def make_inner_join_test_case(self) -> Sequence[TestCase]: + def __make_inner_join_test_case(self) -> Sequence[TestCase]: ch_table = Table( name='test_1', schema=Schema( @@ -269,4 +269,4 @@ def make_inner_join_test_case(self) -> Sequence[TestCase]: ] def make_test_cases(self) -> Sequence[TestCase]: - return self.make_simple_test_cases() + self.make_inner_join_test_case() + return self.__make_simple_test_cases() + self.__make_inner_join_test_case() diff --git 
a/ydb/library/yql/providers/generic/connector/tests/join/ya.make b/ydb/library/yql/providers/generic/connector/tests/join/ya.make new file mode 100644 index 000000000000..74b7e810afbe --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/join/ya.make @@ -0,0 +1,66 @@ +PY3TEST() + +NO_CHECK_IMPORTS() + +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/join/docker-compose.yml) +DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/fq-connector-go) +ENV(COMPOSE_PROJECT_NAME=join) + +IF (AUTOCHECK) + # Split tests to chunks only when they're running on different machines with distbuild, + # otherwise this directive will slow down local test execution. + # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. + FORK_SUBTESTS() + + # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose + TAG( + ya:external + ya:force_sandbox + ya:fat + ) + + REQUIREMENTS( + container:4467981730 + cpu:all + dns:dns64 + ) +ENDIF() + +INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) + +# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, +# which makes it impossible to run them during precommit checks on Github CI. +# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
+ +IF (OPENSOURCE) + SIZE(MEDIUM) + SET(TEST_TAGS_VALUE) + SET(TEST_REQUIREMENTS_VALUE) +ENDIF() + +TEST_SRCS( + collection.py + conftest.py + scenario.py + test.py + test_case.py +) + +PEERDIR( + contrib/python/pytest + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/api/service/protos + ydb/library/yql/providers/generic/connector/tests/common_test_cases + ydb/library/yql/providers/generic/connector/tests/utils + ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/run + ydb/library/yql/providers/generic/connector/tests/utils/scenario +) + +DEPENDS( + ydb/library/yql/tools/dqrun + ydb/tests/tools/kqprun + library/recipes/docker_compose/bin +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/test.py b/ydb/library/yql/providers/generic/connector/tests/test.py deleted file mode 100644 index 46d6cc718290..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test.py +++ /dev/null @@ -1,230 +0,0 @@ -from pathlib import Path -import pytest - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind - -from utils.settings import Settings -import clickhouse -import join -import postgresql -from test_cases.collection import Collection -import test_cases.join -import test_cases.select_missing_database -import test_cases.select_missing_table -import test_cases.select_positive_common -import utils.clickhouse -from utils.runner import Runner -from conftest import configure_runner -import utils.dqrun as dqrun -import utils.kqprun as kqprun -import utils.postgresql - - -# Global collection of test cases dependent on environment -tc_collection = Collection(Settings.from_env()) - -runners = (dqrun.DqRunner, kqprun.KqpRunner) -runners_ids = ("dqrun", "kqprun") - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", 
tc_collection.get('select_positive_postgresql'), ids=tc_collection.ids('select_positive_postgresql') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("postgresql_client") -def test_select_positive_postgresql( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - postgresql.select_positive( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", tc_collection.get('select_positive_clickhouse'), ids=tc_collection.ids('select_positive_clickhouse') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -def test_select_positive_clickhouse( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - clickhouse.select_positive( - test_name=request.node.name, settings=settings, runner=runner, client=clickhouse_client, test_case=test_case - ) - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", tc_collection.get('select_missing_database'), ids=tc_collection.ids('select_missing_database') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_select_missing_database( - request: pytest.FixtureRequest, - tmp_path: Path, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_missing_database.TestCase, 
-): - runner = configure_runner(runner=runner_type, settings=settings) - match test_case.data_source_kind: - case EDataSourceKind.CLICKHOUSE: - clickhouse.select_missing_table( - settings=settings, - runner=runner, - client=clickhouse_client, - test_case=test_case, - test_name=request.node.name, - ) - case EDataSourceKind.POSTGRESQL: - postgresql.select_missing_table( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) - case _: - raise Exception(f'invalid data source: {test_case.data_source_kind}') - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", tc_collection.get('select_missing_table'), ids=tc_collection.ids('select_missing_table') -) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_select_missing_table( - request: pytest.FixtureRequest, - tmp_path: Path, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_missing_table.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - match test_case.data_source_kind: - case EDataSourceKind.CLICKHOUSE: - clickhouse.select_missing_table( - test_name=request.node.name, - settings=settings, - runner=runner, - client=clickhouse_client, - test_case=test_case, - ) - case EDataSourceKind.POSTGRESQL: - postgresql.select_missing_table( - test_name=request.node.name, - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - ) - case _: - raise Exception(f'invalid data source: {test_case.data_source_kind}') - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize("test_case", tc_collection.get('join'), ids=tc_collection.ids('join')) -@pytest.mark.usefixtures("settings") 
-@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_join( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.join.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - join.join( - test_name=request.node.name, - clickhouse_client=clickhouse_client, - postgresql_client=postgresql_client, - runner=runner, - settings=settings, - test_case=test_case, - ) - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize("test_case", tc_collection.get('select_datetime'), ids=tc_collection.ids('select_datetime')) -@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("clickhouse_client") -@pytest.mark.usefixtures("postgresql_client") -def test_select_datetime( - request: pytest.FixtureRequest, - tmp_path: Path, - settings: Settings, - runner_type: Runner, - clickhouse_client: utils.clickhouse.Client, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - match test_case.data_source_kind: - case EDataSourceKind.CLICKHOUSE: - clickhouse.select_positive( - test_name=request.node.name, - test_case=test_case, - settings=settings, - runner=runner, - client=clickhouse_client, - ) - case EDataSourceKind.POSTGRESQL: - postgresql.select_positive( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) - case _: - raise Exception(f'invalid data source: {test_case.data_source_kind}') - - -@pytest.mark.parametrize("runner_type", runners, ids=runners_ids) -@pytest.mark.parametrize( - "test_case", - tc_collection.get('select_positive_postgresql_schema'), - ids=tc_collection.ids('select_positive_postgresql_schema'), -) 
-@pytest.mark.usefixtures("settings") -@pytest.mark.usefixtures("postgresql_client") -def test_select_pg_schema( - request: pytest.FixtureRequest, - settings: Settings, - runner_type: Runner, - postgresql_client: utils.postgresql.Client, - test_case: test_cases.select_positive_common.TestCase, -): - runner = configure_runner(runner=runner_type, settings=settings) - postgresql.select_pg_schema( - settings=settings, - runner=runner, - client=postgresql_client, - test_case=test_case, - test_name=request.node.name, - ) diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py b/ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py deleted file mode 100644 index 429d69c32892..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/collection.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Sequence, Mapping - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind -import ydb.library.yql.providers.generic.connector.tests.test_cases.join as join -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_datetime as select_datetime -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_missing_database as select_missing_database -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_missing_table as select_missing_table -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_clickhouse as select_positive_clickhouse -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_common as select_positive_common -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_postgresql as select_positive_postgresql -import ydb.library.yql.providers.generic.connector.tests.test_cases.select_positive_postgresql_schema as select_positive_postgresql_schema -from utils.settings import Settings - - -class Collection(object): - 
_test_cases: Mapping[str, Sequence] - - def __init__(self, ss: Settings): - self._test_cases = { - 'join': join.Factory().make_test_cases(), - 'select_missing_database': select_missing_database.Factory().make_test_cases(), - 'select_missing_table': select_missing_table.Factory().make_test_cases(), - 'select_positive_postgresql': select_positive_postgresql.Factory().make_test_cases() - + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.POSTGRESQL), - 'select_positive_postgresql_schema': select_positive_postgresql_schema.Factory().make_test_cases(), - 'select_positive_clickhouse': select_positive_clickhouse.Factory().make_test_cases() - + select_positive_common.Factory(ss).make_test_cases(EDataSourceKind.CLICKHOUSE), - 'select_datetime': select_datetime.Factory().make_test_cases(), - } - - def get(self, key: str) -> Sequence: - if key not in self._test_cases: - raise ValueError(f'no such test: {key}') - - return self._test_cases[key] - - def ids(self, key: str) -> Sequence[str]: - if key not in self._test_cases: - raise ValueError(f'no such test: {key}') - - return map(lambda tc: tc.name, self._test_cases[key]) diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py b/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py deleted file mode 100644 index e0eaab481876..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_database.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import List - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase - - -TestCase = BaseTestCase - - -class Factory: - def make_test_cases(self) -> List[TestCase]: - data_source_kinds = ( - EDataSourceKind.CLICKHOUSE, - EDataSourceKind.POSTGRESQL, - ) - - test_cases = [] - for data_source_kind in data_source_kinds: - 
test_case_name = 'missing_database' - - test_case = TestCase( - name_=test_case_name, - data_source_kind=data_source_kind, - protocol=EProtocol.NATIVE, - pragmas=dict(), - ) - - test_cases.append(test_case) - - return test_cases diff --git a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py b/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py deleted file mode 100644 index 42c2230755b3..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/test_cases/select_missing_table.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import List - -from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind, EProtocol -from ydb.library.yql.providers.generic.connector.tests.test_cases.base import BaseTestCase - - -TestCase = BaseTestCase - - -class Factory: - def make_test_cases(self) -> List[TestCase]: - data_source_kinds = ( - EDataSourceKind.CLICKHOUSE, - EDataSourceKind.POSTGRESQL, - ) - - test_cases = [] - for data_source_kind in data_source_kinds: - test_case_name = 'missing_table' - - test_case = TestCase( - name_=test_case_name, - data_source_kind=data_source_kind, - protocol=EProtocol.NATIVE, - pragmas=dict(), - ) - - test_cases.append(test_case) - - return test_cases diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py new file mode 100644 index 000000000000..c23aa6f2cf9b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/clickhouse.py @@ -0,0 +1,32 @@ +from typing import TypeAlias +from datetime import datetime +import sys +import time + +import clickhouse_connect +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +Client: TypeAlias = clickhouse_connect.driver.client.Client + + +def make_client(settings: Settings.ClickHouse) -> Client: + start = datetime.now() + 
attempt = 0 + + while (datetime.now() - start).total_seconds() < 60: + attempt += 1 + try: + client = clickhouse_connect.get_client( + host=settings.host_external, + port=settings.http_port_external, + username=settings.username, + password=settings.password, + ) + except Exception as e: + sys.stderr.write(f"attempt #{attempt}: {e}\n") + time.sleep(5) + continue + + return client + + raise Exception(f"Failed to connect ClickHouse in {attempt} attempt(s)") diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py new file mode 100644 index 000000000000..544cd3815e6b --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/postgresql.py @@ -0,0 +1,60 @@ +from contextlib import contextmanager +import time +from datetime import datetime +from typing import Tuple + +import pg8000.dbapi + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger + +LOGGER = make_logger(__name__) + + +class Client: + # database name -> pool + settings: Settings.PostgreSQL + + def __init__(self, settings: Settings.PostgreSQL): + self.settings = settings + self.pools = dict() + LOGGER.debug("initializing client") + + @contextmanager + def get_cursor(self, dbname: str): + conn, cursor = self._make_cursor(dbname=dbname) + yield conn, cursor + cursor.close() + conn.close() + + def _make_cursor(self, dbname: str) -> Tuple[pg8000.dbapi.Connection, pg8000.dbapi.Cursor]: + LOGGER.debug(f"making cursor for database {dbname}") + start = datetime.now() + attempt = 0 + + while (datetime.now() - start).total_seconds() < 10: + attempt += 1 + try: + LOGGER.debug( + f"trying to connect PostgreSQL: {self.settings.host_external}:{self.settings.port_external}" + ) + conn = pg8000.dbapi.Connection( + user=self.settings.username, + 
password=self.settings.password, + host=self.settings.host_external, + port=self.settings.port_external, + database=dbname, + timeout=10, + ) + conn.autocommit = True + + cur = conn.cursor() + return conn, cur + except Exception as e: + LOGGER.error(f"connection attempt #{attempt} failed: {e} {e.args}") + time.sleep(1) + continue + + ss = self.settings + params = f'{ss.username} {ss.password} {ss.host_external} {ss.port_external} {dbname}' + raise Exception(f"Failed to connect PostgreSQL in {attempt} attempt(s) with params: {params}") diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make new file mode 100644 index 000000000000..c0845060e513 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ya.make @@ -0,0 +1,16 @@ +PY3_LIBRARY() + +PY_SRCS( + clickhouse.py + postgresql.py + ydb.py +) + +PEERDIR( + contrib/python/clickhouse-connect + contrib/python/pg8000 + ydb/public/sdk/python + ydb/library/yql/providers/generic/connector/tests/utils +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py new file mode 100644 index 000000000000..f4e2345789ed --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/clients/ydb.py @@ -0,0 +1,10 @@ +import ydb +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + + +def make_client(s: Settings.Ydb) -> ydb.Driver: + endpoint = f"grpc://{s.host_external}:{s.port_external}" + + driver = ydb.Driver(endpoint=endpoint, database=s.dbname, credentials=ydb.AnonymousCredentials()) + driver.wait(timeout=5) + return driver diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/database.py b/ydb/library/yql/providers/generic/connector/tests/utils/database.py index c691aed42757..5b9c9cbe9305 100644 --- 
a/ydb/library/yql/providers/generic/connector/tests/utils/database.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/database.py @@ -17,6 +17,10 @@ def __init__(self, name: str, kind: EDataSourceKind.ValueType): self.name = name[:63].lower() case EDataSourceKind.CLICKHOUSE: self.name = name[:255] + case EDataSourceKind.YDB: + # We use a different way of initialization when working with YDB. + # There is only one preinstalled database called + self.name = "local" case _: raise Exception(f'invalid data source: {self.kind}') diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py b/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py index 7d5b2b13d855..98ca5788cf79 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/docker_compose.py @@ -1,22 +1,98 @@ import os import subprocess +import shutil +import yaml +import socket +from typing import Dict, Any import yatest.common +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger + +LOGGER = make_logger(__name__) + class EndpointDeterminer: - docker_compose_bin: os.PathLike - docker_compose_yml: os.PathLike + docker_bin_path: os.PathLike + docker_compose_bin_path: os.PathLike + + docker_compose_yml_path: os.PathLike + docker_compose_yml_data: Dict[str, Any] - def __init__(self, docker_compose_yml: os.PathLike): - self.docker_compose_bin = yatest.common.build_path('library/recipes/docker_compose/bin/docker-compose') - self.docker_compose_yml = docker_compose_yml + def __init__(self, docker_compose_yml_path: os.PathLike): + self.docker_bin_path = shutil.which('docker') + self.docker_compose_bin_path = yatest.common.build_path('library/recipes/docker_compose/bin/docker-compose') + self.docker_compose_yml_path = docker_compose_yml_path - def get_port(self, service_name: str, internal_port: int) -> int: - cmd = [self.docker_compose_bin, 
'-f', self.docker_compose_yml, 'port', service_name, str(internal_port)] + with open(self.docker_compose_yml_path) as f: + self.docker_compose_yml_data = yaml.load(f) + + def get_external_port(self, service_name: str, internal_port: int) -> int: + cmd = [ + self.docker_compose_bin_path, + '-f', + self.docker_compose_yml_path, + 'port', + service_name, + str(internal_port), + ] try: out = subprocess.check_output(cmd, stderr=subprocess.STDOUT) external_port = int(out.split(b':')[1]) return external_port except subprocess.CalledProcessError as e: raise RuntimeError(f"docker-compose error: {e.output} (code {e.returncode})") + + @staticmethod + def __is_valid_ipv4_address(address: str) -> bool: + try: + socket.inet_pton(socket.AF_INET, address) + except AttributeError as e1: # no inet_pton here, sorry + LOGGER.warn(f"validate '{address}' with inet_pton error: {e1}") + try: + socket.inet_aton(address) + except socket.error as e2: + LOGGER.error(f"validate '{address}' with inet_aton error: {e2}") + return False + return address.count('.') == 3 + except socket.error as e3: # not a valid address + LOGGER.error(f"validate '{address}' with inet_pton error: {e3}") + return False + + return True + + @staticmethod + def __is_valid_ipv6_address(address: str) -> bool: + try: + socket.inet_pton(socket.AF_INET6, address) + except socket.error: # not a valid address + return False + return True + + @staticmethod + def __is_valid_ip_address(address: str) -> bool: + return EndpointDeterminer.__is_valid_ipv4_address(address) or EndpointDeterminer.__is_valid_ipv6_address( + address + ) + + def get_internal_ip(self, service_name: str) -> str: + container_name = self.docker_compose_yml_data['services'][service_name]['container_name'] + cmd = [ + self.docker_bin_path, + "inspect", + "-f", + "'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}'", + container_name, + ] + try: + out = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode('utf8').strip().strip("'") + + if not 
EndpointDeterminer.__is_valid_ip_address(out): + raise ValueError(f"IP determined for container '{container_name}' is invalid: '{out}'") + + return out + except subprocess.CalledProcessError as e: + raise RuntimeError(f"docker-compose error: {e.output} (code {e.returncode})") + + def get_container_name(self, service_name: str) -> str: + return self.docker_compose_yml_data['services'][service_name]['container_name'] diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/generate.py b/ydb/library/yql/providers/generic/connector/tests/utils/generate.py index 93efd5e58273..879c7f43140e 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/generate.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/generate.py @@ -2,7 +2,7 @@ from typing import Sequence -from utils.schema import Schema +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema from ydb.public.api.protos.ydb_value_pb2 import Type diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py deleted file mode 100644 index b8d77e6c1775..000000000000 --- a/ydb/library/yql/providers/generic/connector/tests/utils/postgresql.py +++ /dev/null @@ -1,176 +0,0 @@ -from contextlib import contextmanager -import abc -import time -from datetime import datetime -from typing import Tuple -import sys - -import pg8000.dbapi - -from utils.settings import Settings - - -class Client: - # database name -> pool - settings: Settings.PostgreSQL - - def __init__(self, settings: Settings.PostgreSQL): - self.settings = settings - self.pools = dict() - - @contextmanager - def get_cursor(self, dbname: str): - conn, cursor = self._make_cursor(dbname=dbname) - yield conn, cursor - cursor.close() - conn.close() - - def _make_cursor(self, dbname: str) -> Tuple[pg8000.dbapi.Connection, pg8000.dbapi.Cursor]: - start = datetime.now() - attempt = 0 - - while (datetime.now() - 
start).total_seconds() < 10: - attempt += 1 - try: - sys.stdout.write( - f"Trying to connect PostgreSQL: {self.settings.host_external}:{self.settings.port_external}\n" - ) - conn = pg8000.dbapi.Connection( - user=self.settings.username, - password=self.settings.password, - host=self.settings.host_external, - port=self.settings.port_external, - database=dbname, - timeout=10, - ) - conn.autocommit = True - - cur = conn.cursor() - return conn, cur - except Exception as e: - sys.stderr.write(f"attempt #{attempt} failed: {e} {e.args}\n") - time.sleep(3) - continue - - ss = self.settings - params = f'{ss.username} {ss.password} {ss.host_external} {ss.port_external} {dbname}' - raise Exception(f"Failed to connect PostgreSQL in {attempt} attempt(s) with params: {params}") - - -class Type(abc.ABC): - @abc.abstractmethod - def to_sql(self) -> str: - pass - - -class PrimitiveType(Type): - def to_sql(self): - return type(self).__name__.lower() - - -class Boolean(PrimitiveType): - pass - - -class Bool(PrimitiveType): - pass - - -class SmallInt(PrimitiveType): - pass - - -class Int2(PrimitiveType): - pass - - -class SmallSerial(PrimitiveType): - pass - - -class Serial2(PrimitiveType): - pass - - -class Integer(PrimitiveType): - pass - - -class Int(PrimitiveType): - pass - - -class Int4(PrimitiveType): - pass - - -class Serial(PrimitiveType): - pass - - -class Serial4(PrimitiveType): - pass - - -class BigInt(PrimitiveType): - pass - - -class Int8(PrimitiveType): - pass - - -class BigSerial(PrimitiveType): - pass - - -class Serial8(PrimitiveType): - pass - - -class Real(PrimitiveType): - pass - - -class Float4(PrimitiveType): - pass - - -class DoublePrecision(PrimitiveType): - def to_sql(self): - return 'double precision' - - -class Float8(PrimitiveType): - pass - - -class Bytea(PrimitiveType): - pass - - -class Character(PrimitiveType): - def to_sql(self): - return 'character (5)' - - -class CharacterVarying(PrimitiveType): - def to_sql(self): - return 'character varying (5)' - - 
-class Text(PrimitiveType): - pass - - -class TimestampWithoutTimeZone(PrimitiveType): - def to_sql(self): - return 'timestamp without time zone' - - -class Date(PrimitiveType): - pass - - -class Time(PrimitiveType): - pass diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/dqrun.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/dqrun.py similarity index 91% rename from ydb/library/yql/providers/generic/connector/tests/utils/dqrun.py rename to ydb/library/yql/providers/generic/connector/tests/utils/run/dqrun.py index f3ee81780d84..5ffc0140e9c2 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/dqrun.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/dqrun.py @@ -10,16 +10,18 @@ from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts -from ydb.library.yql.providers.generic.connector.tests.utils.runner import Result, Runner from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings, GenericSettings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.run.result import Result + LOGGER = make_logger(__name__) class GatewaysConfRenderer: - template_: Final = ''' + _template: Final = ''' Generic { Connector { Endpoint { @@ -99,6 +101,26 @@ class GatewaysConfRenderer: }} {% endfor %} +{% for cluster in generic_settings.ydb_clusters %} + ClusterMapping { + Kind: YDB + Name: "{{settings.ydb.cluster_name}}" + DatabaseName: "{{cluster.database}}" + Credentials { + basic { + username: "{{settings.ydb.username}}" + password: "{{settings.ydb.password}}" + } + } + Endpoint { + host: 
"{{settings.ydb.host_internal}}" + port: {{settings.ydb.port_internal}} + } + UseSsl: false + Protocol: NATIVE + } +{% endfor %} + DefaultSettings { Name: "DateTimeFormat" {% if generic_settings.date_time_format == EDateTimeFormat.STRING_FORMAT %} @@ -107,7 +129,6 @@ class GatewaysConfRenderer: Value: "YQL" {% endif %} } - } Dq { @@ -165,7 +186,7 @@ class GatewaysConfRenderer: def __init__(self): self.template = jinja2.Environment(loader=jinja2.BaseLoader, undefined=jinja2.DebugUndefined).from_string( - self.template_ + self._template ) self.template.globals['EProtocol'] = EProtocol self.template.globals['EDateTimeFormat'] = EDateTimeFormat diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/kqprun.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/kqprun.py similarity index 93% rename from ydb/library/yql/providers/generic/connector/tests/utils/kqprun.py rename to ydb/library/yql/providers/generic/connector/tests/utils/run/kqprun.py index 603f8fd96c33..9433d4db2296 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/kqprun.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/kqprun.py @@ -1,20 +1,21 @@ from pathlib import Path -import subprocess from typing import Final +import json +import subprocess import jinja2 -import json - from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EProtocol from ydb.library.yql.providers.generic.connector.api.service.protos.connector_pb2 import EDateTimeFormat import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts -from ydb.library.yql.providers.generic.connector.tests.utils.runner import Result, Runner from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings, GenericSettings +from 
ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.run.result import Result + LOGGER = make_logger(__name__) @@ -32,8 +33,10 @@ class SchemeRenderer: AUTH_METHOD="BASIC", LOGIN="{{login}}", PASSWORD_SECRET_NAME="{{data_source}}_local_password", - USE_TLS="FALSE", - PROTOCOL="{{protocol}}" + {% if protocol %} + PROTOCOL="{{protocol}}", + {% endif %} + USE_TLS="FALSE" {% if kind == POSTGRESQL and schema %} ,SCHEMA="{{schema}}" @@ -44,6 +47,7 @@ class SchemeRenderer: {% set CLICKHOUSE = 'ClickHouse' %} {% set POSTGRESQL = 'PostgreSQL' %} +{% set YDB = 'Ydb' %} {% set NATIVE = 'NATIVE' %} {% set HTTP = 'HTTP' %} @@ -85,6 +89,20 @@ class SchemeRenderer: }} {% endfor %} +{% for cluster in generic_settings.ydb_clusters %} +{{ create_data_source( + YDB, + settings.ydb.cluster_name, + settings.ydb.host_internal, + settings.ydb.port_internal, + settings.ydb.username, + settings.ydb.password, + NONE, + cluster.database, + NONE) +}} +{% endfor %} + ''' def __init__(self): diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/runner.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/parent.py similarity index 69% rename from ydb/library/yql/providers/generic/connector/tests/utils/runner.py rename to ydb/library/yql/providers/generic/connector/tests/utils/run/parent.py index d5d4b12e1000..d297efd01dbe 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/runner.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/parent.py @@ -1,20 +1,8 @@ from abc import ABC, abstractmethod -from dataclasses import dataclass from pathlib import Path -from typing import List, Optional from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings, GenericSettings -from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema, YsonList - - -@dataclass -class Result: - data_out: Optional[YsonList] - 
data_out_with_types: Optional[List] - schema: Optional[Schema] - stdout: str - stderr: str - returncode: int +from ydb.library.yql.providers.generic.connector.tests.utils.run.result import Result class Runner(ABC): diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/run/result.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/result.py new file mode 100644 index 000000000000..45078d16bd00 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/result.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass +from typing import Optional, List + +from yt import yson +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema + + +@dataclass +class Result: + data_out: Optional[yson.yson_types.YsonList] + data_out_with_types: Optional[List] + schema: Optional[Schema] + stdout: str + stderr: str + returncode: int diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py b/ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py new file mode 100644 index 000000000000..435502c7d55e --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/runners.py @@ -0,0 +1,23 @@ +from typing import Final + +import yatest.common as yat + +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings + +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.run.dqrun import DqRunner +from ydb.library.yql.providers.generic.connector.tests.utils.run.kqprun import KqpRunner + +# used in every test.py +runner_types: Final = ("dqrun", "kqprun") + + +# used in every test.py +def configure_runner(runner_type: str, settings: Settings) -> Runner: + match runner_type: + case "dqrun": + return DqRunner(dqrun_path=yat.build_path("ydb/library/yql/tools/dqrun/dqrun"), settings=settings) + case "kqprun": + return 
KqpRunner(kqprun_path=yat.build_path("ydb/tests/tools/kqprun/kqprun"), settings=settings) + case _: + raise ValueError(runner_type) diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make new file mode 100644 index 000000000000..8173db5b86d4 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/run/ya.make @@ -0,0 +1,23 @@ +PY3_LIBRARY() + +STYLE_PYTHON() + +PY_SRCS( + dqrun.py + kqprun.py + parent.py + result.py + runners.py +) + +PEERDIR( + contrib/python/Jinja2 + contrib/python/PyYAML + ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/api/service/protos + ydb/library/yql/providers/generic/connector/tests/utils + ydb/public/api/protos + yt/python/yt/yson +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/clickhouse.py similarity index 75% rename from ydb/library/yql/providers/generic/connector/tests/clickhouse.py rename to ydb/library/yql/providers/generic/connector/tests/utils/scenario/clickhouse.py index ea5588dda62c..2429556ffafc 100644 --- a/ydb/library/yql/providers/generic/connector/tests/clickhouse.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/clickhouse.py @@ -2,19 +2,19 @@ import ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 as data_source_pb2 -import utils.artifacts as artifacts -from utils.clickhouse import Client -from utils.comparator import data_outs_equal -from utils.database import Database -from utils.log import make_logger, debug_with_limit -from utils.schema import Schema -from utils.settings import Settings -from utils.runner import Runner -from utils.sql import format_values_for_bulk_sql_insert - -import test_cases.select_missing_database -import test_cases.select_missing_table -import test_cases.select_positive_common 
+import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.database import Database +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger, debug_with_limit +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.sql import format_values_for_bulk_sql_insert +from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as tc_select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as tc_select_missing_table +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as tc_select_positive_common LOGGER = make_logger(__name__) @@ -60,7 +60,7 @@ def prepare_table( def select_positive( test_name: str, - test_case: test_cases.select_missing_table.TestCase, + test_case: tc_select_positive_common.TestCase, settings: Settings, runner: Runner, client: Client, @@ -113,7 +113,7 @@ def select_positive( def select_missing_database( test_name: str, - test_case: test_cases.select_missing_database.TestCase, + test_case: tc_select_missing_database.TestCase, settings: Settings, runner: Runner, ): @@ -133,7 +133,7 @@ def select_missing_database( def select_missing_table( test_name: str, - test_case: test_cases.select_missing_table.TestCase, + test_case: tc_select_missing_table.TestCase, settings: Settings, runner: Runner, client: Client, diff --git 
a/ydb/library/yql/providers/generic/connector/tests/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/postgresql.py similarity index 81% rename from ydb/library/yql/providers/generic/connector/tests/postgresql.py rename to ydb/library/yql/providers/generic/connector/tests/utils/scenario/postgresql.py index b8c381466195..babcc37abd12 100644 --- a/ydb/library/yql/providers/generic/connector/tests/postgresql.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/postgresql.py @@ -2,22 +2,19 @@ import ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 as data_source_pb2 -import utils.artifacts as artifacts -from utils.comparator import data_outs_equal -from utils.database import Database -from utils.log import make_logger, debug_with_limit -from utils.postgresql import Client -from utils.schema import Schema -from utils.settings import Settings -from utils.runner import Runner -from utils.sql import format_values_for_bulk_sql_insert - - -import test_cases.select_missing_database -import test_cases.select_missing_table -import test_cases.select_positive_common -import test_cases.select_positive_postgresql_schema - +import ydb.library.yql.providers.generic.connector.tests.utils.artifacts as artifacts +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.database import Database +from ydb.library.yql.providers.generic.connector.tests.utils.log import make_logger, debug_with_limit +from ydb.library.yql.providers.generic.connector.tests.utils.schema import Schema +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner +from ydb.library.yql.providers.generic.connector.tests.utils.sql import format_values_for_bulk_sql_insert +from 
ydb.library.yql.providers.generic.connector.tests.utils.clients.postgresql import Client + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as tc_select_positive_common +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_database as tc_select_missing_database +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_missing_table as tc_select_missing_table LOGGER = make_logger(__name__) @@ -88,7 +85,7 @@ def prepare_table( def select_positive( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_positive_common.TestCase, settings: Settings, runner: Runner, client: Client, @@ -133,7 +130,7 @@ def select_positive( def select_missing_database( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_missing_database.TestCase, settings: Settings, runner: Runner, ): @@ -154,7 +151,7 @@ def select_missing_database( def select_missing_table( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_missing_table.TestCase, settings: Settings, runner: Runner, client: Client, @@ -190,7 +187,7 @@ def select_missing_table( def select_pg_schema( test_name: str, - test_case: test_cases.select_positive_common.TestCase, + test_case: tc_select_positive_common.TestCase, settings: Settings, runner: Runner, client: Client, diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make new file mode 100644 index 000000000000..d4067949140e --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ya.make @@ -0,0 +1,17 @@ +PY3_LIBRARY() + +PY_SRCS( + clickhouse.py + postgresql.py + ydb.py +) + +PEERDIR( + contrib/python/clickhouse-connect + contrib/python/pg8000 + ydb/library/yql/providers/generic/connector/tests/utils + 
ydb/library/yql/providers/generic/connector/tests/utils/clients + ydb/library/yql/providers/generic/connector/tests/utils/run +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py new file mode 100644 index 000000000000..67f96b15b87c --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/scenario/ydb.py @@ -0,0 +1,38 @@ +from ydb.library.yql.providers.generic.connector.tests.utils.comparator import data_outs_equal +from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings +from ydb.library.yql.providers.generic.connector.tests.utils.run.parent import Runner + +import ydb.library.yql.providers.generic.connector.tests.common_test_cases.select_positive_common as tc_select_positive_common + + +def select_positive( + test_name: str, + test_case: tc_select_positive_common.TestCase, + settings: Settings, + runner: Runner, +): + # read data + where_statement = "" + if test_case.select_where is not None: + where_statement = "WHERE " + test_case.select_where.render( + cluster_name=settings.ydb.cluster_name, + table_name=test_case.qualified_table_name, + ) + yql_script = f""" + {test_case.pragmas_sql_string} + SELECT {test_case.select_what.yql_select_names} + FROM {settings.ydb.cluster_name}.{test_case.qualified_table_name} + {where_statement} + """ + result = runner.run( + test_name=test_name, + script=yql_script, + generic_settings=test_case.generic_settings, + ) + + assert result.returncode == 0, result.stderr + + assert data_outs_equal(test_case.data_out, result.data_out_with_types), ( + test_case.data_out, + result.data_out_with_types, + ) diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/schema.py b/ydb/library/yql/providers/generic/connector/tests/utils/schema.py index fb5771c35991..e0ba178cae18 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/schema.py +++ 
b/ydb/library/yql/providers/generic/connector/tests/utils/schema.py @@ -8,8 +8,9 @@ from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind from ydb.public.api.protos.ydb_value_pb2 import Type, OptionalType -import ydb.library.yql.providers.generic.connector.tests.utils.clickhouse as clickhouse -import ydb.library.yql.providers.generic.connector.tests.utils.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.clickhouse as clickhouse +import ydb.library.yql.providers.generic.connector.tests.utils.types.postgresql as postgresql +import ydb.library.yql.providers.generic.connector.tests.utils.types.ydb as Ydb YsonList: TypeAlias = yson.yson_types.YsonList @@ -18,6 +19,7 @@ class DataSourceType: ch: clickhouse.Type = None pg: postgresql.Type = None + ydb: Ydb.Type = None def pick(self, kind: EDataSourceKind.ValueType) -> str: target = None @@ -26,6 +28,8 @@ def pick(self, kind: EDataSourceKind.ValueType) -> str: target = self.ch case EDataSourceKind.POSTGRESQL: target = self.pg + case EDataSourceKind.YDB: + target = self.ydb case _: raise Exception(f'invalid data source: {kind}') diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/settings.py b/ydb/library/yql/providers/generic/connector/tests/utils/settings.py index 97bfde8d3394..51971998665d 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/settings.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/settings.py @@ -1,5 +1,6 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional, Sequence +import pathlib import yatest.common @@ -39,7 +40,7 @@ class PostgreSQL: dbname: str cluster_name: str username: str - password: Optional[str] + password: Optional[str] # TODO: why optional? 
host_external: str host_internal: str port_external: int @@ -47,42 +48,79 @@ class PostgreSQL: postgresql: PostgreSQL + @dataclass + class Ydb: + dbname: str + cluster_name: str + username: str + password: str + host_internal: str + port_internal: int + + ydb: Ydb + @classmethod - def from_env(cls) -> 'Settings': - docker_compose_file = yatest.common.source_path( - 'ydb/library/yql/providers/generic/connector/tests/docker-compose.yml' - ) - endpoint_determiner = EndpointDeterminer(docker_compose_file) + def from_env(cls, docker_compose_dir: pathlib.Path, data_source_kinds: Sequence[EDataSourceKind]) -> 'Settings': + docker_compose_file_relative_path = str(docker_compose_dir / 'docker-compose.yml') + docker_compose_file_abs_path = yatest.common.source_path(docker_compose_file_relative_path) + endpoint_determiner = EndpointDeterminer(docker_compose_file_abs_path) + + data_sources = dict() + + for data_source_kind in data_source_kinds: + match data_source_kind: + case EDataSourceKind.CLICKHOUSE: + data_sources[data_source_kind] = cls.ClickHouse( + cluster_name='clickhouse_integration_test', + host_external='0.0.0.0', + # This hack is due to https://st.yandex-team.ru/YQ-3003. + # Previously we used container names instead of container ips: + # host_internal=docker_compose_file['services']['clickhouse']['container_name'], + host_internal=endpoint_determiner.get_internal_ip('clickhouse'), + http_port_external=endpoint_determiner.get_external_port('clickhouse', 8123), + native_port_external=endpoint_determiner.get_external_port('clickhouse', 9000), + http_port_internal=8123, + native_port_internal=9000, + username='user', + password='password', + protocol='native', + ) + case EDataSourceKind.POSTGRESQL: + data_sources[data_source_kind] = cls.PostgreSQL( + cluster_name='postgresql_integration_test', + host_external='0.0.0.0', + # This hack is due to https://st.yandex-team.ru/YQ-3003. 
+ # Previously we used container names instead of container ips: + # host_internal=docker_compose_file['services']['postgresql']['container_name'], + host_internal=endpoint_determiner.get_internal_ip('postgresql'), + port_external=endpoint_determiner.get_external_port('postgresql', 5432), + port_internal=5432, + dbname='db', + username='user', + password='password', + ) + case EDataSourceKind.YDB: + data_sources[data_source_kind] = cls.Ydb( + cluster_name='ydb_integration_test', + host_internal=endpoint_determiner.get_container_name('ydb'), + port_internal=2136, + dbname="local", + username='user', + password='password', + ) + case _: + raise Exception(f'invalid data source: {data_source_kind}') return cls( connector=cls.Connector( grpc_host='localhost', - grpc_port=endpoint_determiner.get_port('fq-connector-go', 50051), + grpc_port=endpoint_determiner.get_external_port('fq-connector-go', 2130), paging_bytes_per_page=4 * 1024 * 1024, paging_prefetch_queue_capacity=2, ), - clickhouse=cls.ClickHouse( - cluster_name='clickhouse_integration_test', - host_external='localhost', - host_internal='clickhouse', - http_port_external=endpoint_determiner.get_port('clickhouse', 8123), - native_port_external=endpoint_determiner.get_port('clickhouse', 9000), - http_port_internal=8123, - native_port_internal=9000, - username='user', - password='password', - protocol='native', - ), - postgresql=cls.PostgreSQL( - cluster_name='postgresql_integration_test', - host_external='localhost', - host_internal='postgresql', - port_external=endpoint_determiner.get_port('postgresql', 5432), - port_internal=5432, - dbname='db', - username='user', - password='password', - ), + clickhouse=data_sources.get(EDataSourceKind.CLICKHOUSE), + postgresql=data_sources.get(EDataSourceKind.POSTGRESQL), + ydb=data_sources.get(EDataSourceKind.YDB), ) def get_cluster_name(self, data_source_kind: EDataSourceKind) -> str: @@ -97,6 +135,8 @@ def get_cluster_name(self, data_source_kind: EDataSourceKind) -> str: 
@dataclass class GenericSettings: + date_time_format: EDateTimeFormat + @dataclass class ClickHouseCluster: def __hash__(self) -> int: @@ -105,7 +145,7 @@ def __hash__(self) -> int: database: str protocol: EProtocol - clickhouse_clusters: Sequence[ClickHouseCluster] + clickhouse_clusters: Sequence[ClickHouseCluster] = field(default_factory=list) @dataclass class PostgreSQLCluster: @@ -115,6 +155,10 @@ def __hash__(self) -> int: database: str schema: str - postgresql_clusters: Sequence[PostgreSQLCluster] + postgresql_clusters: Sequence[PostgreSQLCluster] = field(default_factory=list) - date_time_format: EDateTimeFormat + @dataclass + class YdbCluster: + database: str + + ydb_clusters: Sequence[YdbCluster] = field(default_factory=list) diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/clickhouse.py b/ydb/library/yql/providers/generic/connector/tests/utils/types/clickhouse.py similarity index 61% rename from ydb/library/yql/providers/generic/connector/tests/utils/clickhouse.py rename to ydb/library/yql/providers/generic/connector/tests/utils/types/clickhouse.py index 991b808c1cb8..342bb2c35137 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/clickhouse.py +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/clickhouse.py @@ -1,33 +1,4 @@ -from typing import TypeAlias import abc -from datetime import datetime -import sys -import time - -import clickhouse_connect -from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings - -Client: TypeAlias = clickhouse_connect.driver.client.Client - - -def make_client(s: Settings.ClickHouse) -> Client: - start = datetime.now() - attempt = 0 - - while (datetime.now() - start).total_seconds() < 60: - attempt += 1 - try: - client = clickhouse_connect.get_client( - host=s.host_external, port=s.http_port_external, username=s.username, password=s.password - ) - except Exception as e: - sys.stderr.write(f"attempt #{attempt}: {e}\n") - time.sleep(5) - continue - 
- return client - - raise Exception(f"Failed to connect ClickHouse in {attempt} attempt(s)") class Type(abc.ABC): diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py b/ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py new file mode 100644 index 000000000000..d45b169cf5ab --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/postgresql.py @@ -0,0 +1,120 @@ +import abc + + +class Type(abc.ABC): + @abc.abstractmethod + def to_sql(self) -> str: + pass + + +class PrimitiveType(Type): + def to_sql(self): + return type(self).__name__.lower() + + +class Boolean(PrimitiveType): + pass + + +class Bool(PrimitiveType): + pass + + +class SmallInt(PrimitiveType): + pass + + +class Int2(PrimitiveType): + pass + + +class SmallSerial(PrimitiveType): + pass + + +class Serial2(PrimitiveType): + pass + + +class Integer(PrimitiveType): + pass + + +class Int(PrimitiveType): + pass + + +class Int4(PrimitiveType): + pass + + +class Serial(PrimitiveType): + pass + + +class Serial4(PrimitiveType): + pass + + +class BigInt(PrimitiveType): + pass + + +class Int8(PrimitiveType): + pass + + +class BigSerial(PrimitiveType): + pass + + +class Serial8(PrimitiveType): + pass + + +class Real(PrimitiveType): + pass + + +class Float4(PrimitiveType): + pass + + +class DoublePrecision(PrimitiveType): + def to_sql(self): + return 'double precision' + + +class Float8(PrimitiveType): + pass + + +class Bytea(PrimitiveType): + pass + + +class Character(PrimitiveType): + def to_sql(self): + return 'character (5)' + + +class CharacterVarying(PrimitiveType): + def to_sql(self): + return 'character varying (5)' + + +class Text(PrimitiveType): + pass + + +class TimestampWithoutTimeZone(PrimitiveType): + def to_sql(self): + return 'timestamp without time zone' + + +class Date(PrimitiveType): + pass + + +class Time(PrimitiveType): + pass diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make 
b/ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make new file mode 100644 index 000000000000..a5d5840bf551 --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/ya.make @@ -0,0 +1,9 @@ +PY3_LIBRARY() + +PY_SRCS( + clickhouse.py + postgresql.py + ydb.py +) + +END() diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py b/ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py new file mode 100644 index 000000000000..9619a224abdb --- /dev/null +++ b/ydb/library/yql/providers/generic/connector/tests/utils/types/ydb.py @@ -0,0 +1,79 @@ +import abc + + +class Type(abc.ABC): + @abc.abstractmethod + def to_sql(self) -> str: + pass + + +class PrimitiveType(Type): + def to_sql(self): + return type(self).__name__ + + @classmethod + def to_nullable(cls): + return Nullable(cls()) + + +class Bool(PrimitiveType): + pass + + +class Int8(PrimitiveType): + pass + + +class Int16(PrimitiveType): + pass + + +class Int32(PrimitiveType): + pass + + +class Int64(PrimitiveType): + pass + + +class UInt8(PrimitiveType): + pass + + +class UInt16(PrimitiveType): + pass + + +class UInt32(PrimitiveType): + pass + + +class UInt64(PrimitiveType): + pass + + +class Float(PrimitiveType): + pass + + +class Double(PrimitiveType): + pass + + +class String(PrimitiveType): + pass + + +class FixedString(PrimitiveType): + def to_sql(self) -> str: + return "FixedString(5)" + + +class Nullable(Type): + primitive: PrimitiveType + + def __init__(self, primitive: PrimitiveType): + self.primitive = primitive + + def to_sql(self) -> str: + return f'Nullable({self.primitive.to_sql()})' diff --git a/ydb/library/yql/providers/generic/connector/tests/utils/ya.make b/ydb/library/yql/providers/generic/connector/tests/utils/ya.make index 6d49965f3509..4d8f719e3f15 100644 --- a/ydb/library/yql/providers/generic/connector/tests/utils/ya.make +++ b/ydb/library/yql/providers/generic/connector/tests/utils/ya.make @@ -1,29 +1,31 @@ 
PY3_LIBRARY() -STYLE_PYTHON() - PY_SRCS( artifacts.py - clickhouse.py comparator.py data_source_kind.py database.py - dqrun.py docker_compose.py generate.py - kqprun.py log.py - postgresql.py - runner.py schema.py settings.py sql.py ) PEERDIR( + contrib/python/PyYAML ydb/library/yql/providers/generic/connector/api/common + ydb/library/yql/providers/generic/connector/tests/utils/types ydb/public/api/protos yt/python/yt/yson ) END() + +RECURSE_FOR_TESTS( + clients + run + scenario + types +) diff --git a/ydb/library/yql/providers/generic/connector/tests/ya.make b/ydb/library/yql/providers/generic/connector/tests/ya.make index 0100555d73cd..a4d804cfdcfe 100644 --- a/ydb/library/yql/providers/generic/connector/tests/ya.make +++ b/ydb/library/yql/providers/generic/connector/tests/ya.make @@ -1,74 +1,6 @@ -PY3TEST() - -STYLE_PYTHON() -NO_CHECK_IMPORTS() - -DATA(arcadia/ydb/library/yql/providers/generic/connector/tests/docker-compose.yml) - -IF (AUTOCHECK) - # Split tests to chunks only when they're running on different machines with distbuild, - # otherwise this directive will slow down local test execution. - # Look through https://st.yandex-team.ru/DEVTOOLSSUPPORT-39642 for more information. - FORK_SUBTESTS() - - # TAG and REQUIREMENTS are copied from: https://docs.yandex-team.ru/devtools/test/environment#docker-compose - TAG( - ya:external - ya:force_sandbox - ya:fat - ) - - REQUIREMENTS( - container:4467981730 - cpu:all - dns:dns64 - ) -ENDIF() - -INCLUDE(${ARCADIA_ROOT}/library/recipes/docker_compose/recipe.inc) - -# Including of docker_compose/recipe.inc automatically converts these tests into LARGE, -# which makes it impossible to run them during precommit checks on Github CI. -# Next several lines forces these tests to be MEDIUM. To see discussion, visit YDBOPS-8928. 
- -IF (OPENSOURCE) - SIZE(MEDIUM) - SET(TEST_TAGS_VALUE) - SET(TEST_REQUIREMENTS_VALUE) -ENDIF() - -TEST_SRCS( - conftest.py - clickhouse.py - postgresql.py - test.py -) - -PEERDIR( - contrib/python/Jinja2 - contrib/python/clickhouse-connect - contrib/python/grpcio - contrib/python/pg8000 - contrib/python/pytest - contrib/python/tzlocal - ydb/library/yql/providers/generic/connector/api/common - ydb/library/yql/providers/generic/connector/api/service - ydb/library/yql/providers/generic/connector/api/service/protos - ydb/library/yql/providers/generic/connector/tests/test_cases - ydb/library/yql/providers/generic/connector/tests/utils - ydb/public/api/protos - yt/python/yt/yson -) - -DEPENDS( - ydb/library/yql/tools/dqrun - ydb/tests/tools/kqprun - library/recipes/docker_compose/bin -) - -END() - RECURSE_FOR_TESTS( - test_cases + common_test_cases + datasource + join utils ) diff --git a/ydb/library/yql/providers/generic/proto/source.proto b/ydb/library/yql/providers/generic/proto/source.proto index 0911dd54ef26..725b0815698f 100644 --- a/ydb/library/yql/providers/generic/proto/source.proto +++ b/ydb/library/yql/providers/generic/proto/source.proto @@ -5,15 +5,19 @@ option cc_enable_arenas = true; package NYql.Generic; import "ydb/library/yql/providers/generic/connector/api/service/protos/connector.proto"; -import "ydb/library/yql/providers/generic/connector/api/common/data_source.proto"; message TSource { - // Token to access database - // FIXME: unused field, delete it: - string token = 1; // Prepared Select expression NYql.NConnector.NApi.TSelect select = 2; - // Description of instance to connect - // FIXME: DataSourceInstance is already incapsulated into select, delete it: - NYql.NConnector.NApi.TDataSourceInstance data_source_instance = 3; -} \ No newline at end of file + + // Credentials used to access managed databases APIs. 
+ // When working with external data source instances deployed in clouds, + // one should either set (ServiceAccountId, ServiceAccountIdSignature) pair + // that will be resolved into IAM Token via Token Accessor, + // or provide IAM Token directly. + string ServiceAccountId = 4; + string ServiceAccountIdSignature = 5; + string Token = 6; + + reserved 1, 3; +} diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp index a32ce9dc1215..48bb17d52670 100644 --- a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp +++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp @@ -180,7 +180,7 @@ class TBuildDqSourceSettingsTransformer: public TOptimizeTransformerBase { .Ptr(); ::google::protobuf::Any settings; TString sourceType; - dqIntegration->FillSourceSettings(*dqSourceNode, settings, sourceType); + dqIntegration->FillSourceSettings(*dqSourceNode, settings, sourceType, 1); UNIT_ASSERT_STRINGS_EQUAL(sourceType, "PostgreSqlGeneric"); UNIT_ASSERT(settings.Is()); settings.UnpackTo(DqSourceSettings_); @@ -243,6 +243,7 @@ struct TPushdownFixture: public NUnitTest::TBaseFixture { TypesCtx.Get(), FunctionRegistry.Get(), DatabaseResolver, + nullptr, GenericClient, GatewaysCfg.GetGeneric()); diff --git a/ydb/library/yql/providers/generic/provider/ya.make b/ydb/library/yql/providers/generic/provider/ya.make index ca9d84e6365b..55dd70b153e2 100644 --- a/ydb/library/yql/providers/generic/provider/ya.make +++ b/ydb/library/yql/providers/generic/provider/ya.make @@ -21,6 +21,8 @@ SRCS( yql_generic_settings.cpp yql_generic_state.h yql_generic_state.cpp + yql_generic_utils.h + yql_generic_utils.cpp ) YQL_LAST_ABI_VERSION() @@ -45,13 +47,16 @@ PEERDIR( ydb/library/yql/providers/common/provider ydb/library/yql/providers/common/pushdown ydb/library/yql/providers/common/structured_token + ydb/library/yql/providers/common/token_accessor/client 
ydb/library/yql/providers/common/transform ydb/library/yql/providers/dq/common ydb/library/yql/providers/dq/expr_nodes ydb/library/yql/providers/generic/expr_nodes ydb/library/yql/providers/generic/proto + ydb/library/yql/providers/generic/connector/api/common ydb/library/yql/providers/generic/connector/libcpp ydb/library/yql/utils/plan + ydb/public/sdk/cpp/client/ydb_types/credentials ) END() diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp index 249e3e1002ff..886b493076ee 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -11,8 +12,6 @@ #include "yql_generic_cluster_config.h" namespace NYql { - using namespace NConnector; - using namespace NConnector::NApi; using namespace fmt::literals; void ParseLogin( @@ -20,7 +19,8 @@ namespace NYql { NYql::TGenericClusterConfig& clusterConfig) { auto it = properties.find("login"); if (it == properties.cend()) { - ythrow yexception() << "missing 'LOGIN' value"; + // It's OK not to have credentials for base auth + return; } if (!it->second) { @@ -35,7 +35,8 @@ namespace NYql { NYql::TGenericClusterConfig& clusterConfig) { auto it = properties.find("password"); if (it == properties.cend()) { - ythrow yexception() << "missing 'PASSWORD' value"; + // It's OK not to have credentials for base auth + return; } clusterConfig.MutableCredentials()->Mutablebasic()->Setpassword(it->second); @@ -157,6 +158,25 @@ namespace NYql { clusterConfig.SetDatabaseId(it->second); } + void ParseDatabaseId(const THashMap& properties, + NYql::TGenericClusterConfig& clusterConfig) { + auto it = properties.find("database_id"); + if (it == properties.cend()) { + return; + } + + if (!it->second) { + // DATABASE_ID is an optional field + return; + } + + if (!it->second) { + 
ythrow yexception() << "invalid 'DATABASE_ID' value: '" << it->second << "'"; + } + + clusterConfig.SetDatabaseId(it->second); + } + void ParseSourceType(const THashMap& properties, NYql::TGenericClusterConfig& clusterConfig) { auto it = properties.find("source_type"); @@ -240,20 +260,6 @@ namespace NYql { } TGenericClusterConfig GenericClusterConfigFromProperties(const TString& clusterName, const THashMap& properties) { - // some cross-parameter validations - auto location = KeyIsSet(properties, "location"); - auto mdbClusterId = KeyIsSet(properties, "mdb_cluster_id"); - - if ((location && mdbClusterId) || (!location and !mdbClusterId)) { - ythrow yexception() << "you must provide either 'LOCATION' or 'MDB_CLUSTER_ID' parameter"; - } - - auto serviceAccountId = KeyIsSet(properties, "serviceAccountId"); - auto serviceAccountIdSignature = KeyIsSet(properties, "serviceAccountIdSignature"); - if ((serviceAccountId && !serviceAccountIdSignature) || (!serviceAccountId && serviceAccountIdSignature)) { - ythrow yexception() << "you must provide either both 'SERVICE_ACCOUNT_ID' and 'SERVICE_ACCOUNT_ID_SIGNATURE' parameters or none of them"; - } - NYql::TGenericClusterConfig clusterConfig; clusterConfig.set_name(clusterName); ParseLogin(properties, clusterConfig); @@ -263,6 +269,7 @@ namespace NYql { ParseDatabaseName(properties, clusterConfig); ParseSchema(properties, clusterConfig); ParseMdbClusterId(properties, clusterConfig); + ParseDatabaseId(properties, clusterConfig); ParseSourceType(properties, clusterConfig); ParseProtocol(properties, clusterConfig); ParseServiceAccountId(properties, clusterConfig); @@ -310,27 +317,16 @@ namespace NYql { "protocol"_a = NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol())); } + static const TSet managedDatabaseKinds{ + NConnector::NApi::EDataSourceKind::POSTGRESQL, + NConnector::NApi::EDataSourceKind::CLICKHOUSE, + NConnector::NApi::EDataSourceKind::YDB}; + void ValidateGenericClusterConfig( const 
NYql::TGenericClusterConfig& clusterConfig, const TString& context) { - // cross-parameter validations for optional fields - auto hasEndpoint = clusterConfig.HasEndpoint(); - auto databaseId = clusterConfig.GetDatabaseId(); - - if ((hasEndpoint && databaseId)) { - return ValidationError( - clusterConfig, - context, - "both 'Endpoint' and 'DatabaseId' fields are set; you must set only one of them"); - } - - if (!hasEndpoint and !databaseId) { - return ValidationError( - clusterConfig, - context, - "none of 'Endpoint' and 'DatabaseId' fields are set; you must set one of them"); - } - + // Service account ID and service account ID signature are tightly coupled: + // if one is set, another one must be set too. auto serviceAccountId = clusterConfig.GetServiceAccountId(); auto serviceAccountIdSignature = clusterConfig.GetServiceAccountIdSignature(); if (serviceAccountId && !serviceAccountIdSignature) { @@ -349,6 +345,8 @@ namespace NYql { "you must set either both 'ServiceAccountId' and 'ServiceAccountIdSignature' fields or none of them"); } + // Service account credentials and raw tokens are mutually exclusive: + // no need to specify service account parameters if one already has a token. 
auto token = clusterConfig.GetToken(); if ((serviceAccountId && serviceAccountIdSignature) && token) { return ValidationError( @@ -357,23 +355,60 @@ namespace NYql { "you must set either ('ServiceAccountId', 'ServiceAccountIdSignature') fields or 'Token' field or none of them"); } + // All managed databases: + // * set endpoint when working with on-prem instances + // * set database id when working with managed instances + if (managedDatabaseKinds.contains(clusterConfig.GetKind())) { + auto hasEndpoint = clusterConfig.HasEndpoint(); + auto hasDatabaseId = clusterConfig.HasDatabaseId(); + + if (hasEndpoint && hasDatabaseId) { + return ValidationError( + clusterConfig, + context, + "both 'Endpoint' and 'DatabaseId' fields are set; you must set only one of them"); + } + + if (!hasEndpoint and !hasDatabaseId) { + return ValidationError( + clusterConfig, + context, + "none of 'Endpoint' and 'DatabaseId' fields are set; you must set one of them"); + } + } + + // YDB: + // * set database name when working with on-prem YDB instance; + // * but set database ID when working with managed YDB. 
+ if (clusterConfig.GetKind() == NConnector::NApi::YDB) { + if (clusterConfig.HasDatabaseName() && clusterConfig.HasDatabaseId()) { + return ValidationError( + clusterConfig, + context, + "For YDB clusters you must set either database name or database id, but you have set both of them"); + } + + if (!clusterConfig.HasDatabaseName() && !clusterConfig.HasDatabaseId()) { + return ValidationError( + clusterConfig, + context, + "For YDB clusters you must set either database name or database id, but you have set none of them"); + } + } + // check required fields if (!clusterConfig.GetName()) { return ValidationError(clusterConfig, context, "empty field 'Name'"); } - if (clusterConfig.GetKind() == EDataSourceKind::DATA_SOURCE_KIND_UNSPECIFIED) { + if (clusterConfig.GetKind() == NConnector::NApi::EDataSourceKind::DATA_SOURCE_KIND_UNSPECIFIED) { return ValidationError(clusterConfig, context, "empty field 'Kind'"); } - if (!clusterConfig.GetCredentials().Getbasic().Getusername()) { - return ValidationError(clusterConfig, context, "empty field 'Credentials.basic.username'"); - } - // TODO: validate Credentials.basic.password after ClickHouse recipe fix // TODO: validate DatabaseName field during https://st.yandex-team.ru/YQ-2494 - if (clusterConfig.GetProtocol() == EProtocol::PROTOCOL_UNSPECIFIED) { + if (clusterConfig.GetProtocol() == NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED) { return ValidationError(clusterConfig, context, "empty field 'Protocol'"); } } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp index d00f82047c37..74a6bd819177 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp @@ -93,17 +93,16 @@ namespace NYql { } void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, - TString& sourceType) override 
{ + TString& sourceType, size_t) override { const TDqSource source(&node); if (const auto maybeSettings = source.Settings().Maybe()) { const auto settings = maybeSettings.Cast(); const auto& clusterName = source.DataSource().Cast().Cluster().StringValue(); const auto& table = settings.Table().StringValue(); - const auto& token = settings.Token().Name().StringValue(); - const auto& endpoint = State_->Configuration->ClusterNamesToClusterConfigs[clusterName].endpoint(); + const auto& clusterConfig = State_->Configuration->ClusterNamesToClusterConfigs[clusterName]; + const auto& endpoint = clusterConfig.endpoint(); - Generic::TSource srcDesc; - srcDesc.set_token(token); + Generic::TSource source; // for backward compability full path can be used (cluster_name.`db_name.table`) // TODO: simplify during https://st.yandex-team.ru/YQ-2494 @@ -126,7 +125,7 @@ namespace NYql { } // prepare select - auto select = srcDesc.mutable_select(); + auto select = source.mutable_select(); select->mutable_from()->set_table(TString(dbTable)); select->mutable_data_source_instance()->CopyFrom(tableMeta.value()->DataSourceInstance); @@ -149,13 +148,22 @@ namespace NYql { } } - // store data source instance - srcDesc.mutable_data_source_instance()->CopyFrom(tableMeta.value()->DataSourceInstance); + // Managed YDB supports access via IAM token. + // If exist, copy service account creds to obtain tokens during request execution phase. + // If exists, copy previously created token. 
+ if (clusterConfig.kind() == NConnector::NApi::EDataSourceKind::YDB) { + source.SetServiceAccountId(clusterConfig.GetServiceAccountId()); + source.SetServiceAccountIdSignature(clusterConfig.GetServiceAccountIdSignature()); + source.SetToken(State_->Types->Credentials->FindCredentialContent( + "default_" + clusterConfig.name(), + "default_generic", + clusterConfig.GetToken())); + } // preserve source description for read actor - protoSettings.PackFrom(srcDesc); + protoSettings.PackFrom(source); - switch (srcDesc.data_source_instance().kind()) { + switch (select->data_source_instance().kind()) { case NYql::NConnector::NApi::CLICKHOUSE: sourceType = "ClickHouseGeneric"; break; diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp index 2089adbc798e..ae9e504f3c1b 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_io_discovery.cpp @@ -1,4 +1,6 @@ #include "yql_generic_provider_impl.h" +#include "yql_generic_utils.h" + #include #include #include @@ -91,6 +93,7 @@ namespace NYql { for (const auto& [databaseIdWithType, databaseDescription] : response.DatabaseDescriptionMap) { YQL_CLOG(INFO, ProviderGeneric) << "resolved database id into endpoint" << ": databaseId=" << databaseIdWithType.first + << ", databaseKind=" << databaseIdWithType.second << ", host=" << databaseDescription.Host << ", port=" << databaseDescription.Port; } @@ -157,7 +160,10 @@ namespace NYql { if (clusterConfigIter == clusterNamesToClusterConfigs.end()) { TIssues issues; - issues.AddIssue(TStringBuilder() << "no cluster names for database id " << databaseIdWithType.first << " and cluster name " << clusterName); + issues.AddIssue(TStringBuilder() << "no cluster names for database id " + << databaseIdWithType.first + << " and cluster name " + << clusterName); ctx.IssueManager.AddIssues(issues); return 
TStatus::Error; } @@ -165,6 +171,15 @@ namespace NYql { auto endpointDst = clusterConfigIter->second.mutable_endpoint(); endpointDst->set_host(databaseDescription.Host); endpointDst->set_port(databaseDescription.Port); + + // If we work with managed YDB, we find out database name + // only after database id (== cluster id) resolving. + if (clusterConfigIter->second.kind() == NConnector::NApi::EDataSourceKind::YDB) { + clusterConfigIter->second.set_databasename(databaseDescription.Database); + } + + YQL_CLOG(INFO, ProviderGeneric) << "ModifyClusterConfigs: " + << DumpGenericClusterConfig(clusterConfigIter->second); } } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp index 929884754b96..383e342c1523 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_load_meta.cpp @@ -14,10 +14,11 @@ #include #include #include -#include -#include +#include #include #include +#include +#include namespace NYql { using namespace NNodes; @@ -32,7 +33,8 @@ namespace NYql { }; class TGenericLoadTableMetadataTransformer: public TGraphTransformerBase { - using TMapType = std::unordered_map>; + using TMapType = + std::unordered_map>; public: TGenericLoadTableMetadataTransformer(TGenericState::TPtr state) @@ -48,42 +50,37 @@ namespace NYql { } std::unordered_set pendingTables; - const auto& reads = FindNodes(input, - [&](const TExprNode::TPtr& node) { - if (const auto maybeRead = TMaybeNode(node)) { - return maybeRead.Cast().DataSource().Category().Value() == GenericProviderName; - } - return false; - }); + const auto& reads = FindNodes(input, [&](const TExprNode::TPtr& node) { + if (const auto maybeRead = TMaybeNode(node)) { + return maybeRead.Cast().DataSource().Category().Value() == GenericProviderName; + } + return false; + }); if (!reads.empty()) { for (const auto& r : reads) { const TGenRead 
read(r); if (!read.FreeArgs().Get(2).Ref().IsCallable("MrTableConcat")) { - ctx.AddError( - TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), TStringBuilder() << "Expected key")); + ctx.AddError(TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), "Expected key")); return TStatus::Error; } const auto maybeKey = TExprBase(read.FreeArgs().Get(2).Ref().HeadPtr()).Maybe(); if (!maybeKey) { - ctx.AddError( - TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), TStringBuilder() << "Expected key")); + ctx.AddError(TIssue(ctx.GetPosition(read.FreeArgs().Get(0).Pos()), "Expected key")); return TStatus::Error; } const auto& keyArg = maybeKey.Cast().Ref().Head(); if (!keyArg.IsList() || keyArg.ChildrenSize() != 2U || !keyArg.Head().IsAtom("table") || !keyArg.Tail().IsCallable(TCoString::CallableName())) { - ctx.AddError( - TIssue(ctx.GetPosition(keyArg.Pos()), TStringBuilder() << "Expected single table name")); + ctx.AddError(TIssue(ctx.GetPosition(keyArg.Pos()), "Expected single table name")); return TStatus::Error; } const auto clusterName = read.DataSource().Cluster().StringValue(); const auto tableName = TString(keyArg.Tail().Head().Content()); if (pendingTables.insert(TGenericState::TTableAddress(clusterName, tableName)).second) { - YQL_CLOG(INFO, ProviderGeneric) - << "Loading table meta for: `" << clusterName << "`.`" << tableName << "`"; + YQL_CLOG(INFO, ProviderGeneric) << "Loading table meta for: `" << clusterName << "`.`" << tableName << "`"; } } } @@ -108,6 +105,7 @@ namespace NYql { auto desc = emplaceIt.first->second; desc->DataSourceInstance = request.data_source_instance(); + Y_ENSURE(State_->GenericClient); State_->GenericClient->DescribeTable(request).Subscribe( [desc = std::move(desc), promise = std::move(promise)](const NConnector::TDescribeTableAsyncResult& f1) mutable { NConnector::TDescribeTableAsyncResult f2(f1); @@ -196,14 +194,13 @@ namespace NYql { } else { const auto& error = response.error(); NConnector::ErrorToExprCtx(error, ctx, 
ctx.GetPosition(read.Pos()), - TStringBuilder() - << "Loading metadata for table: " << clusterName << '.' << tableName); + TStringBuilder() << "Loading metadata for table: " << clusterName << '.' << tableName); hasErrors = true; break; } } else { - ctx.AddError(TIssue(ctx.GetPosition(read.Pos()), - TStringBuilder() << "Not found result for " << clusterName << '.' << tableName)); + ctx.AddError(TIssue(ctx.GetPosition(read.Pos()), TStringBuilder() + << "Not found result for " << clusterName << '.' << tableName)); hasErrors = true; break; } @@ -222,10 +219,8 @@ namespace NYql { } private: - const TStructExprType* ParseTableMeta(const NConnector::NApi::TSchema& schema, - const std::string_view& cluster, - const std::string_view& table, TExprContext& ctx, - TVector& columnOrder) try { + const TStructExprType* ParseTableMeta(const NConnector::NApi::TSchema& schema, const std::string_view& cluster, + const std::string_view& table, TExprContext& ctx, TVector& columnOrder) try { TVector items; auto columns = schema.columns(); @@ -250,20 +245,74 @@ namespace NYql { return nullptr; } - void FillDescribeTableRequest(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig, const TString& tablePath) { + void FillDescribeTableRequest(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig, + const TString& tablePath) { const auto dataSourceKind = clusterConfig.GetKind(); auto dsi = request.mutable_data_source_instance(); - *dsi->mutable_endpoint() = clusterConfig.GetEndpoint(); dsi->set_kind(dataSourceKind); - *dsi->mutable_credentials() = clusterConfig.GetCredentials(); dsi->set_use_tls(clusterConfig.GetUseSsl()); dsi->set_protocol(clusterConfig.GetProtocol()); + FillCredentials(request, clusterConfig); FillTypeMappingSettings(request); FillDataSourceOptions(request, clusterConfig); FillTablePath(request, clusterConfig, tablePath); } + void FillCredentials(NConnector::NApi::TDescribeTableRequest& request, 
const TGenericClusterConfig& clusterConfig) { + auto dsi = request.mutable_data_source_instance(); + + // If login/password is provided, just copy them into request: + // connector will use Basic Auth to access external data sources. + if (clusterConfig.GetCredentials().Hasbasic()) { + *dsi->mutable_credentials() = clusterConfig.GetCredentials(); + return; + } + + // If there are no Basic Auth parameters, two options can be considered: + + // 1. Client provided own IAM-token to access external data source + auto iamToken = State_->Types->Credentials->FindCredentialContent( + "default_" + clusterConfig.name(), + "default_generic", + clusterConfig.GetToken()); + if (iamToken) { + *dsi->mutable_credentials()->mutable_token()->mutable_value() = iamToken; + *dsi->mutable_credentials()->mutable_token()->mutable_type() = "IAM"; + return; + } + + // 2. Client provided service account creds that must be converted into IAM-token + Y_ENSURE(State_->CredentialsFactory, "CredentialsFactory is not initialized"); + + auto structuredTokenJSON = TStructuredTokenBuilder().SetServiceAccountIdAuth( + clusterConfig.GetServiceAccountId(), + clusterConfig.GetServiceAccountIdSignature()) + .ToJson(); + + Y_ENSURE(structuredTokenJSON, "empty structured token"); + + // Create provider or get existing one. + // It's crucial to reuse providers because their construction implies synchronous IO. 
+ auto providersIt = State_->CredentialProviders.find(clusterConfig.name()); + if (providersIt == State_->CredentialProviders.end()) { + auto credentialsProviderFactory = CreateCredentialsProviderFactoryForStructuredToken( + State_->CredentialsFactory, + structuredTokenJSON, + false); + + providersIt = State_->CredentialProviders.emplace( + std::make_pair(clusterConfig.name(), credentialsProviderFactory->CreateProvider())) + .first; + } + + iamToken = providersIt->second->GetAuthInfo(); + Y_ENSURE(iamToken, "empty IAM token"); + + *dsi->mutable_credentials()->mutable_token()->mutable_value() = iamToken; + *dsi->mutable_credentials()->mutable_token()->mutable_type() = "IAM"; + } + void FillDataSourceOptions(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig) { const auto dataSourceKind = clusterConfig.GetKind(); switch (dataSourceKind) { @@ -287,13 +336,14 @@ namespace NYql { } break; default: - ythrow yexception() << "Unexpected data source kind: '" - << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) << "'"; + ythrow yexception() << "Unexpected data source kind: '" << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) + << "'"; } } void FillTypeMappingSettings(NConnector::NApi::TDescribeTableRequest& request) { - const TString dateTimeFormat = State_->Configuration->DateTimeFormat.Get().GetOrElse(TGenericSettings::TDefault::DateTimeFormat); + const TString dateTimeFormat = + State_->Configuration->DateTimeFormat.Get().GetOrElse(TGenericSettings::TDefault::DateTimeFormat); if (dateTimeFormat == "string") { request.mutable_type_mapping_settings()->set_date_time_format(NConnector::NApi::STRING_FORMAT); } else if (dateTimeFormat == "YQL") { @@ -303,7 +353,8 @@ namespace NYql { } } - void FillTablePath(NConnector::NApi::TDescribeTableRequest& request, const TGenericClusterConfig& clusterConfig, const TString& tablePath) { + void FillTablePath(NConnector::NApi::TDescribeTableRequest& request, const 
TGenericClusterConfig& clusterConfig, + const TString& tablePath) { // for backward compability full path can be used (cluster_name.`db_name.table`) // TODO: simplify during https://st.yandex-team.ru/YQ-2494 const auto dataSourceKind = clusterConfig.GetKind(); diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp index d2178ccd5b0e..c720e1b64d0e 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp @@ -6,13 +6,14 @@ namespace NYql { TDataProviderInitializer GetGenericDataProviderInitializer(NConnector::IClient::TPtr genericClient, - const std::shared_ptr dbResolver) + const IDatabaseAsyncResolver::TPtr& dbResolver, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory) { - return [genericClient, dbResolver](const TString& userName, const TString& sessionId, const TGatewaysConfig* gatewaysConfig, - const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, - TIntrusivePtr randomProvider, TIntrusivePtr typeCtx, - const TOperationProgressWriter& progressWriter, const TYqlOperationOptions& operationOptions, - THiddenQueryAborter) + return [genericClient, dbResolver, credentialsFactory](const TString& userName, const TString& sessionId, const TGatewaysConfig* gatewaysConfig, + const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, + TIntrusivePtr randomProvider, TIntrusivePtr typeCtx, + const TOperationProgressWriter& progressWriter, const TYqlOperationOptions& operationOptions, + THiddenQueryAborter) { Y_UNUSED(sessionId); Y_UNUSED(userName); @@ -25,6 +26,7 @@ namespace NYql { typeCtx.Get(), functionRegistry, dbResolver, + credentialsFactory, genericClient, gatewaysConfig->GetGeneric()); diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_provider.h b/ydb/library/yql/providers/generic/provider/yql_generic_provider.h index 
d990b2084bb4..5c8e4c967a8c 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_provider.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_provider.h @@ -2,14 +2,14 @@ #include "yql_generic_state.h" -#include #include #include namespace NYql { TDataProviderInitializer GetGenericDataProviderInitializer( - NConnector::IClient::TPtr genericClient, // required - std::shared_ptr dbResolver = nullptr // can be missing in on-prem installations + NConnector::IClient::TPtr genericClient, // required + const IDatabaseAsyncResolver::TPtr& dbResolver = nullptr, // can be missing in on-prem installations + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory = nullptr // can be missing in on-prem installations ); TIntrusivePtr CreateGenericDataSource(TGenericState::TPtr state); diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp index 0b4c93a8bf4d..1c2521573ddb 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp @@ -1,5 +1,6 @@ #include "yql_generic_cluster_config.h" #include "yql_generic_settings.h" +#include "yql_generic_utils.h" #include #include @@ -34,7 +35,7 @@ namespace NYql { const TCredentials::TPtr& credentials) { ValidateGenericClusterConfig(clusterConfig, "TGenericConfiguration::AddCluster"); - YQL_CLOG(INFO, ProviderGeneric) << "generic provider add cluster: " << DumpGenericClusterConfig(clusterConfig); + YQL_CLOG(INFO, ProviderGeneric) << "GenericConfiguration::AddCluster: " << DumpGenericClusterConfig(clusterConfig); const auto& clusterName = clusterConfig.GetName(); const auto& databaseId = clusterConfig.GetDatabaseId(); @@ -95,23 +96,6 @@ namespace NYql { "or set (ServiceAccountId && ServiceAccountIdSignature) in cluster config"; } - TString TGenericConfiguration::DumpGenericClusterConfig(const 
TGenericClusterConfig& clusterConfig) const { - TStringBuilder sb; - sb << "name = " << clusterConfig.GetName() - << ", kind = " << NConnector::NApi::EDataSourceKind_Name(clusterConfig.GetKind()) - << ", database name = " << clusterConfig.GetDatabaseName() - << ", database id = " << clusterConfig.GetName() - << ", endpoint = " << clusterConfig.GetEndpoint() - << ", use tls = " << clusterConfig.GetUseSsl() - << ", protocol = " << NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol()); - - for (const auto& [key, value] : clusterConfig.GetDataSourceOptions()) { - sb << ", " << key << " = " << value; - } - - return sb; - } - TGenericSettings::TConstPtr TGenericConfiguration::Snapshot() const { return std::make_shared(*this); } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_settings.h b/ydb/library/yql/providers/generic/provider/yql_generic_settings.h index d783963a6589..07a19c5ce827 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_settings.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_settings.h @@ -25,26 +25,21 @@ namespace NYql { TGenericConfiguration(); TGenericConfiguration(const TGenericConfiguration&) = delete; - void Init(const NYql::TGenericGatewayConfig& gatewayConfig, - const std::shared_ptr databaseResolver, - NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, - const TCredentials::TPtr& credentials); + void Init(const NYql::TGenericGatewayConfig& gatewayConfig, const std::shared_ptr databaseResolver, + NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, const TCredentials::TPtr& credentials); - void AddCluster(const TGenericClusterConfig& clusterConfig, - const std::shared_ptr databaseResolver, - NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, - const TCredentials::TPtr& credentials); + void AddCluster(const TGenericClusterConfig& clusterConfig, const std::shared_ptr databaseResolver, + NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, const 
TCredentials::TPtr& credentials); TGenericSettings::TConstPtr Snapshot() const; bool HasCluster(TStringBuf cluster) const; private: TString MakeStructuredToken(const TGenericClusterConfig& clusterConfig, const TCredentials::TPtr& credentials) const; - TString DumpGenericClusterConfig(const TGenericClusterConfig& clusterConfig) const; public: THashMap Tokens; THashMap ClusterNamesToClusterConfigs; // cluster name -> cluster config THashMap> DatabaseIdsToClusterNames; // database id -> cluster name }; -} +} //namespace NYql diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_state.h b/ydb/library/yql/providers/generic/provider/yql_generic_state.h index e2362bc5ad27..3d69efdfe0d3 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_state.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_state.h @@ -3,7 +3,9 @@ #include "yql_generic_settings.h" #include +#include #include +#include namespace NKikimr::NMiniKQL { class IFunctionRegistry; @@ -29,13 +31,15 @@ namespace NYql { TGenericState( TTypeAnnotationContext* types, const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, - const std::shared_ptr& databaseResolver, + const std::shared_ptr& databaseResolver, + const ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory, const NConnector::IClient::TPtr& genericClient, const TGenericGatewayConfig& gatewayConfig) : Types(types) , Configuration(MakeIntrusive()) , FunctionRegistry(functionRegistry) , DatabaseResolver(databaseResolver) + , CredentialsFactory(credentialsFactory) , GenericClient(genericClient) { Configuration->Init(gatewayConfig, databaseResolver, DatabaseAuth, types->Credentials); @@ -49,9 +53,15 @@ namespace NYql { TGenericConfiguration::TPtr Configuration = MakeIntrusive(); const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry; - // key - (database id, database type), value - credentials to access MDB API - NYql::IDatabaseAsyncResolver::TDatabaseAuthMap DatabaseAuth; - std::shared_ptr 
DatabaseResolver; + // key - (database id, database type), value - credentials to access managed APIs + IDatabaseAsyncResolver::TDatabaseAuthMap DatabaseAuth; + std::shared_ptr DatabaseResolver; + + // key - cluster name, value - TCredentialsProviderPtr + // It's important to cache credentials providers, because they make IO + // (synchronous call via Token Accessor client) during the construction. + std::unordered_map CredentialProviders; + ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory; NConnector::IClient::TPtr GenericClient; diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp new file mode 100644 index 000000000000..aba0b51924b3 --- /dev/null +++ b/ydb/library/yql/providers/generic/provider/yql_generic_utils.cpp @@ -0,0 +1,22 @@ +#include "yql_generic_utils.h" + +#include + +namespace NYql { + TString DumpGenericClusterConfig(const TGenericClusterConfig& clusterConfig) { + TStringBuilder sb; + sb << "name = " << clusterConfig.GetName() + << ", kind = " << NConnector::NApi::EDataSourceKind_Name(clusterConfig.GetKind()) + << ", database name = " << clusterConfig.GetDatabaseName() + << ", database id = " << clusterConfig.GetDatabaseId() + << ", endpoint = " << clusterConfig.GetEndpoint() + << ", use tls = " << clusterConfig.GetUseSsl() + << ", protocol = " << NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol()); + + for (const auto& [key, value] : clusterConfig.GetDataSourceOptions()) { + sb << ", " << key << " = " << value; + } + + return sb; + } +} diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_utils.h b/ydb/library/yql/providers/generic/provider/yql_generic_utils.h new file mode 100644 index 000000000000..49c6bab7abca --- /dev/null +++ b/ydb/library/yql/providers/generic/provider/yql_generic_utils.h @@ -0,0 +1,8 @@ +#pragma once + +#include +#include + +namespace NYql { + TString DumpGenericClusterConfig(const 
TGenericClusterConfig& clusterConfig); +} diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp index f8b58da69894..f98c58d173d6 100644 --- a/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp +++ b/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp @@ -179,7 +179,7 @@ class TPqDqIntegration: public TDqIntegrationBase { } } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override { if (auto maybeDqSource = TMaybeNode(&node)) { auto settings = maybeDqSource.Cast().Settings(); if (auto maybeTopicSource = TMaybeNode(settings.Raw())) { diff --git a/ydb/library/yql/providers/s3/actors/ya.make b/ydb/library/yql/providers/s3/actors/ya.make index 721495f7e054..8ffe41cf0ddf 100644 --- a/ydb/library/yql/providers/s3/actors/ya.make +++ b/ydb/library/yql/providers/s3/actors/ya.make @@ -20,9 +20,11 @@ PEERDIR( contrib/libs/fmt contrib/libs/poco/Util ydb/library/actors/http + library/cpp/protobuf/util library/cpp/string_utils/base64 library/cpp/string_utils/quote library/cpp/xml/document + ydb/core/base ydb/core/fq/libs/events ydb/library/yql/dq/actors/compute ydb/library/yql/minikql/computation @@ -36,6 +38,8 @@ PEERDIR( ydb/library/yql/providers/s3/credentials ydb/library/yql/providers/s3/object_listers ydb/library/yql/providers/s3/proto + ydb/library/yql/providers/s3/range_helpers + ydb/library/yql/public/issue ydb/library/yql/public/types ydb/library/yql/udfs/common/clickhouse/client ) diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp index 53e189430213..46f997504e97 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp +++ 
b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp @@ -204,6 +204,8 @@ class TS3ApplicatorActor; using TObjectStorageRequest = std::function; class TS3ApplicatorActor : public NActors::TActorBootstrapped { + static constexpr ui64 GLOBAL_RETRY_LIMIT = 100; + public: using NActors::TActorBootstrapped::Send; @@ -230,7 +232,7 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedCreateRetryState()->GetNextRetryDelay(curlResponseCode, httpResponseCode); + Issues.AddIssue(TStringBuilder() << "Retry operation " << operationName << ", curl error: " << curl_easy_strerror(curlResponseCode) << ", http code: " << httpResponseCode << ", url: " << url); if (result) { RetryCount--; } else { - Finish(true); + Finish(true, RetryCount + ? TString("Number of retries exceeded limit per operation") + : TStringBuilder() << "Number of retries exceeded global limit in " << GLOBAL_RETRY_LIMIT << " retries"); } return result; } @@ -370,8 +375,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("CommitMultipartUpload ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "CommitMultipartUpload")) { PushCommitMultipartUpload(ev->Get()->State); } } @@ -444,8 +450,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("ListMultipartUploads ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "ListMultipartUploads")) { PushListMultipartUploads(ev->Get()->State); } } @@ -467,8 +474,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, 
result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("AbortMultipartUpload ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "AbortMultipartUpload")) { PushAbortMultipartUpload(ev->Get()->State); } } @@ -507,8 +515,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl()); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode)) { + const TString& url = ev->Get()->State->BuildUrl(); + LOG_D("ListParts ERROR " << url); + if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "ListParts")) { PushListParts(ev->Get()->State); } } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp index 695588bd1540..8a5454368bfb 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp @@ -31,6 +31,9 @@ #include #include +#include +#include + #endif #include "yql_arrow_column_converters.h" @@ -38,9 +41,11 @@ #include "yql_s3_read_actor.h" #include "yql_s3_source_factory.h" +#include #include #include +#include #include #include #include @@ -51,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +68,7 @@ #include #include #include +#include #include #include @@ -138,19 +145,78 @@ struct TS3ReadError : public yexception { using yexception::yexception; }; -struct TObjectPath { - TString Path; - size_t Size; - size_t PathIndex; +using NS3::FileQueue::TObjectPath; +using NDqProto::TMessageTransportMeta; + +struct TEvS3FileQueue { + enum EEv : ui32 { + EvBegin = EventSpaceBegin(NKikimr::TKikimrEvents::ES_S3_FILE_QUEUE), - TObjectPath(TString path, size_t size, size_t pathIndex) - : Path(std::move(path)), Size(size), PathIndex(pathIndex) { } + EvUpdateConsumersCount = EvBegin, + EvAck, + EvGetNextBatch, + EvObjectPathBatch, + 
EvObjectPathReadError, + + EvEnd + }; + static_assert(EvEnd < EventSpaceEnd(NKikimr::TKikimrEvents::ES_S3_FILE_QUEUE), + "expect EvEnd < EventSpaceEnd(TEvents::ES_S3_FILE_QUEUE)"); + + struct TEvUpdateConsumersCount : + public TEventPB { + + explicit TEvUpdateConsumersCount(ui64 consumersCountDelta = 0) { + Record.SetConsumersCountDelta(consumersCountDelta); + } + }; + + struct TEvAck : + public TEventPB { + + TEvAck() = default; + + explicit TEvAck(const TMessageTransportMeta& transportMeta) { + Record.MutableTransportMeta()->CopyFrom(transportMeta); + } + }; + + struct TEvGetNextBatch : + public TEventPB { + }; + + struct TEvObjectPathBatch : + public NActors::TEventPB { + + TEvObjectPathBatch() { + Record.SetNoMoreFiles(false); + } + + TEvObjectPathBatch(std::vector objectPaths, bool noMoreFiles, const TMessageTransportMeta& transportMeta) { + Record.MutableObjectPaths()->Assign( + std::make_move_iterator(objectPaths.begin()), + std::make_move_iterator(objectPaths.end())); + Record.SetNoMoreFiles(noMoreFiles); + Record.MutableTransportMeta()->CopyFrom(transportMeta); + } + }; + + struct TEvObjectPathReadError : + public NActors::TEventPB { + + TEvObjectPathReadError() = default; + + TEvObjectPathReadError(TIssues issues, const TMessageTransportMeta& transportMeta) { + IssuesToMessage(issues, Record.MutableIssues()); + Record.MutableTransportMeta()->CopyFrom(transportMeta); + } + }; }; struct TEvPrivate { // Event ids enum EEv : ui32 { - EvBegin = EventSpaceBegin(TEvents::ES_PRIVATE), + EvBegin = TEvRetryQueuePrivate::EvEnd, // Leave space for RetryQueue events EvReadResult = EvBegin, EvDataPart, @@ -162,8 +228,6 @@ struct TEvPrivate { EvNextRecordBatch, EvFileFinished, EvContinue, - EvObjectPathBatch, - EvObjectPathReadError, EvReadResult2, EvEnd @@ -265,21 +329,6 @@ struct TEvPrivate { struct TEvContinue : public NActors::TEventLocal { }; - struct TEvObjectPathBatch : - public NActors::TEventLocal { - std::vector ObjectPaths; - bool NoMoreFiles = false; - 
TEvObjectPathBatch( - std::vector objectPaths, bool noMoreFiles) - : ObjectPaths(std::move(objectPaths)), NoMoreFiles(noMoreFiles) { } - }; - - struct TEvObjectPathReadError : - public NActors::TEventLocal { - TIssues Issues; - TEvObjectPathReadError(TIssues issues) : Issues(std::move(issues)) { } - }; - struct TReadRange { int64_t Offset; int64_t Length; @@ -304,24 +353,33 @@ class TS3FileQueueActor : public TActorBootstrapped { struct TEvPrivatePrivate { enum { - EvGetNextFile = EventSpaceBegin(TEvents::ES_PRIVATE), - EvNextListingChunkReceived, + EvBegin = TEvRetryQueuePrivate::EvEnd, // Leave space for RetryQueue events + + EvNextListingChunkReceived = EvBegin, + EvRoundRobinStageTimeout, + EvTransitToErrorState, + EvEnd }; static_assert( EvEnd <= EventSpaceEnd(TEvents::ES_PRIVATE), "expected EvEnd <= EventSpaceEnd(TEvents::ES_PRIVATE)"); - struct TEvGetNextFile : public TEventLocal { - size_t RequestedAmount = 1; - TEvGetNextFile(size_t requestedAmount) : RequestedAmount(requestedAmount){}; - }; - struct TEvNextListingChunkReceived : - public TEventLocal { + struct TEvNextListingChunkReceived : public TEventLocal { NS3Lister::TListResult ListingResult; TEvNextListingChunkReceived(NS3Lister::TListResult listingResult) : ListingResult(std::move(listingResult)){}; }; + + struct TEvRoundRobinStageTimeout : public TEventLocal { + }; + + struct TEvTransitToErrorState : public TEventLocal { + explicit TEvTransitToErrorState(TIssues&& issues) + : Issues(issues) { + } + TIssues Issues; + }; }; using TBase = TActorBootstrapped; @@ -330,6 +388,10 @@ class TS3FileQueueActor : public TActorBootstrapped { TPathList paths, size_t prefetchSize, ui64 fileSizeLimit, + bool useRuntimeListing, + ui64 consumersCount, + ui64 batchSizeLimit, + ui64 batchObjectCountLimit, IHTTPGateway::TPtr gateway, TString url, TS3Credentials::TAuthInfo authInfo, @@ -340,6 +402,10 @@ class TS3FileQueueActor : public TActorBootstrapped { , PrefetchSize(prefetchSize) , FileSizeLimit(fileSizeLimit) , 
MaybeIssues(Nothing()) + , UseRuntimeListing(useRuntimeListing) + , ConsumersCount(consumersCount) + , BatchSizeLimit(batchSizeLimit) + , BatchObjectCountLimit(batchObjectCountLimit) , Gateway(std::move(gateway)) , Url(std::move(url)) , AuthInfo(std::move(authInfo)) @@ -347,20 +413,28 @@ class TS3FileQueueActor : public TActorBootstrapped { , PatternVariant(patternVariant) , PatternType(patternType) { for (size_t i = 0; i < paths.size(); ++i) { + TObjectPath object; + object.SetPath(paths[i].Path); + object.SetPathIndex(paths[i].PathIndex); if (paths[i].IsDirectory) { - Directories.emplace_back(paths[i].Path, 0, paths[i].PathIndex); + object.SetSize(0); + Directories.emplace_back(std::move(object)); } else { - Objects.emplace_back(paths[i].Path, paths[i].Size, paths[i].PathIndex); + object.SetSize(paths[i].Size); + Objects.emplace_back(std::move(object)); } } } void Bootstrap() { + if (UseRuntimeListing) { + Schedule(PoisonTimeout, new TEvents::TEvPoison()); + } if (Directories.empty()) { - LOG_I("TS3FileQueueActor", "Bootstrap there is no directories to list"); + LOG_I("TS3FileQueueActor", "Bootstrap there is no directories to list, consumersCount=" << ConsumersCount); Become(&TS3FileQueueActor::NoMoreDirectoriesState); } else { - LOG_I("TS3FileQueueActor", "Bootstrap there are directories to list"); + LOG_I("TS3FileQueueActor", "Bootstrap there are directories to list, consumersCount=" << ConsumersCount); TryPreFetch(); Become(&TS3FileQueueActor::ThereAreDirectoriesToListState); } @@ -369,9 +443,12 @@ class TS3FileQueueActor : public TActorBootstrapped { STATEFN(ThereAreDirectoriesToListState) { try { switch (const auto etype = ev->GetTypeRewrite()) { - hFunc(TEvPrivatePrivate::TEvGetNextFile, HandleGetNextFile); + hFunc(TEvS3FileQueue::TEvUpdateConsumersCount, HandleUpdateConsumersCount); + hFunc(TEvS3FileQueue::TEvGetNextBatch, HandleGetNextBatch); hFunc(TEvPrivatePrivate::TEvNextListingChunkReceived, HandleNextListingChunkReceived); - 
cFunc(TEvents::TSystem::Poison, PassAway); + cFunc(TEvPrivatePrivate::EvRoundRobinStageTimeout, HandleRoundRobinStageTimeout); + hFunc(TEvPrivatePrivate::TEvTransitToErrorState, HandleTransitToErrorState); + cFunc(TEvents::TSystem::Poison, HandlePoison); default: MaybeIssues = TIssues{TIssue{TStringBuilder() << "An event with unknown type has been received: '" << etype << "'"}}; TransitToErrorState(); @@ -383,16 +460,14 @@ class TS3FileQueueActor : public TActorBootstrapped { } } - void HandleGetNextFile(TEvPrivatePrivate::TEvGetNextFile::TPtr& ev) { - auto requestAmount = ev->Get()->RequestedAmount; - LOG_D("TS3FileQueueActor", "HandleGetNextFile requestAmount:" << requestAmount); - if (Objects.size() > requestAmount) { - LOG_D("TS3FileQueueActor", "HandleGetNextFile sending right away"); - SendObjects(ev->Sender, requestAmount); + void HandleGetNextBatch(TEvS3FileQueue::TEvGetNextBatch::TPtr& ev) { + if (HasEnoughToSend()) { + LOG_D("TS3FileQueueActor", "HandleGetNextBatch sending right away"); + TrySendObjects(ev->Sender, ev->Get()->Record.GetTransportMeta()); TryPreFetch(); } else { - LOG_D("TS3FileQueueActor", "HandleGetNextFile have not enough objects cached. Start fetching"); - RequestQueue.emplace_back(ev->Sender, requestAmount); + LOG_D("TS3FileQueueActor", "HandleGetNextBatch have not enough objects cached. Start fetching"); + ScheduleRequest(ev->Sender, ev->Get()->Record.GetTransportMeta()); TryFetch(); } } @@ -402,12 +477,12 @@ class TS3FileQueueActor : public TActorBootstrapped { ListingFuture = Nothing(); LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived"); if (SaveRetrievedResults(ev->Get()->ListingResult)) { - AnswerPendingRequests(); - if (RequestQueue.empty()) { - LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived RequestQueue is empty. Trying to prefetch"); + AnswerPendingRequests(true); + if (!HasPendingRequests) { + LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived no pending requests. 
Trying to prefetch"); TryPreFetch(); } else { - LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived RequestQueue is not empty. Fetching more objects"); + LOG_D("TS3FileQueueActor", "HandleNextListingChunkReceived there are pending requests. Fetching more objects"); TryFetch(); } } else { @@ -415,6 +490,11 @@ class TS3FileQueueActor : public TActorBootstrapped { } } + void HandleTransitToErrorState(TEvPrivatePrivate::TEvTransitToErrorState::TPtr& ev) { + MaybeIssues = ev->Get()->Issues; + TransitToErrorState(); + } + bool SaveRetrievedResults(const NS3Lister::TListResult& listingResult) { LOG_T("TS3FileQueueActor", "SaveRetrievedResults"); if (std::holds_alternative(listingResult)) { @@ -440,56 +520,28 @@ class TS3FileQueueActor : public TActorBootstrapped { return false; } LOG_T("TS3FileQueueActor", "SaveRetrievedResults adding path: " << object.Path); - Objects.emplace_back(object.Path, object.Size, CurrentDirectoryPathIndex); + TObjectPath objectPath; + objectPath.SetPath(object.Path); + objectPath.SetSize(object.Size); + objectPath.SetPathIndex(CurrentDirectoryPathIndex); + Objects.emplace_back(std::move(objectPath)); + ObjectsTotalSize += object.Size; } return true; } - void AnswerPendingRequests() { - while (!RequestQueue.empty()) { - auto requestToFulfil = std::find_if( - RequestQueue.begin(), - RequestQueue.end(), - [this](auto& val) { return val.second <= Objects.size(); }); - - if (requestToFulfil != RequestQueue.end()) { - auto [actorId, requestedAmount] = *requestToFulfil; - LOG_T( - "TS3FileQueueActor", - "AnswerPendingRequests responding to " - << requestToFulfil->first << " with " << requestToFulfil->second - << " items"); - SendObjects(actorId, requestedAmount); - RequestQueue.erase(requestToFulfil); - } else { - LOG_T( - "TS3FileQueueActor", - "AnswerPendingRequests no more pending requests to fulfil"); - break; - } - } - } - bool FetchingInProgress() const { return ListingFuture.Defined(); } void TransitToNoMoreDirectoriesToListState() { 
LOG_I("TS3FileQueueActor", "TransitToNoMoreDirectoriesToListState no more directories to list"); - for (auto& [requestorId, size]: RequestQueue) { - SendObjects(requestorId, size); - } - RequestQueue.clear(); + AnswerPendingRequests(); Become(&TS3FileQueueActor::NoMoreDirectoriesState); } void TransitToErrorState() { Y_ENSURE(MaybeIssues.Defined()); LOG_I("TS3FileQueueActor", "TransitToErrorState an error occurred sending "); - for (auto& [requestorId, _]: RequestQueue) { - Send( - requestorId, - std::make_unique(*MaybeIssues)); - } - RequestQueue.clear(); + AnswerPendingRequests(); Objects.clear(); Directories.clear(); Become(&TS3FileQueueActor::AnErrorOccurredState); @@ -498,8 +550,10 @@ class TS3FileQueueActor : public TActorBootstrapped { STATEFN(NoMoreDirectoriesState) { try { switch (const auto etype = ev->GetTypeRewrite()) { - hFunc(TEvPrivatePrivate::TEvGetNextFile, HandleGetNextFileForEmptyState); - cFunc(TEvents::TSystem::Poison, PassAway); + hFunc(TEvS3FileQueue::TEvUpdateConsumersCount, HandleUpdateConsumersCount); + hFunc(TEvS3FileQueue::TEvGetNextBatch, HandleGetNextBatchForEmptyState); + cFunc(TEvPrivatePrivate::EvRoundRobinStageTimeout, HandleRoundRobinStageTimeout); + cFunc(TEvents::TSystem::Poison, HandlePoison); default: MaybeIssues = TIssues{TIssue{TStringBuilder() << "An event with unknown type has been received: '" << etype << "'"}}; TransitToErrorState(); @@ -511,16 +565,20 @@ class TS3FileQueueActor : public TActorBootstrapped { } } - void HandleGetNextFileForEmptyState(TEvPrivatePrivate::TEvGetNextFile::TPtr& ev) { - LOG_D("TS3FileQueueActor", "HandleGetNextFileForEmptyState Giving away rest of Objects"); - SendObjects(ev->Sender, ev->Get()->RequestedAmount); + void HandleGetNextBatchForEmptyState(TEvS3FileQueue::TEvGetNextBatch::TPtr& ev) { + LOG_T( + "TS3FileQueueActor", + "HandleGetNextBatchForEmptyState Giving away rest of Objects"); + TrySendObjects(ev->Sender, ev->Get()->Record.GetTransportMeta()); } STATEFN(AnErrorOccurredState) { 
try { switch (const auto etype = ev->GetTypeRewrite()) { - hFunc(TEvPrivatePrivate::TEvGetNextFile, HandleGetNextFileForErrorState); - cFunc(TEvents::TSystem::Poison, PassAway); + hFunc(TEvS3FileQueue::TEvUpdateConsumersCount, HandleUpdateConsumersCount); + hFunc(TEvS3FileQueue::TEvGetNextBatch, HandleGetNextBatchForErrorState); + cFunc(TEvPrivatePrivate::EvRoundRobinStageTimeout, HandleRoundRobinStageTimeout); + cFunc(TEvents::TSystem::Poison, HandlePoison); default: MaybeIssues = TIssues{TIssue{TStringBuilder() << "An event with unknown type has been received: '" << etype << "'"}}; break; @@ -530,66 +588,105 @@ class TS3FileQueueActor : public TActorBootstrapped { } } - void HandleGetNextFileForErrorState(TEvPrivatePrivate::TEvGetNextFile::TPtr& ev) { + void HandleGetNextBatchForErrorState(TEvS3FileQueue::TEvGetNextBatch::TPtr& ev) { LOG_D( "TS3FileQueueActor", - "HandleGetNextFileForErrorState Giving away rest of Objects"); - Send(ev->Sender, std::make_unique(*MaybeIssues)); + "HandleGetNextBatchForErrorState Giving away rest of Objects"); + Send(ev->Sender, new TEvS3FileQueue::TEvObjectPathReadError(*MaybeIssues, ev->Get()->Record.GetTransportMeta())); + TryFinish(ev->Sender, ev->Get()->Record.GetTransportMeta().GetSeqNo()); + } + + void HandleUpdateConsumersCount(TEvS3FileQueue::TEvUpdateConsumersCount::TPtr& ev) { + if (!UpdatedConsumers.contains(ev->Sender)) { + LOG_D( + "TS3FileQueueActor", + "HandleUpdateConsumersCount Reducing ConsumersCount by " << ev->Get()->Record.GetConsumersCountDelta() << ", recieved from " << ev->Sender); + UpdatedConsumers.insert(ev->Sender); + ConsumersCount -= ev->Get()->Record.GetConsumersCountDelta(); + } + Send(ev->Sender, new TEvS3FileQueue::TEvAck(ev->Get()->Record.GetTransportMeta())); + } + + void HandleRoundRobinStageTimeout() { + LOG_T("TS3FileQueueActor","Handle start stage timeout"); + if (!RoundRobinStageFinished) { + RoundRobinStageFinished = true; + AnswerPendingRequests(); + } } - void PassAway() override { - if 
(!MaybeIssues.Defined()) { - for (auto& [requestorId, size]: RequestQueue) { - SendObjects(requestorId, size); - } - } else { - for (auto& [requestorId, _]: RequestQueue) { - Send( - requestorId, - std::make_unique(*MaybeIssues)); - } - } + void HandlePoison() { + AnswerPendingRequests(); + PassAway(); + } - RequestQueue.clear(); - Objects.clear(); - Directories.clear(); + void PassAway() override { + LOG_D("TS3FileQueueActor", "PassAway"); TBase::PassAway(); } private: - void SendObjects(const TActorId& recipient, size_t amount) { + void TrySendObjects(const TActorId& consumer, const NDqProto::TMessageTransportMeta& transportMeta) { + if (CanSendToConsumer(consumer)) { + SendObjects(consumer, transportMeta); + } else { + ScheduleRequest(consumer, transportMeta); + } + } + + void SendObjects(const TActorId& consumer, const NDqProto::TMessageTransportMeta& transportMeta) { Y_ENSURE(!MaybeIssues.Defined()); - size_t correctedAmount = std::min(amount, Objects.size()); std::vector result; - if (correctedAmount != 0) { - result.reserve(correctedAmount); - for (size_t i = 0; i < correctedAmount; ++i) { + if (Objects.size() > 0) { + size_t totalSize = 0; + do { result.push_back(Objects.back()); Objects.pop_back(); + totalSize += result.back().GetSize(); + } while (Objects.size() > 0 && result.size() < BatchObjectCountLimit && totalSize < BatchSizeLimit); + ObjectsTotalSize -= totalSize; + } + + LOG_T("TS3FileQueueActor", "SendObjects Sending " << result.size() << " objects to consumer with id " << consumer); + Send(consumer, new TEvS3FileQueue::TEvObjectPathBatch(std::move(result), HasNoMoreItems(), transportMeta)); + + if (HasNoMoreItems()) { + TryFinish(consumer, transportMeta.GetSeqNo()); + } + + if (!RoundRobinStageFinished) { + if (StartedConsumers.empty()) { + Schedule(RoundRobinStageTimeout, new TEvPrivatePrivate::TEvRoundRobinStageTimeout()); + } + StartedConsumers.insert(consumer); + if ((StartedConsumers.size() == ConsumersCount || HasNoMoreItems()) && 
!IsRoundRobinFinishScheduled) { + IsRoundRobinFinishScheduled = true; + Send(SelfId(), new TEvPrivatePrivate::TEvRoundRobinStageTimeout()); } } + } - LOG_T( - "TS3FileQueueActor", - "SendObjects amount: " << amount << " correctedAmount: " << correctedAmount - << " result size: " << result.size()); + bool HasEnoughToSend() { + return Objects.size() >= BatchObjectCountLimit || ObjectsTotalSize >= BatchSizeLimit; + } - Send( - recipient, - std::make_unique( - std::move(result), HasNoMoreItems())); + bool CanSendToConsumer(const TActorId& consumer) { + return !UseRuntimeListing || RoundRobinStageFinished || + (StartedConsumers.size() < ConsumersCount && !StartedConsumers.contains(consumer)); } + bool HasNoMoreItems() const { return !(MaybeLister.Defined() && (*MaybeLister)->HasNext()) && Directories.empty() && Objects.empty(); } - bool TryPreFetch () { + bool TryPreFetch() { if (Objects.size() < PrefetchSize) { return TryFetch(); } return false; } + bool TryFetch() { if (FetchingInProgress()) { LOG_D("TS3FileQueueActor", "TryFetch fetching already in progress"); @@ -605,9 +702,9 @@ class TS3FileQueueActor : public TActorBootstrapped { if (!Directories.empty()) { LOG_D("TS3FileQueueActor", "TryFetch fetching from new lister"); - auto [path, size, pathIndex] = Directories.back(); + auto object = Directories.back(); Directories.pop_back(); - CurrentDirectoryPathIndex = pathIndex; + CurrentDirectoryPathIndex = object.GetPathIndex(); MaybeLister = NS3Lister::MakeS3Lister( Gateway, NS3Lister::TListingRequest{ @@ -615,9 +712,9 @@ class TS3FileQueueActor : public TActorBootstrapped { AuthInfo, PatternVariant == ES3PatternVariant::PathPattern ? 
Pattern - : TStringBuilder{} << path << Pattern, + : TStringBuilder{} << object.GetPath() << Pattern, PatternType, - path}, + object.GetPath()}, Nothing(), false); Fetch(); @@ -629,6 +726,7 @@ class TS3FileQueueActor : public TActorBootstrapped { TransitToNoMoreDirectoriesToListState(); return false; } + void Fetch() { Y_ENSURE(!ListingFuture.Defined()); Y_ENSURE(MaybeLister.Defined()); @@ -638,12 +736,71 @@ class TS3FileQueueActor : public TActorBootstrapped { ->Next() .Subscribe([actorSystem, selfId = SelfId()]( const NThreading::TFuture& future) { - actorSystem->Send( - selfId, - new TEvPrivatePrivate::TEvNextListingChunkReceived( - future.GetValue())); + try { + actorSystem->Send( + selfId, + new TEvPrivatePrivate::TEvNextListingChunkReceived( + future.GetValue())); + } catch (const std::exception& e) { + actorSystem->Send( + selfId, + new TEvPrivatePrivate::TEvTransitToErrorState( + TIssues{TIssue{TStringBuilder() << "An unknown exception has occurred: '" << e.what() << "'"}})); + } }); } + + void ScheduleRequest(const TActorId& consumer, const TMessageTransportMeta& transportMeta) { + PendingRequests[consumer].push_back(transportMeta); + HasPendingRequests = true; + } + + void AnswerPendingRequests(bool earlyStop = false) { + bool handledRequest = true; + while (HasPendingRequests && handledRequest) { + bool isEmpty = true; + handledRequest = false; + for (auto& [consumer, requests] : PendingRequests) { + if (!CanSendToConsumer(consumer) || (earlyStop && !HasEnoughToSend())) { + if (!requests.empty()) { + isEmpty = false; + } + continue; + } + if (!requests.empty()) { + if (!MaybeIssues.Defined()) { + SendObjects(consumer, requests.front()); + } else { + Send(consumer, new TEvS3FileQueue::TEvObjectPathReadError(*MaybeIssues, requests.front())); + TryFinish(consumer, requests.front().GetSeqNo()); + } + requests.pop_front(); + handledRequest = true; + } + if (!requests.empty()) { + isEmpty = false; + } + } + if (isEmpty) { + HasPendingRequests = false; + } + } 
+ } + + void TryFinish(const TActorId& consumer, ui64 seqNo) { + LOG_T("TS3FileQueueActor", "TryFinish from consumer " << consumer << ", " << FinishedConsumers.size() << " consumers already finished, seqNo=" << seqNo); + if (FinishingConsumerToLastSeqNo.contains(consumer)) { + LOG_T("TS3FileQueueActor", "TryFinish FinishingConsumerToLastSeqNo=" << FinishingConsumerToLastSeqNo[consumer]); + if (FinishingConsumerToLastSeqNo[consumer] < seqNo || SelfId().NodeId() == consumer.NodeId()) { + FinishedConsumers.insert(consumer); + if (FinishedConsumers.size() == ConsumersCount) { + PassAway(); + } + } + } else { + FinishingConsumerToLastSeqNo[consumer] = seqNo; + } + } private: const TTxId TxId; @@ -656,8 +813,20 @@ class TS3FileQueueActor : public TActorBootstrapped { TMaybe MaybeLister = Nothing(); TMaybe> ListingFuture; size_t CurrentDirectoryPathIndex = 0; - std::deque> RequestQueue; + THashMap> PendingRequests; TMaybe MaybeIssues; + bool UseRuntimeListing; + ui64 ConsumersCount; + ui64 BatchSizeLimit; + ui64 BatchObjectCountLimit; + ui64 ObjectsTotalSize = 0; + THashMap FinishingConsumerToLastSeqNo; + THashSet FinishedConsumers; + bool RoundRobinStageFinished = false; + bool IsRoundRobinFinishScheduled = false; + bool HasPendingRequests = false; + THashSet StartedConsumers; + THashSet UpdatedConsumers; const IHTTPGateway::TPtr Gateway; const TString Url; @@ -665,6 +834,9 @@ class TS3FileQueueActor : public TActorBootstrapped { const TString Pattern; const ES3PatternVariant PatternVariant; const ES3PatternType PatternType; + + static constexpr TDuration PoisonTimeout = TDuration::Hours(3); + static constexpr TDuration RoundRobinStageTimeout = TDuration::Seconds(3); }; ui64 SubtractSaturating(ui64 lhs, ui64 rhs) { @@ -691,7 +863,12 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA ::NMonitoring::TDynamicCounterPtr counters, ::NMonitoring::TDynamicCounterPtr taskCounters, ui64 fileSizeLimit, - std::optional rowsLimitHint) + std::optional 
rowsLimitHint, + bool useRuntimeListing, + TActorId fileQueueActor, + ui64 fileQueueBatchSizeLimit, + ui64 fileQueueBatchObjectCountLimit, + ui64 fileQueueConsumersCountDelta) : ReadActorFactoryCfg(readActorFactoryCfg) , Gateway(std::move(gateway)) , HolderFactory(holderFactory) @@ -705,12 +882,17 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA , Pattern(pattern) , PatternVariant(patternVariant) , Paths(std::move(paths)) + , FileQueueActor(fileQueueActor) , AddPathIndex(addPathIndex) , SizeLimit(sizeLimit) , Counters(counters) , TaskCounters(taskCounters) , FileSizeLimit(fileSizeLimit) - , FilesRemained(rowsLimitHint) { + , FilesRemained(rowsLimitHint) + , UseRuntimeListing(useRuntimeListing) + , FileQueueBatchSizeLimit(fileQueueBatchSizeLimit) + , FileQueueBatchObjectCountLimit(fileQueueBatchObjectCountLimit) + , FileQueueConsumersCountDelta(fileQueueConsumersCountDelta) { if (Counters) { QueueDataSize = Counters->GetCounter("QueueDataSize"); QueueDataLimit = Counters->GetCounter("QueueDataLimit"); @@ -726,27 +908,46 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA } void Bootstrap() { - LOG_D("TS3ReadActor", "Bootstrap" << ", InputIndex: " << InputIndex); - FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ - TxId, - std::move(Paths), - ReadActorFactoryCfg.MaxInflight * 2, - FileSizeLimit, - Gateway, - Url, - AuthInfo, - Pattern, - PatternVariant, - ES3PatternType::Wildcard}); - SendPathRequest(); + if (!UseRuntimeListing) { + FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ + TxId, + std::move(Paths), + ReadActorFactoryCfg.MaxInflight * 2, + FileSizeLimit, + false, + 1, + FileQueueBatchSizeLimit, + FileQueueBatchObjectCountLimit, + Gateway, + Url, + AuthInfo, + Pattern, + PatternVariant, + ES3PatternType::Wildcard}); + } + + LOG_D("TS3ReadActor", "Bootstrap" << ", InputIndex: " << InputIndex << ", FileQueue: " << FileQueueActor << (UseRuntimeListing ? 
" (remote)" : " (local")); + + FileQueueEvents.Init(TxId, SelfId(), SelfId()); + FileQueueEvents.OnNewRecipientId(FileQueueActor); + if (UseRuntimeListing && FileQueueConsumersCountDelta > 0) { + FileQueueEvents.Send(new TEvS3FileQueue::TEvUpdateConsumersCount(FileQueueConsumersCountDelta)); + } + SendPathBatchRequest(); + Become(&TS3ReadActor::StateFunc); } bool TryStartDownload() { - if (ObjectPathCache.empty()) { + TrySendPathBatchRequest(); + if (PathBatchQueue.empty()) { // no path is pending return false; } + if (IsCurrentBatchEmpty) { + // waiting for batch to finish + return false; + } if (QueueTotalDataSize > ReadActorFactoryCfg.DataInflight) { // too large data inflight return false; @@ -766,38 +967,42 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA void StartDownload() { DownloadInflight++; - const auto& [path, size, index] = ReadPathFromCache(); - auto url = Url + path; - auto id = index; + const auto& object = ReadPathFromCache(); + auto url = Url + object.GetPath(); + auto id = object.GetPathIndex(); const TString requestId = CreateGuidAsString(); LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]"); Gateway->Download( UrlEscapeRet(url, true), IHTTPGateway::MakeYcHeaders(requestId, AuthInfo.GetToken(), {}, AuthInfo.GetAwsUserPwd(), AuthInfo.GetAwsSigV4()), 0U, - std::min(size, SizeLimit), - std::bind(&TS3ReadActor::OnDownloadFinished, ActorSystem, SelfId(), requestId, std::placeholders::_1, id, path), + std::min(object.GetSize(), SizeLimit), + std::bind(&TS3ReadActor::OnDownloadFinished, ActorSystem, SelfId(), requestId, std::placeholders::_1, id, object.GetPath()), {}, RetryPolicy); } TObjectPath ReadPathFromCache() { - Y_ENSURE(!ObjectPathCache.empty()); - auto object = ObjectPathCache.back(); - ObjectPathCache.pop_back(); - if (ObjectPathCache.empty() && !IsObjectQueueEmpty && !ConsumedEnoughFiles()) { - SendPathRequest(); - } + Y_ENSURE(!PathBatchQueue.empty()); + auto& 
currentBatch = PathBatchQueue.front(); + Y_ENSURE(!currentBatch.empty()); + auto object = currentBatch.back(); + currentBatch.pop_back(); + if (currentBatch.empty()) { + PathBatchQueue.pop_front(); + IsCurrentBatchEmpty = true; + } + TrySendPathBatchRequest(); return object; } - void SendPathRequest() { - Y_ENSURE(!IsWaitingObjectQueueResponse); - const ui64 requestedAmount = std::min(ReadActorFactoryCfg.MaxInflight, FilesRemained.value_or(std::numeric_limits::max())); - Send( - FileQueueActor, - std::make_unique( - requestedAmount)); - IsWaitingObjectQueueResponse = true; + void TrySendPathBatchRequest() { + if (PathBatchQueue.size() < 2 && !IsFileQueueEmpty && !ConsumedEnoughFiles() && !IsWaitingFileQueueResponse) { + SendPathBatchRequest(); + } + } + void SendPathBatchRequest() { + FileQueueEvents.Send(new TEvS3FileQueue::TEvGetNextBatch()); + IsWaitingFileQueueResponse = true; } static constexpr char ActorName[] = "S3_READ_ACTOR"; @@ -823,35 +1028,72 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA return FilesRemained && (*FilesRemained == 0); } - STRICT_STFUNC(StateFunc, + STRICT_STFUNC_EXC(StateFunc, hFunc(TEvPrivate::TEvReadResult, Handle); hFunc(TEvPrivate::TEvReadError, Handle); - hFunc(TEvPrivate::TEvObjectPathBatch, HandleObjectPathBatch); - hFunc(TEvPrivate::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(TEvS3FileQueue::TEvObjectPathBatch, HandleObjectPathBatch); + hFunc(TEvS3FileQueue::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(TEvS3FileQueue::TEvAck, HandleAck); + hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); + , catch (const std::exception& e) { + TIssues issues{TIssue{TStringBuilder() << "An unknown exception has occurred: '" << e.what() << "'"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, 
NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } ) - void HandleObjectPathBatch(TEvPrivate::TEvObjectPathBatch::TPtr& objectPathBatch) { - Y_ENSURE(IsWaitingObjectQueueResponse); - IsWaitingObjectQueueResponse = false; - ListedFiles += objectPathBatch->Get()->ObjectPaths.size(); - IsObjectQueueEmpty = objectPathBatch->Get()->NoMoreFiles; - ObjectPathCache.insert( - ObjectPathCache.end(), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.begin()), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.end())); + void HandleObjectPathBatch(TEvS3FileQueue::TEvObjectPathBatch::TPtr& objectPathBatch) { + if (!FileQueueEvents.OnEventReceived(objectPathBatch)) { + LOG_W("TS3ReadActor", "Duplicated TEvObjectPathBatch (likely resent) from " << FileQueueActor); + return; + } + + Y_ENSURE(IsWaitingFileQueueResponse); + IsWaitingFileQueueResponse = false; + auto& objectBatch = objectPathBatch->Get()->Record; + ListedFiles += objectBatch.GetObjectPaths().size(); + IsFileQueueEmpty = objectBatch.GetNoMoreFiles(); + if (IsFileQueueEmpty && !IsConfirmedFileQueueFinish) { + LOG_D("TS3ReadActor", "Confirm finish to " << FileQueueActor); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + if (!objectBatch.GetObjectPaths().empty()) { + PathBatchQueue.emplace_back( + std::make_move_iterator(objectBatch.MutableObjectPaths()->begin()), + std::make_move_iterator(objectBatch.MutableObjectPaths()->end())); + } while (TryStartDownload()) {} if (LastFileWasProcessed()) { Send(ComputeActorId, new TEvNewAsyncInputDataArrived(InputIndex)); } } - void HandleObjectPathReadError(TEvPrivate::TEvObjectPathReadError::TPtr& result) { - IsObjectQueueEmpty = true; - LOG_E("TS3ReadActor", "Error while object listing, details: TEvObjectPathReadError: " << result->Get()->Issues.ToOneLineString()); - auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", TIssues{result->Get()->Issues}); + void 
HandleObjectPathReadError(TEvS3FileQueue::TEvObjectPathReadError::TPtr& result) { + if (!FileQueueEvents.OnEventReceived(result)) { + LOG_W("TS3ReadActor", "Duplicated TEvObjectPathReadError (likely resent) from " << FileQueueActor); + return; + } + + IsFileQueueEmpty = true; + if (!IsConfirmedFileQueueFinish) { + LOG_D("TS3ReadActor", "Confirm finish (with errors) to " << FileQueueActor); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + TIssues issues; + IssuesFromMessage(result->Get()->Record.GetIssues(), issues); + LOG_E("TS3ReadActor", "Error while object listing, details: TEvObjectPathReadError: " << issues.ToOneLineString()); + issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", std::move(issues)); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); } + void HandleAck(TEvS3FileQueue::TEvAck::TPtr& ev) { + FileQueueEvents.OnEventReceived(ev); + } + static void OnDownloadFinished(TActorSystem* actorSystem, TActorId selfId, const TString& requestId, IHTTPGateway::TResult&& result, size_t pathInd, const TString path) { if (!result.Issues) { actorSystem->Send(new IEventHandle(selfId, TActorId(), new TEvPrivate::TEvReadResult(std::move(result.Content), requestId, pathInd, path))); @@ -892,7 +1134,7 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA } while (!Blocks.empty() && freeSpace > 0LL); } - if (LastFileWasProcessed() || ConsumedEnoughFiles()) { + if ((LastFileWasProcessed() || ConsumedEnoughFiles()) && !FileQueueEvents.RemoveConfirmedEvents()) { finished = true; ContainerCache.Clear(); } @@ -904,7 +1146,7 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA return total; } bool LastFileWasProcessed() const { - return Blocks.empty() && (ListedFiles == CompletedFiles) && IsObjectQueueEmpty; + return Blocks.empty() && (ListedFiles == CompletedFiles) && IsFileQueueEmpty; } void Handle(TEvPrivate::TEvReadResult::TPtr& 
result) { @@ -934,6 +1176,9 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA } Blocks.emplace(std::make_tuple(std::move(result->Get()->Result), id)); DownloadInflight--; + if (IsCurrentBatchEmpty && DownloadInflight == 0) { + IsCurrentBatchEmpty = false; + } if (FilesRemained) { *FilesRemained = SubtractSaturating(*FilesRemained, 1); } @@ -960,6 +1205,28 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while reading file " << path << " with request id [" << requestId << "]", TIssues{result->Get()->Error}); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); } + + void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&) { + FileQueueEvents.Retry(); + } + + void Handle(NActors::TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { + LOG_T("TS3ReadActor", "Handle disconnected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeDisconnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvInterconnect::TEvNodeConnected::TPtr& ev) { + LOG_T("TS3ReadActor", "Handle connected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeConnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + LOG_T("TS3ReadActor", "Handle undelivered FileQueue "); + if (!FileQueueEvents.HandleUndelivered(ev)) { + TIssues issues{TIssue{TStringBuilder() << "FileQueue was lost"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } + } // IActor & IDqComputeActorAsyncInput void PassAway() override { // Is called from Compute Actor @@ -977,7 +1244,7 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA QueueTotalDataSize = 0; ContainerCache.Clear(); - Send(FileQueueActor, new NActors::TEvents::TEvPoison()); + FileQueueEvents.Unsubscribe(); TActorBootstrapped::PassAway(); } @@ 
-1000,9 +1267,6 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA const TString Pattern; const ES3PatternVariant PatternVariant; TPathList Paths; - std::vector ObjectPathCache; - bool IsObjectQueueEmpty = false; - bool IsWaitingObjectQueueResponse = false; size_t ListedFiles = 0; size_t CompletedFiles = 0; NActors::TActorId FileQueueActor; @@ -1026,6 +1290,17 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA ui64 DownloadInflight = 0; const ui64 FileSizeLimit; std::optional FilesRemained; + + bool UseRuntimeListing; + ui64 FileQueueBatchSizeLimit; + ui64 FileQueueBatchObjectCountLimit; + ui64 FileQueueConsumersCountDelta; + bool IsFileQueueEmpty = false; + bool IsCurrentBatchEmpty = false; + bool IsWaitingFileQueueResponse = false; + bool IsConfirmedFileQueueFinish = false; + TRetryEventsQueue FileQueueEvents; + TDeque> PathBatchQueue; }; struct TReadSpec { @@ -1554,7 +1829,7 @@ class TS3ReadCoroImpl : public TActorCoroImpl { if (it != RangeCache.end()) { return it->second; } - RetryStuff->Gateway->Download(Url + Path, RetryStuff->Headers, + RetryStuff->Gateway->Download(RetryStuff->Url, RetryStuff->Headers, range.Offset, range.Length, std::bind(&OnResult, GetActorSystem(), SelfActorId, range, ++RangeCookie, std::placeholders::_1), @@ -2260,7 +2535,12 @@ class TS3StreamReadActor : public TActorBootstrapped, public ::NMonitoring::TDynamicCounterPtr taskCounters, ui64 fileSizeLimit, std::optional rowsLimitHint, - IMemoryQuotaManager::TPtr memoryQuotaManager + IMemoryQuotaManager::TPtr memoryQuotaManager, + bool useRuntimeListing, + TActorId fileQueueActor, + ui64 fileQueueBatchSizeLimit, + ui64 fileQueueBatchObjectCountLimit, + ui64 fileQueueConsumersCountDelta ) : ReadActorFactoryCfg(readActorFactoryCfg) , Gateway(std::move(gateway)) , HolderFactory(holderFactory) @@ -2278,8 +2558,13 @@ class TS3StreamReadActor : public TActorBootstrapped, public , ReadSpec(readSpec) , Counters(std::move(counters)) , 
TaskCounters(std::move(taskCounters)) + , FileQueueActor(fileQueueActor) , FileSizeLimit(fileSizeLimit) - , MemoryQuotaManager(memoryQuotaManager) { + , MemoryQuotaManager(memoryQuotaManager) + , UseRuntimeListing(useRuntimeListing) + , FileQueueBatchSizeLimit(fileQueueBatchSizeLimit) + , FileQueueBatchObjectCountLimit(fileQueueBatchObjectCountLimit) + , FileQueueConsumersCountDelta(fileQueueConsumersCountDelta) { if (Counters) { QueueDataSize = Counters->GetCounter("QueueDataSize"); QueueDataLimit = Counters->GetCounter("QueueDataLimit"); @@ -2326,27 +2611,45 @@ class TS3StreamReadActor : public TActorBootstrapped, public TaskDownloadPaused, TaskChunkDownloadCount, DecodedChunkSizeHist); - FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ - TxId, - std::move(Paths), - ReadActorFactoryCfg.MaxInflight * 2, - FileSizeLimit, - Gateway, - Url, - AuthInfo, - Pattern, - PatternVariant, - ES3PatternType::Wildcard}); - SendPathRequest(); + + if (!UseRuntimeListing) { + FileQueueActor = RegisterWithSameMailbox(new TS3FileQueueActor{ + TxId, + std::move(Paths), + ReadActorFactoryCfg.MaxInflight * 2, + FileSizeLimit, + false, + 1, + FileQueueBatchSizeLimit, + FileQueueBatchObjectCountLimit, + Gateway, + Url, + AuthInfo, + Pattern, + PatternVariant, + ES3PatternType::Wildcard}); + } + FileQueueEvents.Init(TxId, SelfId(), SelfId()); + FileQueueEvents.OnNewRecipientId(FileQueueActor); + if (UseRuntimeListing && FileQueueConsumersCountDelta > 0) { + FileQueueEvents.Send(new TEvS3FileQueue::TEvUpdateConsumersCount(FileQueueConsumersCountDelta)); + } + SendPathBatchRequest(); + Become(&TS3StreamReadActor::StateFunc); Bootstrapped = true; } bool TryRegisterCoro() { - if (ObjectPathCache.empty()) { + TrySendPathBatchRequest(); + if (PathBatchQueue.empty()) { // no path is pending return false; } + if (IsCurrentBatchEmpty) { + // waiting for batch to finish + return false; + } if (QueueBufferCounter->IsFull()) { // too large data inflight return false; @@ -2378,24 
+2681,24 @@ class TS3StreamReadActor : public TActorBootstrapped, public if (TaskCounters) { TaskDownloadCount->Inc(); } - const auto& objectPath = ReadPathFromCache(); - DownloadSize += objectPath.Size; + const auto& object = ReadPathFromCache(); + DownloadSize += object.GetSize(); const TString requestId = CreateGuidAsString(); auto stuff = std::make_shared( Gateway, - Url + objectPath.Path, + Url + object.GetPath(), IHTTPGateway::MakeYcHeaders(requestId, AuthInfo.GetToken(), {}, AuthInfo.GetAwsUserPwd(), AuthInfo.GetAwsSigV4()), - objectPath.Size, + object.GetSize(), TxId, requestId, RetryPolicy); - auto pathIndex = objectPath.PathIndex; + auto pathIndex = object.GetPathIndex(); if (TaskCounters) { HttpInflightLimit->Add(Gateway->GetBuffersSizePerStream()); } LOG_D( "TS3StreamReadActor", - "RegisterCoro with path " << objectPath.Path << " with pathIndex " + "RegisterCoro with path " << object.GetPath() << " with pathIndex " << pathIndex); auto impl = MakeHolder( InputIndex, @@ -2404,7 +2707,7 @@ class TS3StreamReadActor : public TActorBootstrapped, public std::move(stuff), ReadSpec, pathIndex, - objectPath.Path, + object.GetPath(), Url, RowsRemained, ReadActorFactoryCfg, @@ -2419,22 +2722,26 @@ class TS3StreamReadActor : public TActorBootstrapped, public } TObjectPath ReadPathFromCache() { - Y_ENSURE(!ObjectPathCache.empty()); - auto object = ObjectPathCache.back(); - ObjectPathCache.pop_back(); - if (ObjectPathCache.empty() && !IsObjectQueueEmpty) { - SendPathRequest(); - } + Y_ENSURE(!PathBatchQueue.empty()); + auto& currentBatch = PathBatchQueue.front(); + Y_ENSURE(!currentBatch.empty()); + auto object = currentBatch.back(); + currentBatch.pop_back(); + if (currentBatch.empty()) { + PathBatchQueue.pop_front(); + IsCurrentBatchEmpty = true; + } + TrySendPathBatchRequest(); return object; } - void SendPathRequest() { - Y_ENSURE(!IsWaitingObjectQueueResponse); - LOG_D("TS3StreamReadActor", "SendPathRequest " << ReadActorFactoryCfg.MaxInflight); - Send( - 
FileQueueActor, - std::make_unique( - ReadActorFactoryCfg.MaxInflight)); - IsWaitingObjectQueueResponse = true; + void TrySendPathBatchRequest() { + if (PathBatchQueue.size() < 2 && !IsFileQueueEmpty && !IsWaitingFileQueueResponse) { + SendPathBatchRequest(); + } + } + void SendPathBatchRequest() { + FileQueueEvents.Send(new TEvS3FileQueue::TEvGetNextBatch()); + IsWaitingFileQueueResponse = true; } static constexpr char ActorName[] = "S3_STREAM_READ_ACTOR"; @@ -2528,7 +2835,7 @@ class TS3StreamReadActor : public TActorBootstrapped, public TryRegisterCoro(); } while (!Blocks.empty() && free > 0LL && GetBlockSize(Blocks.front()) <= size_t(free)); - finished = ConsumedEnoughRows() || LastFileWasProcessed(); + finished = (ConsumedEnoughRows() || LastFileWasProcessed()) && !FileQueueEvents.RemoveConfirmedEvents(); if (finished) { ContainerCache.Clear(); ArrowTupleContainerCache.Clear(); @@ -2560,7 +2867,8 @@ class TS3StreamReadActor : public TActorBootstrapped, public for (const auto actorId : CoroActors) { Send(actorId, new NActors::TEvents::TEvPoison()); } - Send(FileQueueActor, new NActors::TEvents::TEvPoison()); + LOG_T("TS3StreamReadActor", "PassAway FileQueue RemoveConfirmedEvents=" << FileQueueEvents.RemoveConfirmedEvents()); + FileQueueEvents.Unsubscribe(); ContainerCache.Clear(); ArrowTupleContainerCache.Clear(); @@ -2575,36 +2883,69 @@ class TS3StreamReadActor : public TActorBootstrapped, public TActorBootstrapped::PassAway(); } - STRICT_STFUNC(StateFunc, + STRICT_STFUNC_EXC(StateFunc, hFunc(TEvPrivate::TEvRetryEventFunc, HandleRetry); hFunc(TEvPrivate::TEvNextBlock, HandleNextBlock); hFunc(TEvPrivate::TEvNextRecordBatch, HandleNextRecordBatch); hFunc(TEvPrivate::TEvFileFinished, HandleFileFinished); - hFunc(TEvPrivate::TEvObjectPathBatch, HandleObjectPathBatch); - hFunc(TEvPrivate::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(TEvS3FileQueue::TEvAck, Handle); + hFunc(TEvS3FileQueue::TEvObjectPathBatch, HandleObjectPathBatch); + 
hFunc(TEvS3FileQueue::TEvObjectPathReadError, HandleObjectPathReadError); + hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeDisconnected, Handle); + hFunc(NActors::TEvInterconnect::TEvNodeConnected, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); + , catch (const std::exception& e) { + TIssues issues{TIssue{TStringBuilder() << "An unknown exception has occurred: '" << e.what() << "'"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } ) - void HandleObjectPathBatch(TEvPrivate::TEvObjectPathBatch::TPtr& objectPathBatch) { - Y_ENSURE(IsWaitingObjectQueueResponse); - IsWaitingObjectQueueResponse = false; - ListedFiles += objectPathBatch->Get()->ObjectPaths.size(); - IsObjectQueueEmpty = objectPathBatch->Get()->NoMoreFiles; + void HandleObjectPathBatch(TEvS3FileQueue::TEvObjectPathBatch::TPtr& objectPathBatch) { + if (!FileQueueEvents.OnEventReceived(objectPathBatch)) { + return; + } - ObjectPathCache.insert( - ObjectPathCache.end(), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.begin()), - std::make_move_iterator(objectPathBatch->Get()->ObjectPaths.end())); + Y_ENSURE(IsWaitingFileQueueResponse); + IsWaitingFileQueueResponse = false; + auto& objectBatch = objectPathBatch->Get()->Record; + ListedFiles += objectBatch.GetObjectPaths().size(); + IsFileQueueEmpty = objectBatch.GetNoMoreFiles(); + if (IsFileQueueEmpty && !IsConfirmedFileQueueFinish) { + LOG_T("TS3StreamReadActor", "Sending finish confirmation to FileQueue"); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + if (!objectBatch.GetObjectPaths().empty()) { + PathBatchQueue.emplace_back( + std::make_move_iterator(objectBatch.MutableObjectPaths()->begin()), + std::make_move_iterator(objectBatch.MutableObjectPaths()->end())); + } LOG_D( "TS3StreamReadActor", - "HandleObjectPathBatch " << ObjectPathCache.size() << " IsObjectQueueEmpty " - << 
IsObjectQueueEmpty << " MaxInflight " << ReadActorFactoryCfg.MaxInflight); + "HandleObjectPathBatch of size " << objectBatch.GetObjectPaths().size()); while (TryRegisterCoro()) {} + + if (LastFileWasProcessed()) { + Send(ComputeActorId, new TEvNewAsyncInputDataArrived(InputIndex)); + } } - void HandleObjectPathReadError(TEvPrivate::TEvObjectPathReadError::TPtr& result) { - IsObjectQueueEmpty = true; - LOG_W("TS3StreamReadActor", "Error while object listing, details: TEvObjectPathReadError: " << result->Get()->Issues.ToOneLineString()); - auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", TIssues{result->Get()->Issues}); + void HandleObjectPathReadError(TEvS3FileQueue::TEvObjectPathReadError::TPtr& result) { + if (!FileQueueEvents.OnEventReceived(result)) { + return; + } + + IsFileQueueEmpty = true; + if (!IsConfirmedFileQueueFinish) { + LOG_T("TS3StreamReadActor", "Sending finish confirmation to FileQueue"); + SendPathBatchRequest(); + IsConfirmedFileQueueFinish = true; + } + TIssues issues; + IssuesFromMessage(result->Get()->Record.GetIssues(), issues); + LOG_W("TS3StreamReadActor", "Error while object listing, details: TEvObjectPathReadError: " << issues.ToOneLineString()); + issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while object listing", std::move(issues)); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); } @@ -2646,6 +2987,9 @@ class TS3StreamReadActor : public TActorBootstrapped, public void HandleFileFinished(TEvPrivate::TEvFileFinished::TPtr& ev) { CoroActors.erase(ev->Sender); + if (IsCurrentBatchEmpty && CoroActors.size() == 0) { + IsCurrentBatchEmpty = false; + } if (ev->Get()->IngressDelta) { IngressStats.Bytes += ev->Get()->IngressDelta; IngressStats.Chunks++; @@ -2677,7 +3021,7 @@ class TS3StreamReadActor : public TActorBootstrapped, public } CompletedFiles++; IngressStats.Splits++; - if (!ObjectPathCache.empty()) { + if 
(!PathBatchQueue.empty()) { TryRegisterCoro(); } else { /* @@ -2690,9 +3034,34 @@ class TS3StreamReadActor : public TActorBootstrapped, public } } } + + void Handle(TEvS3FileQueue::TEvAck::TPtr& ev) { + FileQueueEvents.OnEventReceived(ev); + } + void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&) { + FileQueueEvents.Retry(); + } + + void Handle(NActors::TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { + LOG_T("TS3StreamReadActor", "Handle disconnected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeDisconnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvInterconnect::TEvNodeConnected::TPtr& ev) { + LOG_T("TS3StreamReadActor", "Handle connected FileQueue " << ev->Get()->NodeId); + FileQueueEvents.HandleNodeConnected(ev->Get()->NodeId); + } + + void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + LOG_T("TS3StreamReadActor", "Handle undelivered FileQueue "); + if (!FileQueueEvents.HandleUndelivered(ev)) { + TIssues issues{TIssue{TStringBuilder() << "FileQueue was lost"}}; + Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::INTERNAL_ERROR)); + } + } bool LastFileWasProcessed() const { - return Blocks.empty() && (ListedFiles == CompletedFiles) && IsObjectQueueEmpty; + return Blocks.empty() && (ListedFiles == CompletedFiles) && IsFileQueueEmpty; } void StopLoadsIfEnough(ui64 consumedRows) { @@ -2732,9 +3101,6 @@ class TS3StreamReadActor : public TActorBootstrapped, public const TString Pattern; const ES3PatternVariant PatternVariant; TPathList Paths; - std::vector ObjectPathCache; - bool IsObjectQueueEmpty = false; - bool IsWaitingObjectQueueResponse = false; const bool AddPathIndex; size_t ListedFiles = 0; size_t CompletedFiles = 0; @@ -2768,6 +3134,16 @@ class TS3StreamReadActor : public TActorBootstrapped, public const ui64 FileSizeLimit; bool Bootstrapped = false; IMemoryQuotaManager::TPtr MemoryQuotaManager; + bool UseRuntimeListing; + ui64 FileQueueBatchSizeLimit; + ui64 
FileQueueBatchObjectCountLimit; + ui64 FileQueueConsumersCountDelta; + bool IsCurrentBatchEmpty = false; + bool IsFileQueueEmpty = false; + bool IsWaitingFileQueueResponse = false; + bool IsConfirmedFileQueueFinish = false; + TRetryEventsQueue FileQueueEvents; + TDeque> PathBatchQueue; }; using namespace NKikimr::NMiniKQL; @@ -2910,6 +3286,39 @@ NDB::FormatSettings::TimestampFormat ToTimestampFormat(const TString& formatName using namespace NKikimr::NMiniKQL; +IActor* CreateS3FileQueueActor( + TTxId txId, + TPathList paths, + size_t prefetchSize, + ui64 fileSizeLimit, + bool useRuntimeListing, + ui64 consumersCount, + ui64 batchSizeLimit, + ui64 batchObjectCountLimit, + IHTTPGateway::TPtr gateway, + TString url, + TS3Credentials::TAuthInfo authInfo, + TString pattern, + ES3PatternVariant patternVariant, + ES3PatternType patternType) { + return new TS3FileQueueActor( + txId, + paths, + prefetchSize, + fileSizeLimit, + useRuntimeListing, + consumersCount, + batchSizeLimit, + batchObjectCountLimit, + gateway, + url, + authInfo, + pattern, + patternVariant, + patternType + ); +} + std::pair CreateS3ReadActor( const TTypeEnvironment& typeEnv, const THolderFactory& holderFactory, @@ -2981,6 +3390,29 @@ std::pair CreateS3ReadActor( if (params.GetRowsLimitHint() != 0) { rowsLimitHint = params.GetRowsLimitHint(); } + + TActorId fileQueueActor; + if (auto it = settings.find("fileQueueActor"); it != settings.cend()) { + NActorsProto::TActorId protoId; + TMemoryInput inputStream(it->second); + ParseFromTextFormat(inputStream, protoId); + fileQueueActor = ActorIdFromProto(protoId); + } + + ui64 fileQueueBatchSizeLimit; + if (auto it = settings.find("fileQueueBatchSizeLimit"); it != settings.cend()) { + fileQueueBatchSizeLimit = FromString(it->second); + } + + ui64 fileQueueBatchObjectCountLimit; + if (auto it = settings.find("fileQueueBatchObjectCountLimit"); it != settings.cend()) { + fileQueueBatchObjectCountLimit = FromString(it->second); + } + + ui64 
fileQueueConsumersCountDelta = 0; + if (readRanges.size() > 1) { + fileQueueConsumersCountDelta = readRanges.size() - 1; + } if (params.HasFormat() && params.HasRowType()) { const auto pb = std::make_unique(typeEnv, functionRegistry); @@ -3086,7 +3518,8 @@ std::pair CreateS3ReadActor( #undef SUPPORTED_FLAGS const auto actor = new TS3StreamReadActor(inputIndex, statsLevel, txId, std::move(gateway), holderFactory, params.GetUrl(), authInfo, pathPattern, pathPatternVariant, std::move(paths), addPathIndex, readSpec, computeActorId, retryPolicy, - cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, memoryQuotaManager); + cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, memoryQuotaManager, + params.GetUseRuntimeListing(), fileQueueActor, fileQueueBatchSizeLimit, fileQueueBatchObjectCountLimit, fileQueueConsumersCountDelta); return {actor, actor}; } else { @@ -3096,7 +3529,8 @@ std::pair CreateS3ReadActor( const auto actor = new TS3ReadActor(inputIndex, statsLevel, txId, std::move(gateway), holderFactory, params.GetUrl(), authInfo, pathPattern, pathPatternVariant, std::move(paths), addPathIndex, computeActorId, sizeLimit, retryPolicy, - cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint); + cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, + params.GetUseRuntimeListing(), fileQueueActor, fileQueueBatchSizeLimit, fileQueueBatchObjectCountLimit, fileQueueConsumersCountDelta); return {actor, actor}; } } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h index f4569b0219d5..2b1ca1adeff6 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h @@ -2,8 +2,10 @@ #include #include +#include "ydb/library/yql/providers/s3/object_listers/yql_s3_list.h" #include #include +#include "ydb/library/yql/providers/s3/range_helpers/path_list_reader.h" #include #include @@ -11,6 +13,22 @@ namespace NYql::NDq { 
struct TS3ReadActorFactoryConfig; +NActors::IActor* CreateS3FileQueueActor( + TTxId txId, + NS3Details::TPathList paths, + size_t prefetchSize, + ui64 fileSizeLimit, + bool useRuntimeListing, + ui64 consumersCount, + ui64 batchSizeLimit, + ui64 batchObjectCountLimit, + IHTTPGateway::TPtr gateway, + TString url, + TS3Credentials::TAuthInfo authInfo, + TString pattern, + NYql::NS3Lister::ES3PatternVariant patternVariant, + NS3Lister::ES3PatternType patternType); + std::pair CreateS3ReadActor( const NKikimr::NMiniKQL::TTypeEnvironment& typeEnv, const NKikimr::NMiniKQL::THolderFactory& holderFactory, diff --git a/ydb/library/yql/providers/s3/proto/file_queue.proto b/ydb/library/yql/providers/s3/proto/file_queue.proto new file mode 100644 index 000000000000..75ec283f20f2 --- /dev/null +++ b/ydb/library/yql/providers/s3/proto/file_queue.proto @@ -0,0 +1,40 @@ +syntax = "proto3"; +option cc_enable_arenas = true; + +package NYql.NS3.FileQueue; + +import "ydb/library/yql/dq/actors/protos/dq_events.proto"; +import "ydb/public/api/protos/ydb_issue_message.proto"; + +message TEvUpdateConsumersCount { + uint64 ConsumersCountDelta = 1; + + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvAck { + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvGetNextBatch { + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvObjectPathBatch { + bool NoMoreFiles = 1; + repeated TObjectPath ObjectPaths = 2; + + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TEvObjectPathReadError { + repeated Ydb.Issue.IssueMessage Issues = 1; + + optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100; +} + +message TObjectPath { + uint64 PathIndex = 1; + uint64 Size = 2; + string Path = 3; +} diff --git a/ydb/library/yql/providers/s3/proto/source.proto b/ydb/library/yql/providers/s3/proto/source.proto index fdf63c74acae..1ac3fd840d12 100644 --- 
a/ydb/library/yql/providers/s3/proto/source.proto +++ b/ydb/library/yql/providers/s3/proto/source.proto @@ -21,4 +21,5 @@ message TSource { bool RowGroupReordering = 10; uint64 ParallelDownloadCount = 11; uint64 RowsLimitHint = 12; + bool UseRuntimeListing = 13; } diff --git a/ydb/library/yql/providers/s3/proto/ya.make b/ydb/library/yql/providers/s3/proto/ya.make index eb819cddfce5..acb43f749ab3 100644 --- a/ydb/library/yql/providers/s3/proto/ya.make +++ b/ydb/library/yql/providers/s3/proto/ya.make @@ -2,12 +2,18 @@ PROTO_LIBRARY() SRCS( credentials.proto + file_queue.proto range.proto retry_config.proto sink.proto source.proto ) +PEERDIR( + ydb/library/yql/dq/actors/protos + ydb/public/api/protos +) + IF (NOT PY_PROTOS_FOR) EXCLUDE_TAGS(GO_PROTO) ENDIF() diff --git a/ydb/library/yql/providers/s3/provider/ut/ya.make b/ydb/library/yql/providers/s3/provider/ut/ya.make index ffffd526f4cb..50c6132c8c73 100644 --- a/ydb/library/yql/providers/s3/provider/ut/ya.make +++ b/ydb/library/yql/providers/s3/provider/ut/ya.make @@ -4,4 +4,11 @@ SRCS( yql_s3_listing_strategy_ut.cpp ) +PEERDIR( + ydb/library/yql/minikql/dom + ydb/library/yql/parser/pg_wrapper + ydb/library/yql/public/udf + ydb/library/yql/public/udf/service/exception_policy +) + END() diff --git a/ydb/library/yql/providers/s3/provider/ya.make b/ydb/library/yql/providers/s3/provider/ya.make index 4d313be78d64..097c865d1e69 100644 --- a/ydb/library/yql/providers/s3/provider/ya.make +++ b/ydb/library/yql/providers/s3/provider/ya.make @@ -45,6 +45,7 @@ PEERDIR( ydb/library/yql/providers/dq/common ydb/library/yql/providers/dq/expr_nodes ydb/library/yql/providers/result/expr_nodes + ydb/library/yql/providers/s3/actors ydb/library/yql/providers/s3/common ydb/library/yql/providers/s3/expr_nodes ydb/library/yql/providers/s3/object_listers diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp index 56e5a1cd52d8..2601d3971242 100644 --- 
a/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasink.cpp @@ -29,7 +29,7 @@ void ScanPlanDependencies(const TExprNode::TPtr& input, TExprNode::TListType& ch class TS3DataSinkProvider : public TDataProviderBase { public: - TS3DataSinkProvider(TS3State::TPtr state, IHTTPGateway::TPtr) + TS3DataSinkProvider(TS3State::TPtr state) : State_(state) , TypeAnnotationTransformer_(CreateS3DataSinkTypeAnnotationTransformer(State_)) , ExecutionTransformer_(CreateS3DataSinkExecTransformer(State_)) @@ -137,8 +137,8 @@ class TS3DataSinkProvider : public TDataProviderBase { } -TIntrusivePtr CreateS3DataSink(TS3State::TPtr state, IHTTPGateway::TPtr gateway) { - return new TS3DataSinkProvider(std::move(state), std::move(gateway)); +TIntrusivePtr CreateS3DataSink(TS3State::TPtr state) { + return new TS3DataSinkProvider(std::move(state)); } } // namespace NYql diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp index 4f7eb51def11..3586024ee032 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource.cpp @@ -21,9 +21,9 @@ namespace { class TS3DataSourceProvider : public TDataProviderBase { public: - TS3DataSourceProvider(TS3State::TPtr state, IHTTPGateway::TPtr gateway) + TS3DataSourceProvider(TS3State::TPtr state) : State_(std::move(state)) - , IODiscoveryTransformer_(CreateS3IODiscoveryTransformer(State_, std::move(gateway))) + , IODiscoveryTransformer_(CreateS3IODiscoveryTransformer(State_)) , ConfigurationTransformer_(MakeHolder(State_->Configuration, *State_->Types, TString{S3ProviderName})) , CallableExecutionTransformer_(CreateS3SourceCallableExecutionTransformer(State_)) , TypeAnnotationTransformer_(CreateS3DataSourceTypeAnnotationTransformer(State_)) @@ -160,8 +160,8 @@ class TS3DataSourceProvider : public TDataProviderBase { } -TIntrusivePtr 
CreateS3DataSource(TS3State::TPtr state, IHTTPGateway::TPtr gateway) { - return new TS3DataSourceProvider(std::move(state), std::move(gateway)); +TIntrusivePtr CreateS3DataSource(TS3State::TPtr state) { + return new TS3DataSourceProvider(std::move(state)); } } // namespace NYql diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp index cad13c9213b1..dcb4c1dc9933 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -74,6 +76,7 @@ class TS3DqIntegration: public TDqIntegrationBase { ui64 Partition(const TDqSettings&, size_t maxPartitions, const TExprNode& node, TVector& partitions, TString*, TExprContext&, bool) override { std::vector> parts; std::optional mbLimitHint; + bool hasDirectories = false; if (const TMaybeNode source = &node) { const auto settings = source.Cast().Settings().Cast(); mbLimitHint = TryExtractLimitHint(settings); @@ -87,6 +90,9 @@ class TS3DqIntegration: public TDqIntegrationBase { paths); parts.reserve(parts.size() + paths.size()); for (const auto& path : paths) { + if (path.IsDirectory) { + hasDirectories = true; + } parts.emplace_back(1U, path); } } @@ -98,6 +104,25 @@ class TS3DqIntegration: public TDqIntegrationBase { YQL_CLOG(TRACE, ProviderS3) << "limited max partitions to " << maxPartitions; } + auto useRuntimeListing = State_->Configuration->UseRuntimeListing.Get().GetOrElse(false); + + YQL_CLOG(DEBUG, ProviderS3) << " useRuntimeListing=" << useRuntimeListing; + if (useRuntimeListing) { + size_t partitionCount = hasDirectories ? 
maxPartitions : Min(parts.size(), maxPartitions); + partitions.reserve(partitionCount); + for (size_t i = 0; i < partitionCount; ++i) { + NS3::TRange range; + TFileTreeBuilder builder; + builder.Save(&range); + + partitions.emplace_back(); + TStringOutput out(partitions.back()); + range.Save(&out); + } + YQL_CLOG(DEBUG, ProviderS3) << " hasDirectories=" << hasDirectories << ", partitionCount=" << partitionCount << ", maxPartitions=" << maxPartitions; + return 0; + } + if (maxPartitions && parts.size() > maxPartitions) { if (const auto extraParts = parts.size() - maxPartitions; extraParts > maxPartitions) { const auto partsPerTask = (parts.size() - 1ULL) / maxPartitions + 1ULL; @@ -136,6 +161,7 @@ class TS3DqIntegration: public TDqIntegrationBase { range.Save(&out); } + YQL_CLOG(DEBUG, ProviderS3) << " hasDirectories=" << hasDirectories << ", partitionCount=" << partitions.size() << ", maxPartitions=" << maxPartitions;; return 0; } @@ -312,7 +338,7 @@ class TS3DqIntegration: public TDqIntegrationBase { return read; } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t maxPartitions) override { const TDqSource source(&node); if (const auto maySettings = source.Settings().Maybe()) { const auto settings = maySettings.Cast(); @@ -383,6 +409,120 @@ class TS3DqIntegration: public TDqIntegrationBase { srcDesc.MutableSettings()->insert({"addPathIndex", "true"}); } +#if defined(_linux_) || defined(_darwin_) + + auto useRuntimeListing = State_->Configuration->UseRuntimeListing.Get().GetOrElse(false); + srcDesc.SetUseRuntimeListing(useRuntimeListing); + + auto fileQueueBatchSizeLimit = State_->Configuration->FileQueueBatchSizeLimit.Get().GetOrElse(1000000); + srcDesc.MutableSettings()->insert({"fileQueueBatchSizeLimit", ToString(fileQueueBatchSizeLimit)}); + + auto 
fileQueueBatchObjectCountLimit = State_->Configuration->FileQueueBatchObjectCountLimit.Get().GetOrElse(1000); + srcDesc.MutableSettings()->insert({"fileQueueBatchObjectCountLimit", ToString(fileQueueBatchObjectCountLimit)}); + + YQL_CLOG(DEBUG, ProviderS3) << " useRuntimeListing=" << useRuntimeListing; + + if (useRuntimeListing) { + TPathList paths; + for (auto i = 0u; i < settings.Paths().Size(); ++i) { + const auto& packed = settings.Paths().Item(i); + TPathList pathsChunk; + UnpackPathsList( + packed.Data().Literal().Value(), + FromString(packed.IsText().Literal().Value()), + paths); + paths.insert(paths.end(), + std::make_move_iterator(pathsChunk.begin()), + std::make_move_iterator(pathsChunk.end())); + } + + NS3::TRange range; + range.SetStartPathIndex(0); + TFileTreeBuilder builder; + std::for_each(paths.cbegin(), paths.cend(), [&builder](const TPath& f) { + builder.AddPath(f.Path, f.Size, f.IsDirectory); + }); + builder.Save(&range); + + TVector serialized(1); + TStringOutput out(serialized.front()); + range.Save(&out); + + paths.clear(); + ReadPathsList(srcDesc, {}, serialized, paths); + + NDq::TS3ReadActorFactoryConfig readActorConfig; + ui64 fileSizeLimit = readActorConfig.FileSizeLimit; + if (srcDesc.HasFormat()) { + if (auto it = readActorConfig.FormatSizeLimits.find(srcDesc.GetFormat()); it != readActorConfig.FormatSizeLimits.end()) { + fileSizeLimit = it->second; + } + } + if (srcDesc.HasFormat() && srcDesc.HasRowType()) { + if (srcDesc.GetFormat() == "parquet") { + fileSizeLimit = readActorConfig.BlockFileSizeLimit; + } + } + + TString pathPattern = "*"; + auto pathPatternVariant = NS3Lister::ES3PatternVariant::FilePattern; + auto hasDirectories = std::find_if(paths.begin(), paths.end(), [](const TPath& a) { + return a.IsDirectory; + }) != paths.end(); + + if (hasDirectories) { + auto pathPatternValue = srcDesc.GetSettings().find("pathpattern"); + if (pathPatternValue == srcDesc.GetSettings().cend()) { + ythrow yexception() << "'pathpattern' must be 
configured for directory listing"; + } + pathPattern = pathPatternValue->second; + + auto pathPatternVariantValue = srcDesc.GetSettings().find("pathpatternvariant"); + if (pathPatternVariantValue == srcDesc.GetSettings().cend()) { + ythrow yexception() + << "'pathpatternvariant' must be configured for directory listing"; + } + if (!TryFromString(pathPatternVariantValue->second, pathPatternVariant)) { + ythrow yexception() + << "Unknown 'pathpatternvariant': " << pathPatternVariantValue->second; + } + } + auto consumersCount = hasDirectories ? maxPartitions : paths.size(); + + auto fileQueuePrefetchSize = State_->Configuration->FileQueuePrefetchSize.Get() + .GetOrElse(consumersCount * srcDesc.GetParallelDownloadCount() * 3); + + YQL_CLOG(DEBUG, ProviderS3) << " hasDirectories=" << hasDirectories << ", consumersCount=" << consumersCount; + + auto fileQueueActor = NActors::TActivationContext::ActorSystem()->Register( + NDq::CreateS3FileQueueActor( + 0ul, + std::move(paths), + fileQueuePrefetchSize, + fileSizeLimit, + useRuntimeListing, + consumersCount, + fileQueueBatchSizeLimit, + fileQueueBatchObjectCountLimit, + State_->Gateway, + connect.Url, + GetAuthInfo(State_->CredentialsFactory, State_->Configuration->Tokens.at(cluster)), + pathPattern, + pathPatternVariant, + NS3Lister::ES3PatternType::Wildcard + ), + NActors::TMailboxType::HTSwap, + State_->ExecutorPoolId + ); + + NActorsProto::TActorId protoId; + ActorIdToProto(fileQueueActor, &protoId); + TString stringId; + google::protobuf::TextFormat::PrintToString(protoId, &stringId); + + srcDesc.MutableSettings()->insert({"fileQueueActor", stringId}); + } +#endif protoSettings.PackFrom(srcDesc); sourceType = "S3Source"; } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp index f5b85cd31c51..2b8e9b649f38 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp +++ 
b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp @@ -79,7 +79,7 @@ struct TGeneratedColumnsConfig { class TS3IODiscoveryTransformer : public TGraphTransformerBase { public: - TS3IODiscoveryTransformer(TS3State::TPtr state, IHTTPGateway::TPtr gateway) + TS3IODiscoveryTransformer(TS3State::TPtr state) : State_(std::move(state)) , ListerFactory_(NS3Lister::MakeS3ListerFactory( State_->Configuration->MaxInflightListsPerQuery, @@ -87,7 +87,7 @@ class TS3IODiscoveryTransformer : public TGraphTransformerBase { State_->Configuration->ListingCallbackPerThreadQueueSize, State_->Configuration->RegexpCacheSize)) , ListingStrategy_(MakeS3ListingStrategy( - gateway, + State_->Gateway, ListerFactory_, State_->Configuration->MinDesiredDirectoriesOfFilesPerQuery, State_->Configuration->MaxInflightListsPerQuery, @@ -870,8 +870,8 @@ class TS3IODiscoveryTransformer : public TGraphTransformerBase { } -THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state, IHTTPGateway::TPtr gateway) { - return THolder(new TS3IODiscoveryTransformer(std::move(state), std::move(gateway))); +THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state) { + return THolder(new TS3IODiscoveryTransformer(std::move(state))); } } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp index a8de12679332..d72cd9b40535 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider.cpp @@ -31,14 +31,14 @@ TDataProviderInitializer GetS3DataProviderInitializer(IHTTPGateway::TPtr gateway if (gatewaysConfig) { state->Configuration->Init(gatewaysConfig->GetS3(), typeCtx); } - state->Configuration->AllowLocalFiles = allowLocalFiles; + state->Gateway = gateway; TDataProviderInfo info; info.Names.insert({TString{S3ProviderName}}); - info.Source = CreateS3DataSource(state, gateway); - info.Sink = CreateS3DataSink(state, gateway); + info.Source = 
CreateS3DataSource(state); + info.Sink = CreateS3DataSink(state); return info; }; diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider.h b/ydb/library/yql/providers/s3/provider/yql_s3_provider.h index d28144257583..dd02c29e0445 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider.h @@ -27,11 +27,13 @@ struct TS3State : public TThrRefBase TS3Configuration::TPtr Configuration = MakeIntrusive(); const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry = nullptr; ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory; + IHTTPGateway::TPtr Gateway; + ui32 ExecutorPoolId = 0; }; TDataProviderInitializer GetS3DataProviderInitializer(IHTTPGateway::TPtr gateway, ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory = nullptr, bool allowLocalFiles = false); -TIntrusivePtr CreateS3DataSource(TS3State::TPtr state, IHTTPGateway::TPtr gateway); -TIntrusivePtr CreateS3DataSink(TS3State::TPtr state, IHTTPGateway::TPtr gateway); +TIntrusivePtr CreateS3DataSource(TS3State::TPtr state); +TIntrusivePtr CreateS3DataSink(TS3State::TPtr state); } // namespace NYql diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h b/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h index e3694a3ba993..b399fd537356 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider_impl.h @@ -18,7 +18,7 @@ THolder CreateS3DataSinkExecTransformer(TS3State::TPtr sta THolder CreateS3LogicalOptProposalTransformer(TS3State::TPtr state); THolder CreateS3SourceCallableExecutionTransformer(TS3State::TPtr state); -THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state, IHTTPGateway::TPtr gateway); +THolder CreateS3IODiscoveryTransformer(TS3State::TPtr state); THolder CreateS3PhysicalOptProposalTransformer(TS3State::TPtr state); TExprNode::TPtr ExtractFormat(TExprNode::TListType& settings); diff 
--git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp index 411c6f3ada2a..a5c373ff677e 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp @@ -22,6 +22,10 @@ TS3Configuration::TS3Configuration() REGISTER_SETTING(*this, AtomicUploadCommit); REGISTER_SETTING(*this, UseConcurrentDirectoryLister); REGISTER_SETTING(*this, MaxDiscoveryFilesPerDirectory).Lower(1); + REGISTER_SETTING(*this, UseRuntimeListing); + REGISTER_SETTING(*this, FileQueueBatchSizeLimit); + REGISTER_SETTING(*this, FileQueueBatchObjectCountLimit); + REGISTER_SETTING(*this, FileQueuePrefetchSize); } TS3Settings::TConstPtr TS3Configuration::Snapshot() const { diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h index 9b6e2c12e87d..ebf6851a6fd3 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h @@ -24,10 +24,14 @@ struct TS3Settings { NCommon::TConfSetting AtomicUploadCommit; // Commit each file independently, w/o transaction semantic over all files NCommon::TConfSetting UseConcurrentDirectoryLister; NCommon::TConfSetting MaxDiscoveryFilesPerDirectory; + NCommon::TConfSetting UseRuntimeListing; // Enables runtime listing + NCommon::TConfSetting FileQueueBatchSizeLimit; // Limits total size of files in one PathBatch from FileQueue + NCommon::TConfSetting FileQueueBatchObjectCountLimit; // Limits count of files in one PathBatch from FileQueue + NCommon::TConfSetting FileQueuePrefetchSize; }; struct TS3ClusterSettings { - TString Url, Token; + TString Url; }; struct TS3Configuration : public TS3Settings, public NCommon::TSettingDispatcher { diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp 
index 0383ee14662c..bbf8883ec5f2 100644 --- a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp +++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp @@ -94,7 +94,7 @@ class TSolomonDqIntegration: public TDqIntegrationBase { YQL_ENSURE(false, "Unimplemented"); } - void FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString& ) override { + void FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&, size_t) override { YQL_ENSURE(false, "Unimplemented"); } diff --git a/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp b/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp index ab320337bfc1..28d4aebde0f2 100644 --- a/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp +++ b/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp @@ -114,7 +114,7 @@ class TYdbDqIntegration: public TDqIntegrationBase { return read; } - void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType) override { + void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override { const TDqSource source(&node); if (const auto maySettings = source.Settings().Maybe()) { const auto settings = maySettings.Cast(); diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index a9185ddf3413..4f087ab237a6 100644 --- a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -1050,7 +1050,7 @@ class TCreateTableNode final: public TAstListNode { Y_ENSURE(resetableParam, "Empty parameter"); Y_ENSURE(resetableParam.IsSet(), "Can't reset " << resetableParam.GetValueReset().Name << " in create mode"); const auto& [id, value] = resetableParam.GetValueSet(); - settings = L(settings, Q(Y(Q(to_lower(id.Name)), value))); + settings = L(settings, Q(Y(Q(id.Name), value))); } if (Params.TableSettings.CompactionPolicy) { settings = 
L(settings, Q(Y(Q("compactionPolicy"), Params.TableSettings.CompactionPolicy))); @@ -1300,9 +1300,9 @@ class TAlterTableNode final: public TAstListNode { Y_ENSURE(resetableParam, "Empty parameter"); if (resetableParam.IsSet()) { const auto& [id, value] = resetableParam.GetValueSet(); - settings = L(settings, Q(Y(Q(to_lower(id.Name)), value))); + settings = L(settings, Q(Y(Q(id.Name), value))); } else { - settings = L(settings, Q(Y(Q(to_lower(resetableParam.GetValueReset().Name))))); + settings = L(settings, Q(Y(Q(resetableParam.GetValueReset().Name)))); } } if (Params.TableSettings.CompactionPolicy) { diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index ee7420fcef41..4975edbb1b37 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -6053,7 +6053,7 @@ Y_UNIT_TEST_SUITE(ExternalTable) { TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { if (word == "Write") { UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('dropColumns '('"my_column")#"); - UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('other_prop (String '"42")) '('x (String '"y")))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('Other_Prop (String '"42")) '('x (String '"y")))))#"); UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#"); UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); } diff --git a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json index 20071b53ef3e..55985fcc6192 100644 --- a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json @@ -956,30 +956,30 @@ "test.test[join-count_bans--Results]": [], "test.test[join-grace_join2--Analyze]": [ { - "checksum": "45db7c8306c9626a640bcb81c9c76780", - "size": 4462, - "uri": 
"https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt" + "checksum": "759025fd6317614a253eae816ff5941d", + "size": 5059, + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt" } ], "test.test[join-grace_join2--Debug]": [ { - "checksum": "0684948a27f55b655c998444a9060053", - "size": 1890, - "uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched" + "checksum": "34fdff009f1cfcdc53164eeb5db58dd7", + "size": 2171, + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched" } ], "test.test[join-grace_join2--Plan]": [ { - "checksum": "45db7c8306c9626a640bcb81c9c76780", - "size": 4462, - "uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt" + "checksum": "759025fd6317614a253eae816ff5941d", + "size": 5059, + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt" } ], "test.test[join-grace_join2--Results]": [ { - "checksum": "65a9b307bc9899b17f61962a5d4a49fb", + "checksum": "2ad0b4f3207032d285d5f99430e9abaf", "size": 5737, - "uri": "https://{canondata_backend}/1899731/149477001e0a8762e03fe5262dd2d939b716f0bf/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt" + "uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt" } ], "test.test[join-inmem_by_uncomparable_structs--Analyze]": [ diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 
0a281e083086..ff84bab161a9 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -7393,9 +7393,9 @@ ], "test_sql2yql.test[join-grace_join2]": [ { - "checksum": "4909542187f7c74060abc053d5707f26", - "size": 1627, - "uri": "https://{canondata_backend}/1942278/d84f6d9ab025b27e11f463124468076d499ed9b3/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql" + "checksum": "dec15765d9200297261bb22775ec5338", + "size": 1782, + "uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql" } ], "test_sql2yql.test[join-group_compact_by]": [ @@ -24865,9 +24865,9 @@ ], "test_sql_format.test[join-grace_join2]": [ { - "checksum": "4946227ff929407fc62f749ef756ef4d", - "size": 185, - "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql" + "checksum": "7656454a9434ff51ab800908ae346c42", + "size": 233, + "uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql" } ], "test_sql_format.test[join-group_compact_by]": [ @@ -25635,9 +25635,9 @@ ], "test_sql_format.test[join-nopushdown_filter_with_depends_on]": [ { - "checksum": "7c0b7c120f321f9b415663ece29a09cd", - "size": 247, - "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql" + "checksum": "956eea7d7ef4126950ed02a322c6c492", + "size": 272, + "uri": "https://{canondata_backend}/212715/1c52a4632d14126361f7585c218d202718c6fa0f/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql" } ], "test_sql_format.test[join-opt_on_opt_side]": [ diff --git a/ydb/library/yql/tests/sql/suites/join/grace_join2.sql 
b/ydb/library/yql/tests/sql/suites/join/grace_join2.sql index ee9866dbf939..1b10d992e347 100644 --- a/ydb/library/yql/tests/sql/suites/join/grace_join2.sql +++ b/ydb/library/yql/tests/sql/suites/join/grace_join2.sql @@ -7,4 +7,4 @@ from plato.customers1 as c1 join plato.customers1 as c2 -on c1.country_id = c2.country_id; +on c1.country_id = c2.country_id order by c1.customer_id, c2.customer_id; diff --git a/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql b/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql index e1dc4b96829c..8e43238765d9 100644 --- a/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql +++ b/ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql @@ -1,4 +1,5 @@ /* postgres can not */ +/* hybridfile can not */ /* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 4 */ use plato; diff --git a/ydb/library/yql/tools/dqrun/dqrun.cpp b/ydb/library/yql/tools/dqrun/dqrun.cpp index 873892e7a741..86e98772ad63 100644 --- a/ydb/library/yql/tools/dqrun/dqrun.cpp +++ b/ydb/library/yql/tools/dqrun/dqrun.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,7 @@ #include #include #include +#include #include #ifdef PROFILE_MEMORY_ALLOCATIONS @@ -226,14 +228,20 @@ class TOptPipelineConfigurator : public IPipelineConfigurator { IOutputStream* TracePlan; }; -NDq::IDqAsyncIoFactory::TPtr CreateAsyncIoFactory(const NYdb::TDriver& driver, IHTTPGateway::TPtr httpGateway, NYql::NConnector::IClient::TPtr genericClient, size_t HTTPmaxTimeSeconds, size_t maxRetriesCount) { +NDq::IDqAsyncIoFactory::TPtr CreateAsyncIoFactory( + const NYdb::TDriver& driver, + IHTTPGateway::TPtr httpGateway, + NYql::NConnector::IClient::TPtr genericClient, + ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory, + size_t HTTPmaxTimeSeconds, + size_t maxRetriesCount) { auto factory = MakeIntrusive(); RegisterDqPqReadActorFactory(*factory, 
driver, nullptr); RegisterYdbReadActorFactory(*factory, driver, nullptr); RegisterS3ReadActorFactory(*factory, nullptr, httpGateway, GetHTTPDefaultRetryPolicy(TDuration::Seconds(HTTPmaxTimeSeconds), maxRetriesCount), {}, nullptr); RegisterS3WriteActorFactory(*factory, nullptr, httpGateway); RegisterClickHouseReadActorFactory(*factory, nullptr, httpGateway); - RegisterGenericReadActorFactory(*factory, nullptr, genericClient); + RegisterGenericReadActorFactory(*factory, credentialsFactory, genericClient); RegisterDqPqWriteActorFactory(*factory, driver, nullptr); @@ -267,7 +275,8 @@ struct TActorIds { std::tuple, TActorIds> RunActorSystem( const TGatewaysConfig& gatewaysConfig, IMetricsRegistryPtr& metricsRegistry, - NYql::NLog::ELevel loggingLevel + NYql::NLog::ELevel loggingLevel, + ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory ) { auto actorSystemManager = std::make_unique(metricsRegistry, YqlToActorsLogLevel(loggingLevel)); TActorIds actorIds; @@ -288,7 +297,7 @@ std::tuple, TActorIds> RunActorSystem( auto httpProxy = NHttp::CreateHttpProxy(); actorIds.HttpProxy = actorSystemManager->GetActorSystem()->Register(httpProxy); - auto databaseResolver = NFq::CreateDatabaseResolver(actorIds.HttpProxy, nullptr); + auto databaseResolver = NFq::CreateDatabaseResolver(actorIds.HttpProxy, credentialsFactory); actorIds.DatabaseResolver = actorSystemManager->GetActorSystem()->Register(databaseResolver); } @@ -427,6 +436,7 @@ int RunMain(int argc, const char* argv[]) TString mountConfig; TString mestricsPusherConfig; TString udfResolver; + TString tokenAccessorEndpoint; bool udfResolverFilterSyscalls = false; TString statFile; TString metricsFile; @@ -585,6 +595,10 @@ int RunMain(int argc, const char* argv[]) failureInjections[key] = std::make_pair(ui32(0), FromString(fail)); } }); + opts.AddLongOption("token-accessor-endpoint", "Network address of Token Accessor service in format grpc(s)://host:port") + .Optional() + .RequiredArgument("ENDPOINT") + 
.StoreResult(&tokenAccessorEndpoint); opts.AddHelpOption('h'); opts.SetFreeArgsNum(0); @@ -745,12 +759,21 @@ int RunMain(int argc, const char* argv[]) dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway)); } + ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory; + + if (tokenAccessorEndpoint) { + TVector ss = StringSplitter(tokenAccessorEndpoint).SplitByString("://"); + YQL_ENSURE(ss.size() == 2, "Invalid tokenAccessorEndpoint: " << tokenAccessorEndpoint); + + credentialsFactory = NYql::CreateSecuredServiceAccountCredentialsOverTokenAccessorFactory(ss[1], ss[0] == "grpcs", ""); + } + auto dqCompFactory = NMiniKQL::GetCompositeWithBuiltinFactory(factories); // Actor system starts here and will be automatically destroyed when goes out of the scope. std::unique_ptr actorSystemManager; TActorIds actorIds; - std::tie(actorSystemManager, actorIds) = RunActorSystem(gatewaysConfig, metricsRegistry, loggingLevel); + std::tie(actorSystemManager, actorIds) = RunActorSystem(gatewaysConfig, metricsRegistry, loggingLevel, credentialsFactory); IHTTPGateway::TPtr httpGateway; if (gatewaysConfig.HasClickHouse()) { @@ -781,7 +804,8 @@ int RunMain(int argc, const char* argv[]) } genericClient = NConnector::MakeClientGRPC(gatewaysConfig.GetGeneric().GetConnector()); - dataProvidersInit.push_back(GetGenericDataProviderInitializer(genericClient, dbResolver)); + + dataProvidersInit.push_back(GetGenericDataProviderInitializer(genericClient, dbResolver, credentialsFactory)); } if (gatewaysConfig.HasYdb()) { @@ -847,10 +871,9 @@ int RunMain(int argc, const char* argv[]) size_t requestTimeout = gatewaysConfig.HasHttpGateway() && gatewaysConfig.GetHttpGateway().HasRequestTimeoutSeconds() ? gatewaysConfig.GetHttpGateway().GetRequestTimeoutSeconds() : 100; size_t maxRetries = gatewaysConfig.HasHttpGateway() && gatewaysConfig.GetHttpGateway().HasMaxRetries() ? 
gatewaysConfig.GetHttpGateway().GetMaxRetries() : 2; - bool enableSpilling = res.Has("enable-spilling"); dqGateway = CreateLocalDqGateway(funcRegistry.Get(), dqCompFactory, dqTaskTransformFactory, dqTaskPreprocessorFactories, enableSpilling, - CreateAsyncIoFactory(driver, httpGateway, genericClient, requestTimeout, maxRetries), threads, + CreateAsyncIoFactory(driver, httpGateway, genericClient, credentialsFactory, requestTimeout, maxRetries), threads, metricsRegistry, metricsPusherFactory); } diff --git a/ydb/library/yql/tools/dqrun/ya.make b/ydb/library/yql/tools/dqrun/ya.make index 34114429b797..e2df01c56264 100644 --- a/ydb/library/yql/tools/dqrun/ya.make +++ b/ydb/library/yql/tools/dqrun/ya.make @@ -42,6 +42,7 @@ ENDIF() ydb/library/yql/providers/clickhouse/provider ydb/library/yql/providers/common/comp_nodes ydb/library/yql/providers/common/proto + ydb/library/yql/providers/common/token_accessor/client ydb/library/yql/providers/common/udf_resolve ydb/library/yql/providers/generic/actors ydb/library/yql/providers/generic/provider diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 70b8a8668d11..80c39b86479a 100644 --- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -2,6 +2,8 @@ #if USE_ARROW || USE_PARQUET +#include +#include #include #include #include @@ -394,6 +396,43 @@ namespace NDB } } + static void fillArrowArrayWithDateTime64ColumnData( + const DataTypePtr & type, + ColumnPtr write_column, + const PaddedPODArray * null_bytemap, + const String & format_name, + arrow::ArrayBuilder* array_builder, + size_t start, + size_t end) + { + const auto * datetime64_type = assert_cast(type.get()); + const auto & column = assert_cast 
&>(*write_column); + arrow::TimestampBuilder & builder = assert_cast(*array_builder); + arrow::Status status; + + auto scale = datetime64_type->getScale(); + bool need_rescale = scale % 3; + auto rescale_multiplier = DecimalUtils::scaleMultiplier(3 - scale % 3); + for (size_t value_i = start; value_i < end; ++value_i) + { + if (null_bytemap && (*null_bytemap)[value_i]) + { + status = builder.AppendNull(); + } + else + { + auto value = static_cast(column[value_i].get>().getValue()); + if (need_rescale) + { + if (common::mulOverflow(value, rescale_multiplier, value)) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow"); + } + status = builder.Append(value); + } + checkStatus(status, write_column->getName(), format_name); + } + } + static void fillArrowArray( const String & column_name, ColumnPtr & column, @@ -454,6 +493,10 @@ namespace NDB DataTypePtr array_type = assert_cast(column_type.get())->getNestedType(); fillArrowArrayWithArrayColumnData(column_name, column_array, array_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); } + else if (isDateTime64(column_type)) + { + fillArrowArrayWithDateTime64ColumnData(column_type, column, null_bytemap, format_name, array_builder, start, end); + } else if (isDecimal(column_type)) { auto fill_decimal = [&](const auto & types) -> bool @@ -548,6 +591,18 @@ namespace NDB } } + static arrow::TimeUnit::type getArrowTimeUnit(const DataTypeDateTime64 * type) + { + UInt32 scale = type->getScale(); + if (scale == 0) + return arrow::TimeUnit::SECOND; + if (scale > 0 && scale <= 3) + return arrow::TimeUnit::MILLI; + if (scale > 3 && scale <= 6) + return arrow::TimeUnit::MICRO; + return arrow::TimeUnit::NANO; + } + static std::shared_ptr getArrowType( DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable) { @@ -630,6 +685,12 @@ namespace NDB getArrowType(val_type, columns[1], column_name, format_name, 
out_is_column_nullable)); } + if (isDateTime64(column_type)) + { + const auto * datetime64_type = assert_cast(column_type.get()); + return arrow::timestamp(getArrowTimeUnit(datetime64_type), datetime64_type->getTimeZone().getTimeZone()); + } + const std::string type_name = column_type->getFamilyName(); if (const auto * arrow_type_it = std::find_if( internal_type_to_arrow_type.begin(), diff --git a/ydb/public/sdk/cpp/client/ydb_params/params.h b/ydb/public/sdk/cpp/client/ydb_params/params.h index 6e8b204408e8..5d29822f1805 100644 --- a/ydb/public/sdk/cpp/client/ydb_params/params.h +++ b/ydb/public/sdk/cpp/client/ydb_params/params.h @@ -28,6 +28,7 @@ namespace NExperimental { namespace NQuery { class TExecQueryImpl; + class TQueryClient; } class TParamsBuilder; @@ -40,6 +41,7 @@ class TParams { friend class NScripting::TScriptingClient; friend class NExperimental::TStreamQueryClient; friend class NQuery::TExecQueryImpl; + friend class NQuery::TQueryClient; friend class NYdb::TProtoAccessor; public: bool Empty() const; diff --git a/ydb/public/sdk/cpp/client/ydb_query/client.cpp b/ydb/public/sdk/cpp/client/ydb_query/client.cpp index 22d0389d3846..93926e51df36 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/client.cpp +++ b/ydb/public/sdk/cpp/client/ydb_query/client.cpp @@ -81,7 +81,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public Connections_, DbDriverState_, query, txControl, params, settings, session); } - NThreading::TFuture ExecuteScript(const TString& script, const TExecuteScriptSettings& settings) { + NThreading::TFuture ExecuteScript(const TString& script, const TMaybe& params, const TExecuteScriptSettings& settings) { using namespace Ydb::Query; auto request = MakeOperationRequest(settings); request.set_exec_mode(settings.ExecMode_); @@ -89,6 +89,11 @@ class TQueryClient::TImpl: public TClientImplCommon, public request.mutable_script_content()->set_syntax(settings.Syntax_); request.mutable_script_content()->set_text(script); 
SetDuration(settings.ResultsTtl_, *request.mutable_results_ttl()); + + if (params) { + *request.mutable_parameters() = params->GetProtoMap(); + } + auto promise = NThreading::NewPromise(); auto responseCb = [promise] @@ -536,7 +541,13 @@ TAsyncExecuteQueryIterator TQueryClient::StreamExecuteQuery(const TString& query NThreading::TFuture TQueryClient::ExecuteScript(const TString& script, const TExecuteScriptSettings& settings) { - return Impl_->ExecuteScript(script, settings); + return Impl_->ExecuteScript(script, {}, settings); +} + +NThreading::TFuture TQueryClient::ExecuteScript(const TString& script, + const TParams& params, const TExecuteScriptSettings& settings) +{ + return Impl_->ExecuteScript(script, params, settings); } TAsyncFetchScriptResultsResult TQueryClient::FetchScriptResults(const NKikimr::NOperationId::TOperationId& operationId, int64_t resultSetIndex, diff --git a/ydb/public/sdk/cpp/client/ydb_query/client.h b/ydb/public/sdk/cpp/client/ydb_query/client.h index b77461771729..d6a09ed6b635 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/client.h +++ b/ydb/public/sdk/cpp/client/ydb_query/client.h @@ -90,6 +90,9 @@ class TQueryClient { NThreading::TFuture ExecuteScript(const TString& script, const TExecuteScriptSettings& settings = TExecuteScriptSettings()); + NThreading::TFuture ExecuteScript(const TString& script, + const TParams& params, const TExecuteScriptSettings& settings = TExecuteScriptSettings()); + TAsyncFetchScriptResultsResult FetchScriptResults(const NKikimr::NOperationId::TOperationId& operationId, int64_t resultSetIndex, const TFetchScriptResultsSettings& settings = TFetchScriptResultsSettings()); diff --git a/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp b/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp index ceeaf25ab919..ef5854b983c2 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp +++ b/ydb/public/sdk/cpp/client/ydb_query/impl/exec_query.cpp @@ -136,16 +136,21 @@ struct TExecuteQueryBuffer : 
public TThrRefBase, TNonCopyable { Iterator_.ReadNext().Subscribe([self](TAsyncExecuteQueryPart partFuture) mutable { auto part = partFuture.ExtractValue(); + if (const auto& st = part.GetStats()) { + self->Stats_ = st; + } + if (!part.IsSuccess()) { + TMaybe stats; + std::swap(self->Stats_, stats); + if (part.EOS()) { TVector issues; TVector resultProtos; - TMaybe stats; TMaybe tx; std::swap(self->Issues_, issues); std::swap(self->ResultSets_, resultProtos); - std::swap(self->Stats_, stats); std::swap(self->Tx_, tx); TVector resultSets; @@ -160,7 +165,7 @@ struct TExecuteQueryBuffer : public TThrRefBase, TNonCopyable { std::move(tx) )); } else { - self->Promise_.SetValue(TExecuteQueryResult(std::move(part), {}, {}, {})); + self->Promise_.SetValue(TExecuteQueryResult(std::move(part), {}, std::move(stats), {})); } return; @@ -185,10 +190,6 @@ struct TExecuteQueryBuffer : public TThrRefBase, TNonCopyable { resultSet.mutable_rows()->Add(inRsProto.rows().begin(), inRsProto.rows().end()); } - if (const auto& st = part.GetStats()) { - self->Stats_ = st; - } - if (const auto& tx = part.GetTransaction()) { self->Tx_ = tx; } diff --git a/ydb/public/sdk/cpp/client/ydb_query/stats.cpp b/ydb/public/sdk/cpp/client/ydb_query/stats.cpp index f5fbc9d6c02e..c007547d4e84 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/stats.cpp +++ b/ydb/public/sdk/cpp/client/ydb_query/stats.cpp @@ -46,6 +46,16 @@ TMaybe TExecStats::GetPlan() const { return proto.query_plan(); } +TMaybe TExecStats::GetAst() const { + auto proto = Impl_->Proto; + + if (proto.query_ast().empty()) { + return {}; + } + + return proto.query_ast(); +} + TDuration TExecStats::GetTotalDuration() const { return TDuration::MicroSeconds(Impl_->Proto.total_duration_us()); } diff --git a/ydb/public/sdk/cpp/client/ydb_query/stats.h b/ydb/public/sdk/cpp/client/ydb_query/stats.h index 1fed19f6e353..3a62045a72f9 100644 --- a/ydb/public/sdk/cpp/client/ydb_query/stats.h +++ b/ydb/public/sdk/cpp/client/ydb_query/stats.h @@ -28,6 +28,7 
@@ class TExecStats { TString ToString(bool withPlan = false) const; TMaybe GetPlan() const; + TMaybe GetAst() const; TDuration GetTotalDuration() const; TDuration GetTotalCpuTime() const; diff --git a/ydb/public/tools/lib/cmds/__init__.py b/ydb/public/tools/lib/cmds/__init__.py index 81289eb06907..591ed4dfe06b 100644 --- a/ydb/public/tools/lib/cmds/__init__.py +++ b/ydb/public/tools/lib/cmds/__init__.py @@ -8,7 +8,9 @@ import string import typing # noqa: F401 import sys +from six.moves.urllib.parse import urlparse +from ydb.library.yql.providers.common.proto.gateways_config_pb2 import TGenericConnectorConfig from ydb.tests.library.common import yatest_common from ydb.tests.library.harness.kikimr_cluster import kikimr_cluster_factory from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator @@ -255,6 +257,34 @@ def enable_tls(): return os.getenv('YDB_GRPC_ENABLE_TLS') == 'true' +def generic_connector_config(): + endpoint = os.getenv("FQ_CONNECTOR_ENDPOINT") + if not endpoint: + return None + + parsed = urlparse(endpoint) + if not parsed.hostname: + raise ValueError("Invalid host '{}' in FQ_CONNECTOR_ENDPOINT".format(parsed.hostname)) + + if not (1024 <= parsed.port <= 65535): + raise ValueError("Invalid port '{}' in FQ_CONNECTOR_ENDPOINT".format(parsed.port)) + + valid_schemes = ['grpc', 'grpcs'] + if parsed.scheme not in valid_schemes: + raise ValueError("Invalid schema '{}' in FQ_CONNECTOR_ENDPOINT (possible: {})".format(parsed.scheme, valid_schemes)) + + cfg = TGenericConnectorConfig() + cfg.Endpoint.host = parsed.hostname + cfg.Endpoint.port = parsed.port + + if parsed.scheme == 'grpc': + cfg.UseSsl = False + elif parsed.scheme == 'grpcs': + cfg.UseSsl = True + + return cfg + + def grpc_tls_data_path(arguments): default_store = arguments.ydb_working_dir if arguments.ydb_working_dir else None return os.getenv('YDB_GRPC_TLS_DATA_PATH', default_store) @@ -335,6 +365,7 @@ def deploy(arguments): default_users=default_users(), 
extra_feature_flags=enable_feature_flags, extra_grpc_services=arguments.enabled_grpc_services, + generic_connector_config=generic_connector_config(), **optionals ) diff --git a/ydb/public/tools/lib/cmds/ut/test.py b/ydb/public/tools/lib/cmds/ut/test.py new file mode 100644 index 000000000000..e5164d2413d5 --- /dev/null +++ b/ydb/public/tools/lib/cmds/ut/test.py @@ -0,0 +1,26 @@ +import os + +from ydb.public.tools.lib.cmds import generic_connector_config +from ydb.library.yql.providers.common.proto.gateways_config_pb2 import TGenericConnectorConfig + + +def test_kikimr_config_generator_generic_connector_config(): + os.environ["FQ_CONNECTOR_ENDPOINT"] = "grpc://localhost:50051" + + expected = TGenericConnectorConfig() + expected.Endpoint.host = "localhost" + expected.Endpoint.port = 50051 + expected.UseSsl = False + + actual = generic_connector_config() + assert actual == expected + + os.environ["FQ_CONNECTOR_ENDPOINT"] = "grpcs://localhost:50051" + + expected = TGenericConnectorConfig() + expected.Endpoint.host = "localhost" + expected.Endpoint.port = 50051 + expected.UseSsl = True + + actual = generic_connector_config() + assert actual == expected diff --git a/ydb/public/tools/lib/cmds/ut/ya.make b/ydb/public/tools/lib/cmds/ut/ya.make new file mode 100644 index 000000000000..97b269c2ee0b --- /dev/null +++ b/ydb/public/tools/lib/cmds/ut/ya.make @@ -0,0 +1,12 @@ +PY3TEST() + +PEERDIR( + ydb/public/tools/lib/cmds + ydb/library/yql/providers/common/proto +) + +TEST_SRCS( + test.py +) + +END() diff --git a/ydb/public/tools/lib/cmds/ya.make b/ydb/public/tools/lib/cmds/ya.make index d8ef2f061bd9..53ccb215ec56 100644 --- a/ydb/public/tools/lib/cmds/ya.make +++ b/ydb/public/tools/lib/cmds/ya.make @@ -4,8 +4,11 @@ PY_SRCS( ) PEERDIR( - ydb/tests/library + contrib/python/six library/python/testing/recipe + ydb/tests/library ) END() + +RECURSE_FOR_TESTS(ut) diff --git a/ydb/public/tools/local_ydb/__main__.py b/ydb/public/tools/local_ydb/__main__.py index 
0d7affd19c5a..f315b682cf9a 100644 --- a/ydb/public/tools/local_ydb/__main__.py +++ b/ydb/public/tools/local_ydb/__main__.py @@ -9,7 +9,7 @@ \033[94m To deploy the local YDB cluster: - {prog} deploy --ydb-working-dir /absolute/path/to/working/directory --ydb-binary-path /path/to/kikimr/driver + {prog} deploy --ydb-working-dir /absolute/path/to/working/directory --ydb-binary-path /path/to/kikimr/driver To cleanup the deployed YDB cluster (this includes removal of working directory, all configuration files, disks and so on): diff --git a/ydb/public/tools/local_ydb/ya.make b/ydb/public/tools/local_ydb/ya.make index e5a2a4165e30..9fa570b365df 100644 --- a/ydb/public/tools/local_ydb/ya.make +++ b/ydb/public/tools/local_ydb/ya.make @@ -3,6 +3,7 @@ PY3_PROGRAM(local_ydb) PY_SRCS(__main__.py) PEERDIR( + ydb/library/yql/providers/common/proto ydb/public/tools/lib/cmds ) diff --git a/ydb/services/fq/ut_integration/fq_ut.cpp b/ydb/services/fq/ut_integration/fq_ut.cpp index 1b5764fee4cf..c011c80138d7 100644 --- a/ydb/services/fq/ut_integration/fq_ut.cpp +++ b/ydb/services/fq/ut_integration/fq_ut.cpp @@ -197,32 +197,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { } } - Y_UNIT_TEST(Basic_EmptyTable) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - UpsertToExistingTable(driver, location); - NYdb::NFq::TClient client(driver); - const TString folderId = "some_folder_id"; - { - const auto request = ::NFq::TCreateConnectionBuilder() - .SetName("testdbempty") - .CreateYdb("Root", location, "") - .Build(); - const auto result = client - .CreateConnection(request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_C(result.GetStatus() == EStatus::SUCCESS, result.GetIssues().ToString()); - } - - const TString queryId = CreateNewHistoryAndWaitFinish( - folderId, client, - "select count(*) from 
testdbempty.`yq/empty_table`", - FederatedQuery::QueryMeta::COMPLETED); - CheckGetResultData(client, queryId, folderId, 1, 1, 0); - } - Y_UNIT_TEST(Basic_EmptyList) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); ui16 grpc = server.GetPort(); @@ -256,32 +230,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { CreateNewHistoryAndWaitFinish(folderId, client, "select null", expectedStatus); } - SIMPLE_UNIT_FORKED_TEST(Basic_Tagged) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - NYdb::NFq::TClient client(driver); - const TString folderId = "some_folder_id"; - - - { - auto request = ::NFq::TCreateConnectionBuilder{} - .SetName("testdb00") - .CreateYdb("Root", location, "") - .Build(); - - auto result = client.CreateConnection( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - } - - auto expectedStatus = FederatedQuery::QueryMeta::COMPLETED; - CreateNewHistoryAndWaitFinish(folderId, client, "select AsTagged(count(*), \"tag\") from testdb00.`yq/connections`", expectedStatus); - } - Y_UNIT_TEST(Basic_TaggedLiteral) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); ui16 grpc = server.GetPort(); @@ -295,50 +243,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { } // use fork for data test due to ch initialization problem - SIMPLE_UNIT_FORKED_TEST(ExtendedDatabaseId) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - - NYdb::NFq::TClient client(driver); - const TString folderId = "folder_id_" + CreateGuidAsString(); - { - const auto request = ::NFq::TCreateConnectionBuilder() - 
.SetName("testdb01") - .CreateYdb("FakeDatabaseId", "") - .Build(); - const auto result = client - .CreateConnection(request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_C(result.GetStatus() == EStatus::SUCCESS, result.GetIssues().ToString()); - } - - { - const auto request = ::NFq::TCreateConnectionBuilder() - .SetName("testdb02") - .CreateYdb("FakeDatabaseId", "") - .Build(); - const auto result = client - .CreateConnection(request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_C(result.GetStatus() == EStatus::SUCCESS, result.GetIssues().ToString()); - } - - { - const auto queryId = CreateNewHistoryAndWaitFinish(folderId, client, - "select count(*) from testdb01.`yq/connections`", FederatedQuery::QueryMeta::COMPLETED); - CheckGetResultData(client, queryId, folderId, 1, 1, 2); - } - - { - // test connections db with 2 databaseId - const auto queryId = CreateNewHistoryAndWaitFinish(folderId, client, - "select count(*) from testdb02.`yq/connections`", FederatedQuery::QueryMeta::COMPLETED); - CheckGetResultData(client, queryId, folderId, 1, 1, 2); - } - } - Y_UNIT_TEST(DescribeConnection) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); ui16 grpc = server.GetPort(); @@ -855,70 +759,6 @@ Y_UNIT_TEST_SUITE(Yq_1) { } } -Y_UNIT_TEST_SUITE(Yq_2) { - SIMPLE_UNIT_FORKED_TEST(ReadFromYdbOverYq) { - TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); - ui16 grpc = server.GetPort(); - TString location = TStringBuilder() << "localhost:" << grpc; - auto driver = TDriver(TDriverConfig().SetEndpoint(location).SetAuthToken("root@builtin")); - NYdb::NFq::TClient client(driver); - const auto folderId = TString(__func__) + "folder_id"; - - { - auto request = ::NFq::TCreateConnectionBuilder{} - .SetName("testdb00") - .CreateYdb("Root", location, "") - .Build(); - - auto result = client.CreateConnection( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, 
result.GetIssues().ToString()); - } - - TString queryId; - { - auto request = ::NFq::TCreateQueryBuilder{} - .SetText("select count(*) from testdb00.`yq/connections`") - .Build(); - auto result = client.CreateQuery( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - queryId = result.GetResult().query_id(); - } - - { - auto request = ::NFq::TDescribeQueryBuilder{}.SetQueryId(queryId).Build(); - auto result = DoWithRetryOnRetCode([&]() { - auto result = client.DescribeQuery( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - const auto status = result.GetResult().query().meta().status(); - PrintProtoIssues(result.GetResult().query().issue()); - return status == FederatedQuery::QueryMeta::COMPLETED; - }, TRetryOptions(10)); - UNIT_ASSERT_C(result, "the execution of the query did not end within the time limit"); - } - - { - auto request = ::NFq::TGetResultDataBuilder{}.SetQueryId(queryId).Build(); - auto result = client.GetResultData( - request, CreateFqSettings(folderId)) - .ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - const auto& resultSet = result.GetResult().result_set(); - UNIT_ASSERT_VALUES_EQUAL(resultSet.rows().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(resultSet.columns().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(resultSet.rows(0).items(0).uint64_value(), 1); - } - } -} - Y_UNIT_TEST_SUITE(PrivateApi) { Y_UNIT_TEST(PingTask) { TKikimrWithGrpcAndRootSchema server({}, {}, {}, true); diff --git a/ydb/tests/fq/s3/canondata/result.json b/ydb/tests/fq/s3/canondata/result.json index e02eb08a50c9..9b05383e5ac5 100644 --- a/ydb/tests/fq/s3/canondata/result.json +++ b/ydb/tests/fq/s3/canondata/result.json @@ -77,6 +77,9 @@ 
"test_format_setting.TestS3.test_timestamp_simple_format_insert[v1-common/simple_format/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_format_insert[v1-common/simple_format/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_format_insert[v1-common/simple_format/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv" }, @@ -86,6 +89,9 @@ "test_format_setting.TestS3.test_timestamp_simple_format_insert[v2-common/simple_format/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_format_insert[v2-common/simple_format/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_format_insert[v2-common/simple_format/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv" }, @@ -95,6 +101,9 @@ "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v1-timestamp/simple_iso/test.json-json_each_row]": { "uri": 
"file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v1-timestamp/simple_iso/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v1-timestamp/simple_iso/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv" }, @@ -104,6 +113,9 @@ "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v2-timestamp/simple_iso/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v2-timestamp/simple_iso/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_iso_insert[v2-timestamp/simple_iso/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv" }, @@ -113,6 +125,9 @@ "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v1-common/simple_posix/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v1-common/simple_posix/test.parquet-parquet]": { + "uri": 
"file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v1-common/simple_posix/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv" }, @@ -122,6 +137,9 @@ "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v2-common/simple_posix/test.json-json_each_row]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json" }, + "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v2-common/simple_posix/test.parquet-parquet]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_simple_posix_insert[v2-common/simple_posix/test.tsv-tsv_with_names]": { "uri": "file://test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv" }, @@ -143,6 +161,15 @@ "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.json-json_each_row-UNIX_TIME_SECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json" }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MICROSECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet" + }, + 
"test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MILLISECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet" + }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_SECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_unix_time_insert[v1-timestamp/unix_time/test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv" }, @@ -170,6 +197,15 @@ "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.json-json_each_row-UNIX_TIME_SECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json" }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MICROSECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet" + }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_MILLISECONDS]": { + "uri": 
"file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet" + }, + "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.parquet-parquet-UNIX_TIME_SECONDS]": { + "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet" + }, "test_format_setting.TestS3.test_timestamp_unix_time_insert[v2-timestamp/unix_time/test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS]": { "uri": "file://test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv" }, diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv deleted file mode 100644 index d5849fbf9c86..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.csv-csv_with_names_/date_time_format_common_simple_format_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20",100 -"Apple",2,"2022-10-21",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json deleted file 
mode 100644 index 29b2d985fc93..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.json-json_each_row_/date_time_format_common_simple_format_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-21","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.parquet-parquet_/date_time_format_common_simple_format_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.parquet-parquet_/date_time_format_common_simple_format_test.parquet deleted file mode 100644 index d5de5793afcd..000000000000 Binary files a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.parquet-parquet_/date_time_format_common_simple_format_test.parquet and /dev/null differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv deleted file mode 100644 index 2f5ff74076fb..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/date_time_format_common_simple_format_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 100 -Apple 2 2022-10-21 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv deleted file mode 100644 index e0c73f1170e2..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.csv-csv_with_names_/date_time_simple_iso_test.csv +++ /dev/null @@ -1,4 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20T16:40:47Z",100 -"Apple",2,"2022-10-20T13:40:47Z",22 -"Pear",15,"2022-10-20T16:40:47Z",33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json deleted file mode 100644 index 88b515dd7ac4..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.json-json_each_row_/date_time_simple_iso_test.json +++ /dev/null @@ -1,3 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20T16:40:47Z","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20T13:40:47Z","Weight":22} -{"Fruit":"Pear","Price":15,"Time":"2022-10-20T16:40:47Z","Weight":33} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.parquet-parquet_/date_time_simple_iso_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.parquet-parquet_/date_time_simple_iso_test.parquet deleted file mode 100644 index 00869d5441c4..000000000000 Binary files a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.parquet-parquet_/date_time_simple_iso_test.parquet and /dev/null differ diff --git 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv deleted file mode 100644 index 2c33d3d95966..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_iso_insert_date_time_simple_iso_test.tsv-tsv_with_names_/date_time_simple_iso_test.tsv +++ /dev/null @@ -1,4 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20T16:40:47Z 100 -Apple 2 2022-10-20T13:40:47Z 22 -Pear 15 2022-10-20T16:40:47Z 33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv deleted file mode 100644 index 3084851bff2e..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20 16:40:47",100 -"Apple",2,"2022-10-20 16:41:47",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json deleted file mode 100644 index c5ac9fb0f5a8..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json +++ /dev/null @@ 
-1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20 16:40:47","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20 16:41:47","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv deleted file mode 100644 index 5353438e9ef0..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 16:40:47 100 -Apple 2 2022-10-20 16:41:47 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv deleted file mode 100644 index d5849fbf9c86..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.csv-csv_with_names_/timestamp_format_common_simple_format_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20",100 -"Apple",2,"2022-10-21",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json deleted file mode 100644 index 
29b2d985fc93..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.json-json_each_row_/timestamp_format_common_simple_format_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-21","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv deleted file mode 100644 index 2f5ff74076fb..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_common_simple_format_test.tsv-tsv_with_names_/timestamp_format_common_simple_format_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 100 -Apple 2 2022-10-21 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet similarity index 51% rename from ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet rename to ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet index d70e27676f48..790de8357026 100644 Binary files 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_date_time_simple_posix_insert_common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v1-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet new file mode 100644 index 000000000000..790de8357026 Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_format_insert_v2-common_simple_format_test.parquet-parquet_/timestamp_format_common_simple_format_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.csv-csv_with_names_/timestamp_simple_iso_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.csv-csv_with_names_/timestamp_simple_iso_test.csv deleted file mode 100644 index a17d70261ad6..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.csv-csv_with_names_/timestamp_simple_iso_test.csv +++ /dev/null @@ -1,5 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20T16:40:47.218000Z",100 -"Apple",2,"2022-10-20T13:40:47.000000Z",22 -"Pear",15,"2022-10-20T16:40:47.000000Z",33 -"Orange",1,"2022-10-20T16:40:47.218000Z",2 diff --git 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json deleted file mode 100644 index 364dcad2002b..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.json-json_each_row_/timestamp_simple_iso_test.json +++ /dev/null @@ -1,4 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20T16:40:47.218000Z","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20T13:40:47.000000Z","Weight":22} -{"Fruit":"Pear","Price":15,"Time":"2022-10-20T16:40:47.000000Z","Weight":33} -{"Fruit":"Orange","Price":1,"Time":"2022-10-20T16:40:47.218000Z","Weight":2} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv deleted file mode 100644 index f3986a8935f3..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_timestamp_simple_iso_test.tsv-tsv_with_names_/timestamp_simple_iso_test.tsv +++ /dev/null @@ -1,5 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20T16:40:47.218000Z 100 -Apple 2 2022-10-20T13:40:47.000000Z 22 -Pear 15 2022-10-20T16:40:47.000000Z 33 -Orange 1 2022-10-20T16:40:47.218000Z 2 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet new file mode 100644 index 000000000000..af3c1e98ac68 Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v1-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet new file mode 100644 index 000000000000..af3c1e98ac68 Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_iso_insert_v2-timestamp_simple_iso_test.parquet-parquet_/timestamp_simple_iso_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv deleted file mode 100644 index 3084851bff2e..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.csv-csv_with_names_/common_simple_posix_test.csv +++ /dev/null @@ -1,3 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"2022-10-20 16:40:47",100 -"Apple",2,"2022-10-20 16:41:47",22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json deleted file mode 100644 index c5ac9fb0f5a8..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.json-json_each_row_/common_simple_posix_test.json +++ /dev/null @@ -1,2 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"2022-10-20 16:40:47","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"2022-10-20 16:41:47","Weight":22} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv deleted file mode 100644 index 5353438e9ef0..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_common_simple_posix_test.tsv-tsv_with_names_/common_simple_posix_test.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Fruit Price Time Weight -Banana 3 2022-10-20 16:40:47 100 -Apple 2 2022-10-20 16:41:47 22 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet new file mode 100644 index 000000000000..1d17c634854c Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v1-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet differ diff --git 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet new file mode 100644 index 000000000000..1d17c634854c Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_simple_posix_insert_v2-common_simple_posix_test.parquet-parquet_/common_simple_posix_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.csv deleted file mode 100644 index 836b235e6059..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.csv +++ /dev/null @@ -1,4 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"86400001669",100 -"Apple",2,"86401669570",22 -"Pear",15,"88069570474",33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.csv deleted file mode 100644 index bf1adb4f45cd..000000000000 --- 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.csv +++ /dev/null @@ -1,4 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"86401669",100 -"Apple",2,"88069570",22 -"Pear",15,"1755970474",33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.csv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.csv deleted file mode 100644 index a637ca738908..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.csv-csv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.csv +++ /dev/null @@ -1,4 +0,0 @@ -"Fruit","Price","Time","Weight" -"Banana",3,"88069",100 -"Apple",2,"1755970",22 -"Pear",15,"1669656874",33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.json deleted file mode 100644 index b30064d81bb3..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.json +++ /dev/null @@ -1,3 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"86400001669","Weight":100} 
-{"Fruit":"Apple","Price":2,"Time":"86401669570","Weight":22} -{"Fruit":"Pear","Price":15,"Time":"88069570474","Weight":33} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.json deleted file mode 100644 index 373a3012744d..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.json +++ /dev/null @@ -1,3 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"86400001","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"86401669","Weight":22} -{"Fruit":"Pear","Price":15,"Time":"88069570","Weight":33} diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json deleted file mode 100644 index 2266471fd0d1..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.json-json_each_row-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.json +++ /dev/null @@ -1,3 +0,0 @@ -{"Fruit":"Banana","Price":3,"Time":"86400","Weight":100} -{"Fruit":"Apple","Price":2,"Time":"86401","Weight":22} -{"Fruit":"Pear","Price":15,"Time":"88069","Weight":33} diff --git 
a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv deleted file mode 100644 index e4f1ae5c0a7c..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.tsv +++ /dev/null @@ -1,4 +0,0 @@ -Fruit Price Time Weight -Banana 3 86400001669 100 -Apple 2 86401669570 22 -Pear 15 88069570474 33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.tsv b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.tsv deleted file mode 100644 index 114c55995c5c..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.tsv +++ /dev/null @@ -1,4 +0,0 @@ -Fruit Price Time Weight -Banana 3 86401669 100 -Apple 2 88069570 22 -Pear 15 1755970474 33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.tsv 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.tsv deleted file mode 100644 index 550c08737f88..000000000000 --- a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_timestamp_unix_time_test.tsv-tsv_with_names-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.tsv +++ /dev/null @@ -1,4 +0,0 @@ -Fruit Price Time Weight -Banana 3 88069 100 -Apple 2 1755970 22 -Pear 15 1669656874 33 diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 000000000000..c6d11111da94 Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 000000000000..beecd9755f52 Binary files /dev/null and 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 000000000000..8ff52239979a Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v1-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 000000000000..c6d11111da94 Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MICROSECONDS_/UNIX_TIME_MICROSECONDS_timestamp_unix_time_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet 
b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 000000000000..beecd9755f52 Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_MILLISECONDS_/UNIX_TIME_MILLISECONDS_timestamp_unix_time_test.parquet differ diff --git a/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet new file mode 100644 index 000000000000..8ff52239979a Binary files /dev/null and b/ydb/tests/fq/s3/canondata/test_format_setting.TestS3.test_timestamp_unix_time_insert_v2-timestamp_unix_time_test.parquet-parquet-UNIX_TIME_SECONDS_/UNIX_TIME_SECONDS_timestamp_unix_time_test.parquet differ diff --git a/ydb/tests/fq/s3/test_bindings.py b/ydb/tests/fq/s3/test_bindings.py index 9c7ce151bdde..f3e7f21b5fbb 100644 --- a/ydb/tests/fq/s3/test_bindings.py +++ b/ydb/tests/fq/s3/test_bindings.py @@ -586,3 +586,36 @@ def test_count_for_pg_binding(self, kikimr, s3, client, pg_syntax): else: assert result_set.columns[0].type.type_id == ydb.Type.UINT64 assert result_set.rows[0].items[0].uint64_value == 1 + + @yq_all + @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) + def test_ast_in_failed_query_compilation(self, kikimr, s3, client): + resource = boto3.resource( + "s3", + endpoint_url=s3.s3_url, + aws_access_key_id="key", + aws_secret_access_key="secret_key" + ) + + bucket = resource.Bucket("bindbucket") + bucket.create(ACL='public-read') + 
bucket.objects.all().delete() + + connection_id = client.create_storage_connection("bb", "bindbucket").result.connection_id + + data_column = ydb.Column(name="data", type=ydb.Type(type_id=ydb.Type.PrimitiveTypeId.STRING)) + client.create_object_storage_binding(name="s3binding", + path="/", + format="raw", + connection_id=connection_id, + columns=[data_column]) + + sql = R''' + SELECT some_unknown_column FROM bindings.`s3binding`; + ''' + + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id + client.wait_query_status(query_id, fq.QueryMeta.FAILED) + + ast = client.describe_query(query_id).result.query.ast.data + assert "(\'columns \'(\'\"some_unknown_column\"))" in ast, "Invalid query ast" diff --git a/ydb/tests/fq/s3/test_explicit_partitioning.py b/ydb/tests/fq/s3/test_explicit_partitioning.py index 9d560e27677d..01d6d5d5cde4 100644 --- a/ydb/tests/fq/s3/test_explicit_partitioning.py +++ b/ydb/tests/fq/s3/test_explicit_partitioning.py @@ -16,7 +16,9 @@ class TestS3(TestYdsBase): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_partitioned_by(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_partitioned_by(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -59,7 +61,9 @@ def test_partitioned_by(self, kikimr, s3, client): "file_pattern": "*t?.csv" }) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM bindings.my_binding; ''' @@ -106,7 +110,10 @@ def test_partitioned_by(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_projection(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection(self, kikimr, s3, client, runtime_listing, yq_version): + + resource = boto3.resource( "s3", 
endpoint_url=s3.s3_url, @@ -156,7 +163,9 @@ def test_projection(self, kikimr, s3, client): }, partitioned_by=["year", "month", "day"]) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM bindings.my_binding; ''' @@ -189,7 +198,9 @@ def test_projection(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_pruning(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_pruning(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -237,7 +248,9 @@ def test_pruning(self, kikimr, s3, client): "file_pattern": "*.csv" }) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM bindings.my_binding where year > 2020 order by Fruit; ''' @@ -334,7 +347,9 @@ def test_validation(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -358,7 +373,9 @@ def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("json_bucket", "json_bucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -406,7 +423,9 @@ def test_no_schema_columns_except_partitioning_ones(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_projection_date(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def 
test_projection_date(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -450,7 +469,9 @@ def test_projection_date(self, kikimr, s3, client): }, partitioned_by=["dt"]) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM bindings.my_binding; ''' @@ -520,7 +541,9 @@ def test_projection_validate_columns(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_no_paritioning_columns(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_no_paritioning_columns(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -571,8 +594,11 @@ def test_no_paritioning_columns(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("logs2", "logs2") - sql = R''' - $projection = @@ { + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' + $projection = + @@ { "projection.enabled" : "true", "storage.location.template" : "/${date}", "projection.date.type" : "date", @@ -639,7 +665,9 @@ def test_no_paritioning_columns(self, kikimr, s3, client): ({"folder_id": "my_folder13"}, "year Uint64", False), ({"folder_id": "my_folder14"}, "year Date", False) ], indirect=["client"]) - def test_projection_integer_type_validation(self, kikimr, s3, client, column_type, is_correct): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_integer_type_validation(self, kikimr, s3, client, column_type, is_correct, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -663,7 +691,9 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "test_projection_integer_type_validation") - 
sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -677,7 +707,7 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ "storage.location.template" : "${year}-03-05" } @@; - ''' + R''' + ''' + f''' SELECT * FROM `fruitbucket`.`/` WITH ( @@ -690,7 +720,7 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ partitioned_by=(year), projection=$projection ) - '''.format(column_type=column_type) + ''' query_id = client.create_query("simple", sql).result.query_id if is_correct: @@ -726,7 +756,9 @@ def test_projection_integer_type_validation(self, kikimr, s3, client, column_typ ({"folder_id": "my_folder8"}, "year Utf8", False), ({"folder_id": "my_folder9"}, "year Date", False), ], indirect=["client"]) - def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, column_type, is_correct): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, column_type, is_correct, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -750,7 +782,9 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "test_projection_enum_type_invalid_validation") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -762,7 +796,7 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum "storage.location.template" : "${year}-03-05" } @@; - ''' + R''' + ''' + f''' SELECT * FROM `fruitbucket`.`/` WITH ( @@ -775,7 +809,7 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum partitioned_by=(year), projection=$projection ) - '''.format(column_type=column_type) + ''' query_id = client.create_query("simple", sql).result.query_id 
if is_correct: @@ -813,7 +847,9 @@ def test_projection_enum_type_invalid_validation(self, kikimr, s3, client, colum ({"folder_id": "my_folder15"}, "year Datetime", False), ({"folder_id": "my_folder16"}, "year Datetime NOT NULL", True), ], indirect=["client"]) - def test_projection_date_type_validation(self, kikimr, s3, client, column_type, is_correct): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_projection_date_type_validation(self, kikimr, s3, client, column_type, is_correct, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -837,7 +873,9 @@ def test_projection_date_type_validation(self, kikimr, s3, client, column_type, kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "test_projection_date_type_invalid_validation") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { @@ -853,7 +891,7 @@ def test_projection_date_type_validation(self, kikimr, s3, client, column_type, "storage.location.template" : "${year}-03-05" } @@; - ''' + R''' + ''' + f''' SELECT * FROM `fruitbucket`.`/` WITH ( @@ -866,7 +904,7 @@ def test_projection_date_type_validation(self, kikimr, s3, client, column_type, partitioned_by=(year), projection=$projection ) - '''.format(column_type=column_type) + ''' query_id = client.create_query("simple", sql).result.query_id if is_correct: @@ -1068,7 +1106,9 @@ def test_binding_projection_date_type_validation(self, kikimr, s3, client, colum @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_raw_format(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_raw_format(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -1094,7 +1134,9 @@ def test_raw_format(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) 
client.create_storage_connection("rawbucket", "raw_bucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' $projection = @@ { "projection.enabled" : "true", "storage.location.template" : "/${timestamp}", @@ -1124,6 +1166,10 @@ def test_raw_format(self, kikimr, s3, client): ) ''' + # temporary fix for dynamic listing + if yq_version == "v1": + sql = 'pragma dq.MaxTasksPerStage="10"; ' + sql + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) @@ -1145,8 +1191,9 @@ def test_raw_format(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - @pytest.mark.parametrize("blocks", [False, True]) - def test_parquet(self, kikimr, s3, blocks, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_parquet(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -1174,8 +1221,9 @@ def test_parquet(self, kikimr, s3, blocks, client): query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) - sql = 'pragma s3.UseBlocksSource="{}";'.format("true" if blocks else "false") - sql = sql + R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT foo, bar, x FROM pb.`part/` WITH ( diff --git a/ydb/tests/fq/s3/test_format_setting.py b/ydb/tests/fq/s3/test_format_setting.py index b51693e442a1..4ffc9ed05d6a 100644 --- a/ydb/tests/fq/s3/test_format_setting.py +++ b/ydb/tests/fq/s3/test_format_setting.py @@ -334,7 +334,8 @@ def test_timestamp_simple_iso(self, kikimr, s3, client, filename, type_format): @pytest.mark.parametrize("filename, type_format", [ ("timestamp/simple_iso/test.csv", "csv_with_names"), ("timestamp/simple_iso/test.tsv", "tsv_with_names"), - 
("timestamp/simple_iso/test.json", "json_each_row") + ("timestamp/simple_iso/test.json", "json_each_row"), + ("timestamp/simple_iso/test.parquet", "parquet") ]) def test_timestamp_simple_iso_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -383,7 +384,8 @@ def test_timestamp_simple_posix(self, kikimr, s3, client, filename, type_format) @pytest.mark.parametrize("filename, type_format", [ ("common/simple_posix/test.csv", "csv_with_names"), ("common/simple_posix/test.tsv", "tsv_with_names"), - ("common/simple_posix/test.json", "json_each_row") + ("common/simple_posix/test.json", "json_each_row"), + ("common/simple_posix/test.parquet", "parquet") ]) def test_timestamp_simple_posix_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -432,7 +434,8 @@ def test_date_time_simple_iso(self, kikimr, s3, client, filename, type_format): @pytest.mark.parametrize("filename, type_format", [ ("date_time/simple_iso/test.csv", "csv_with_names"), ("date_time/simple_iso/test.tsv", "tsv_with_names"), - ("date_time/simple_iso/test.json", "json_each_row") + ("date_time/simple_iso/test.json", "json_each_row"), + ("date_time/simple_iso/test.parquet", "parquet") ]) def test_date_time_simple_iso_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) @@ -507,7 +510,8 @@ def test_date_time_simple_posix_insert(self, kikimr, s3, client, filename, type_ @pytest.mark.parametrize("filename, type_format", [ ("timestamp/unix_time/test.csv", "csv_with_names"), ("timestamp/unix_time/test.tsv", "tsv_with_names"), - ("timestamp/unix_time/test.json", "json_each_row") + ("timestamp/unix_time/test.json", "json_each_row"), + ("timestamp/unix_time/test.parquet", "parquet") ]) def test_timestamp_unix_time_insert(self, kikimr, s3, client, filename, type_format, timestamp_format): self.create_bucket_and_upload_file(filename, 
s3, kikimr) @@ -531,7 +535,8 @@ def test_timestamp_unix_time_insert(self, kikimr, s3, client, filename, type_for @pytest.mark.parametrize("filename, type_format", [ ("common/simple_format/test.csv", "csv_with_names"), ("common/simple_format/test.tsv", "tsv_with_names"), - ("common/simple_format/test.json", "json_each_row") + ("common/simple_format/test.json", "json_each_row"), + ("common/simple_format/test.parquet", "parquet") ]) def test_timestamp_simple_format_insert(self, kikimr, s3, client, filename, type_format): self.create_bucket_and_upload_file(filename, s3, kikimr) diff --git a/ydb/tests/fq/s3/test_insert.py b/ydb/tests/fq/s3/test_insert.py index 420b31046187..edfd2324d900 100644 --- a/ydb/tests/fq/s3/test_insert.py +++ b/ydb/tests/fq/s3/test_insert.py @@ -21,7 +21,7 @@ def create_bucket_and_upload_file(self, filename, s3, kikimr): @yq_all @pytest.mark.parametrize("dataset_name", ["dataset", "dataにちは% set"]) - @pytest.mark.parametrize("format", ["json_list", "json_each_row", "csv_with_names"]) + @pytest.mark.parametrize("format", ["json_list", "json_each_row", "csv_with_names", "parquet"]) @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) def test_insert(self, kikimr, s3, client, format, dataset_name): resource = boto3.resource( diff --git a/ydb/tests/fq/s3/test_s3.py b/ydb/tests/fq/s3/test_s3.py index 091f9efc90af..21fa0382a91e 100644 --- a/ydb/tests/fq/s3/test_s3.py +++ b/ydb/tests/fq/s3/test_s3.py @@ -15,7 +15,9 @@ class TestS3(TestYdsBase): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_csv(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_csv(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -42,7 +44,9 @@ def test_csv(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "fbucket") - sql = 
R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM fruitbucket.`fruits.csv` WITH (format=csv_with_names, SCHEMA ( @@ -79,7 +83,58 @@ def test_csv(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_raw(self, kikimr, s3, client): + def test_csv_with_hopping(self, kikimr, s3, client): + resource = boto3.resource( + "s3", + endpoint_url=s3.s3_url, + aws_access_key_id="key", + aws_secret_access_key="secret_key" + ) + + bucket = resource.Bucket("fbucket") + bucket.create(ACL='public-read') + bucket.objects.all().delete() + + s3_client = boto3.client( + "s3", + endpoint_url=s3.s3_url, + aws_access_key_id="key", + aws_secret_access_key="secret_key" + ) + + fruits = R'''Time,Fruit,Price +0,Banana,3 +1,Apple,2 +2,Pear,15''' + s3_client.put_object(Body=fruits, Bucket='fbucket', Key='fruits.csv', ContentType='text/plain') + kikimr.control_plane.wait_bootstrap(1) + client.create_storage_connection("fruitbucket", "fbucket") + + sql = R''' + SELECT COUNT(*) as count, + FROM fruitbucket.`fruits.csv` + WITH (format=csv_with_names, SCHEMA ( + Time UInt64 NOT NULL, + Fruit String NOT NULL, + Price Int NOT NULL + )) + GROUP BY HOP(CAST(Time AS Timestamp?), "PT1M", "PT1M", "PT1M") + ''' + + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id + client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) + + data = client.get_result_data(query_id) + result_set = data.result.result_set + logging.debug(str(result_set)) + assert len(result_set.columns) == 1 + assert len(result_set.rows) == 1 + assert result_set.rows[0].items[0].uint64_value == 3 + + @yq_all + @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_raw(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -106,7 +161,9 @@ def 
test_raw(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("rawbucket", "rbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT Data FROM rawbucket.`*` WITH (format=raw, SCHEMA ( @@ -115,6 +172,9 @@ def test_raw(self, kikimr, s3, client): ORDER BY Data DESC ''' + # if yq_version == "v1": + sql = 'pragma dq.MaxTasksPerStage="10"; ' + sql + query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) @@ -133,7 +193,8 @@ def test_raw(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) @pytest.mark.parametrize("kikimr", [{"raw": 3, "": 4}], indirect=True) - def test_limit(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_limit(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -158,7 +219,9 @@ def test_limit(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("limbucket", "lbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT Data FROM limbucket.`*` WITH (format=raw, SCHEMA ( @@ -171,7 +234,9 @@ def test_limit(self, kikimr, s3, client): client.wait_query_status(query_id, fq.QueryMeta.FAILED) assert "Size of object file1.txt = 5 and exceeds limit = 3 specified for format raw" in str(client.describe_query(query_id).result) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM limbucket.`*` WITH (format=csv_with_names, SCHEMA ( @@ -185,7 +250,8 @@ def test_limit(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_bad_format(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def 
test_bad_format(self, kikimr, s3, client, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -210,7 +276,9 @@ def test_bad_format(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("badbucket", "bbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select * from badbucket.`*.*` with (format=json_list, schema (data string)) limit 1; ''' @@ -256,7 +324,7 @@ def put_kv(k, v): client.create_yds_connection(name="yds", database_id="FakeDatabaseId") # Run query - sql = R''' + sql = f''' PRAGMA dq.MaxTasksPerStage="2"; $s3_dict_raw = @@ -279,7 +347,7 @@ def put_kv(k, v): FROM ( SELECT Yson::Parse(Data) AS yson_data - FROM yds.`{input_topic}` WITH SCHEMA (Data String NOT NULL)); + FROM yds.`{self.input_topic}` WITH SCHEMA (Data String NOT NULL)); $joined_seq = SELECT @@ -289,15 +357,11 @@ def put_kv(k, v): INNER JOIN $s3_dict AS s3_dict ON yds_seq.key = s3_dict.key; - INSERT INTO yds.`{output_topic}` + INSERT INTO yds.`{self.output_topic}` SELECT Yson::SerializeText(Yson::From(TableRow())) FROM $joined_seq; - '''\ - .format( - input_topic=self.input_topic, - output_topic=self.output_topic, - ) + ''' query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.STREAMING).result.query_id client.wait_query_status(query_id, fq.QueryMeta.RUNNING) @@ -388,7 +452,7 @@ def test_write_result(self, kikimr, s3, client, yq_version): time.sleep(10) # 2 x node info update period - sql = R''' + sql = f''' SELECT Fruit, sum(Price) as Price, sum(Weight) as Weight FROM fruitbucket.`fruits*` WITH (format=csv_with_names, SCHEMA ( @@ -418,7 +482,8 @@ def test_write_result(self, kikimr, s3, client, yq_version): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_precompute(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_precompute(self, kikimr, s3, client, runtime_listing, 
yq_version): resource = boto3.resource( "s3", @@ -445,7 +510,9 @@ def test_precompute(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("prebucket", "pbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select count(*) as Cnt from prebucket.`file1.txt` with (format=raw, schema( Data String NOT NULL )) @@ -476,7 +543,9 @@ def test_precompute(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_failed_precompute(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_failed_precompute(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -490,7 +559,9 @@ def test_failed_precompute(self, kikimr, s3, client): client.create_storage_connection("fp", "fpbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + insert into fp.`path/` with (format=json_each_row) select * from AS_TABLE([<|foo:123, bar:"xxx"u|>,<|foo:456, bar:"yyy"u|>]); ''' @@ -498,7 +569,9 @@ def test_failed_precompute(self, kikimr, s3, client): query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select count(*) from fp.`path/` with (format=json_each_row, schema( foo Int NOT NULL, bar String NOT NULL @@ -520,7 +593,9 @@ def test_failed_precompute(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_missed(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_missed(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -547,7 +622,9 @@ def 
test_missed(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "fbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT * FROM fruitbucket.`fruits.csv` WITH (format=csv_with_names, SCHEMA ( @@ -564,7 +641,9 @@ def test_missed(self, kikimr, s3, client): @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_simple_hits_47(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_simple_hits_47(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -591,7 +670,9 @@ def test_simple_hits_47(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("fruitbucket", "fbucket") - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + $data = SELECT * FROM fruitbucket.`fruits.csv` WITH (format=csv_with_names, SCHEMA ( @@ -624,7 +705,8 @@ def test_simple_hits_47(self, kikimr, s3, client): @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) @pytest.mark.parametrize("raw", [True, False]) @pytest.mark.parametrize("path_pattern", ["exact_file", "directory_scan"]) - def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern, runtime_listing, yq_version): resource = boto3.resource( "s3", @@ -662,13 +744,16 @@ def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern): else: raise ValueError(f"Unknown path_pattern {path_pattern}") - sql = R''' + format = "raw" if raw else "csv_with_names" + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT count(*) as cnt FROM i18nbucket.`{path}` WITH (format={format}, SCHEMA ( Data String )); - '''.format(path=path, 
format="raw" if raw else "csv_with_names") + ''' query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) @@ -687,7 +772,9 @@ def test_i18n_unpartitioned(self, kikimr, s3, client, raw, path_pattern): @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) @pytest.mark.parametrize("raw", [False, True]) @pytest.mark.parametrize("partitioning", ["hive", "projection"]) - def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_version): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_version, runtime_listing): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -718,33 +805,38 @@ def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_versi kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("i18nbucket", "ibucket") + format = "raw" if raw else "csv_with_names" if partitioning == "projection": - sql = R''' - $projection = @@ { - "projection.enabled" : "true", - "storage.location.template" : "/folder=${folder}", - "projection.folder.type" : "enum", - "projection.folder.values" : "%こん,に ちは,に" - } @@;''' + ''' - SELECT count(*) as cnt - FROM i18nbucket.`dataset` - WITH ( - format={}, - SCHEMA ( - Data String, - folder String NOT NULL - ), - partitioned_by=(folder), - projection=$projection - ) - WHERE folder = 'に ちは' or folder = '%こん'; - '''.format("raw" if raw else "csv_with_names") + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + ''' + R''' + $projection = @@ { + "projection.enabled" : "true", + "storage.location.template" : "/folder=${folder}", + "projection.folder.type" : "enum", + "projection.folder.values" : "%こん,に ちは,に" + } @@;''' + f''' + SELECT count(*) as cnt + FROM i18nbucket.`dataset` + WITH ( + format={format}, + SCHEMA ( + Data String, + folder String NOT 
NULL + ), + partitioned_by=(folder), + projection=$projection + ) + WHERE folder = 'に ちは' or folder = '%こん'; + ''' elif partitioning == "hive": - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + SELECT count(*) as cnt FROM i18nbucket.`dataset` WITH ( - format={}, + format={format}, SCHEMA ( Data String, folder String NOT NULL @@ -752,7 +844,7 @@ def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_versi partitioned_by=(folder) ) WHERE folder = 'に ちは' or folder = '%こん'; - '''.format("raw" if raw else "csv_with_names") + ''' else: raise ValueError(f"Unknown partitioning {partitioning}") @@ -771,7 +863,9 @@ def test_i18n_partitioning(self, kikimr, s3, client, raw, partitioning, yq_versi @yq_all @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_huge_source(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", ["false", "true"]) + def test_huge_source(self, kikimr, s3, client, runtime_listing, yq_version): + resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -785,15 +879,20 @@ def test_huge_source(self, kikimr, s3, client): kikimr.control_plane.wait_bootstrap(1) client.create_storage_connection("hugebucket", "hbucket") - sql = R''' + long_literal = "*" * 1024 + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + insert into hugebucket.`path/` with (format=csv_with_names) - select * from AS_TABLE(ListReplicate(<|s:"{}"u|>, 1024 * 10)); - '''.format("*" * 1024) + select * from AS_TABLE(ListReplicate(<|s:"{long_literal}"u|>, 1024 * 10)); + ''' query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.COMPLETED) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{runtime_listing}"; + select count(*) from hugebucket.`path/` with (format=csv_with_names, schema( s String NOT NULL )) diff --git a/ydb/tests/fq/s3/test_yq_v2.py 
b/ydb/tests/fq/s3/test_yq_v2.py index 88d6837a5158..baf96ea0db96 100644 --- a/ydb/tests/fq/s3/test_yq_v2.py +++ b/ydb/tests/fq/s3/test_yq_v2.py @@ -17,7 +17,8 @@ class TestS3(TestYdsBase): @yq_v2 @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) - def test_yqv2_enabled(self, kikimr, s3, client): + @pytest.mark.parametrize("runtime_listing", [False, True]) + def test_yqv2_enabled(self, kikimr, s3, client, runtime_listing): resource = boto3.resource( "s3", endpoint_url=s3.s3_url, @@ -56,7 +57,8 @@ def test_yqv2_enabled(self, kikimr, s3, client): "csv_delimiter": ";" }) - sql = R''' + sql = f''' + pragma s3.UseRuntimeListing="{str(runtime_listing).lower()}"; pragma s3.UseBlocksSource="false"; SELECT * FROM my_binding; -- syntax without bindings. supported only in yqv2 diff --git a/ydb/tests/fq/yds/test_select_1.py b/ydb/tests/fq/yds/test_select_1.py index 6c43b72ffae4..116c37dd5ee7 100644 --- a/ydb/tests/fq/yds/test_select_1.py +++ b/ydb/tests/fq/yds/test_select_1.py @@ -120,11 +120,11 @@ def test_compile_error(self, client, yq_version): assert "Failed to parse query" in describe_string, describe_string @yq_all - def test_ast_in_failed_query(self, client): - sql = "SELECT unwrap(1 / 0)" + def test_ast_in_failed_query_runtime(self, client): + sql = "SELECT unwrap(42 / 0) AS error_column" query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id client.wait_query_status(query_id, fq.QueryMeta.FAILED) - ast = str(client.describe_query(query_id).result.query.ast) - assert ast != "", "Query ast not found" + ast = client.describe_query(query_id).result.query.ast.data + assert "(\'\"error_column\" (Unwrap (/ (Int32 \'\"42\")" in ast, "Invalid query ast" diff --git a/ydb/tests/library/harness/kikimr_config.py b/ydb/tests/library/harness/kikimr_config.py index 5a8c3a680758..17f20055b437 100644 --- a/ydb/tests/library/harness/kikimr_config.py +++ b/ydb/tests/library/harness/kikimr_config.py @@ -159,7 
+159,9 @@ def __init__( hive_config=None, datashard_config=None, enforce_user_token_requirement=False, - default_user_sid=None + default_user_sid=None, + pg_compatible_expirement=False, + generic_connector_config=None, # typing.Optional[TGenericConnectorConfig] ): if extra_feature_flags is None: extra_feature_flags = [] @@ -376,6 +378,41 @@ def __init__( if default_user_sid: self.yaml_config["domains_config"]["security_config"]["default_user_sids"] = [default_user_sid] + if pg_compatible_expirement: + self.yaml_config["table_service_config"]["enable_prepared_ddl"] = True + # self.yaml_config["table_service_config"]["enable_ast_cache"] = True + # self.yaml_config["table_service_config"]["enable_pg_consts_to_params"] = True + self.yaml_config["table_service_config"]["index_auto_choose_mode"] = 'max_used_prefix' + self.yaml_config["feature_flags"]['enable_temp_tables'] = True + self.yaml_config["feature_flags"]['enable_table_pg_types'] = True + + if generic_connector_config: + if "query_service_config" not in self.yaml_config: + self.yaml_config["query_service_config"] = {} + + self.yaml_config["query_service_config"]["generic"] = { + "connector": { + "endpoint": { + "host": generic_connector_config.Endpoint.host, + "port": generic_connector_config.Endpoint.port, + }, + "use_ssl": generic_connector_config.UseSsl + }, + "default_settings": [ + { + "name": "DateTimeFormat", + "value": "string" + }, + { + "name": "UsePredicatePushdown", + "value": "true" + } + ] + } + + self.yaml_config["feature_flags"]["enable_external_data_sources"] = True + self.yaml_config["feature_flags"]["enable_script_execution_operations"] = True + @property def pdisks_info(self): return self._pdisks_info diff --git a/ydb/tests/library/ut/kikimr_config.py b/ydb/tests/library/ut/kikimr_config.py new file mode 100644 index 000000000000..e32102107b49 --- /dev/null +++ b/ydb/tests/library/ut/kikimr_config.py @@ -0,0 +1,23 @@ +from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator + 
+from ydb.library.yql.providers.common.proto.gateways_config_pb2 import TGenericConnectorConfig + + +def test_kikimr_config_generator_generic_connector_config(): + generic_connector_config = TGenericConnectorConfig() + generic_connector_config.Endpoint.host = "localhost" + generic_connector_config.Endpoint.port = 50051 + generic_connector_config.UseSsl = False + + cfg_gen = KikimrConfigGenerator(generic_connector_config=generic_connector_config) + yaml_config = cfg_gen.yaml_config + + assert yaml_config["query_service_config"]["generic"]["connector"]["endpoint"]["host"] == generic_connector_config.Endpoint.host + assert yaml_config["query_service_config"]["generic"]["connector"]["endpoint"]["port"] == generic_connector_config.Endpoint.port + assert yaml_config["query_service_config"]["generic"]["connector"]["use_ssl"] == generic_connector_config.UseSsl + assert yaml_config["query_service_config"]["generic"]["default_settings"] == [ + {"name": "DateTimeFormat", "value": "string"}, + {"name": "UsePredicatePushdown", "value": "true"}, + ] + assert yaml_config["feature_flags"]["enable_external_data_sources"] is True + assert yaml_config["feature_flags"]["enable_script_execution_operations"] is True diff --git a/ydb/tests/library/ut/ya.make b/ydb/tests/library/ut/ya.make new file mode 100644 index 000000000000..9becd0e76ef8 --- /dev/null +++ b/ydb/tests/library/ut/ya.make @@ -0,0 +1,12 @@ +PY3TEST() + +PEERDIR( + ydb/tests/library + ydb/library/yql/providers/common/proto +) + +TEST_SRCS( + kikimr_config.py +) + +END() diff --git a/ydb/tests/library/ya.make b/ydb/tests/library/ya.make index 81b4cc2aed83..2e043071c2b2 100644 --- a/ydb/tests/library/ya.make +++ b/ydb/tests/library/ya.make @@ -95,6 +95,7 @@ PEERDIR( library/python/svn_version library/python/testing/yatest_common ydb/core/protos + ydb/library/yql/providers/common/proto ydb/public/api/grpc ydb/public/api/grpc/draft ydb/public/api/protos @@ -103,3 +104,5 @@ PEERDIR( ) END() + +RECURSE_FOR_TESTS(ut) diff --git 
a/ydb/tests/tools/kqprun/.gitignore b/ydb/tests/tools/kqprun/.gitignore index 9aec6451388c..e240c8c1f76b 100644 --- a/ydb/tests/tools/kqprun/.gitignore +++ b/ydb/tests/tools/kqprun/.gitignore @@ -1,3 +1,7 @@ sync_dir +example +udfs *.log +*.json *.sql +*.bin diff --git a/ydb/tests/tools/kqprun/kqprun.cpp b/ydb/tests/tools/kqprun/kqprun.cpp index 5b250f2734da..efb4f6cb5acb 100644 --- a/ydb/tests/tools/kqprun/kqprun.cpp +++ b/ydb/tests/tools/kqprun/kqprun.cpp @@ -1,5 +1,7 @@ #include "src/kqp_runner.h" +#include + #include #include @@ -22,7 +24,7 @@ struct TExecutionOptions { TString ScriptTraceId = "kqprun"; bool HasResults() const { - return ScriptQuery && ScriptQueryAction == NKikimrKqp::QUERY_ACTION_EXECUTE && !ClearExecution; + return ScriptQuery && ScriptQueryAction == NKikimrKqp::QUERY_ACTION_EXECUTE; } }; @@ -46,6 +48,10 @@ void RunScript(const TExecutionOptions& executionOptions, const NKqpRun::TRunner if (!runner.ExecuteScript(executionOptions.ScriptQuery, executionOptions.ScriptQueryAction, executionOptions.ScriptTraceId)) { ythrow yexception() << "Script execution failed"; } + Cout << colors.Yellow() << "Fetching script results..." << colors.Default() << Endl; + if (!runner.FetchScriptResults()) { + ythrow yexception() << "Fetch script results failed"; + } } else { if (!runner.ExecuteQuery(executionOptions.ScriptQuery, executionOptions.ScriptQueryAction, executionOptions.ScriptTraceId)) { ythrow yexception() << "Query execution failed"; @@ -54,11 +60,10 @@ void RunScript(const TExecutionOptions& executionOptions, const NKqpRun::TRunner } if (executionOptions.HasResults()) { - Cout << colors.Yellow() << "Writing script results..." << colors.Default() << Endl; - if (!runner.WriteScriptResults()) { - ythrow yexception() << "Writing script results failed"; - } + runner.PrintScriptResults(); } + + Cout << colors.Yellow() << "Finalization of kqp runner..." 
<< colors.Default() << Endl; } @@ -74,6 +79,20 @@ THolder SetupDefaultFileOutput(const TString& filePath, IOutputStre } +TIntrusivePtr CreateFunctionRegistry(const TString& udfsDirectory, TVector udfsPaths) { + if (!udfsDirectory.empty() || !udfsPaths.empty()) { + NColorizer::TColors colors = NColorizer::AutoColors(Cout); + Cout << colors.Yellow() << "Fetching udfs..." << colors.Default() << Endl; + } + + NKikimr::NMiniKQL::FindUdfsInDir(udfsDirectory, &udfsPaths); + auto functionRegistry = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, udfsPaths)->Clone(); + NKikimr::NMiniKQL::FillStaticModules(*functionRegistry); + + return functionRegistry; +} + + void RunMain(int argc, const char* argv[]) { TExecutionOptions executionOptions; NKqpRun::TRunnerOptions runnerOptions; @@ -87,9 +106,11 @@ void RunMain(int argc, const char* argv[]) { TString logFile = "-"; TString appConfigFile = "./configuration/app_config.conf"; + TString traceOptType = "disabled"; TString scriptQueryAction = "execute"; TString planOutputFormat = "pretty"; TString resultOutputFormat = "rows"; + i64 resultsRowsLimit = 1000; TVector udfsPaths; TString udfsDirectory; @@ -103,7 +124,7 @@ void RunMain(int argc, const char* argv[]) { .Optional() .RequiredArgument("FILE") .StoreResult(&schemeQueryFile); - options.AddLongOption("app-config", "File with app config (TAppConfig)") + options.AddLongOption('c', "app-config", "File with app config (TAppConfig)") .Optional() .RequiredArgument("FILE") .DefaultValue(appConfigFile) @@ -135,33 +156,33 @@ void RunMain(int argc, const char* argv[]) { .NoArgument() .DefaultValue(executionOptions.ClearExecution) .SetFlag(&executionOptions.ClearExecution); - options.AddLongOption("trace-opt", "print AST in the begin of each transformation") + options.AddLongOption('T', "trace-opt", "print AST in the begin of each transformation, one of { scheme | script | all }") .Optional() - .NoArgument() 
- .DefaultValue(runnerOptions.YdbSettings.TraceOpt) - .SetFlag(&runnerOptions.YdbSettings.TraceOpt); - options.AddLongOption("script-action", "Script query execute action, one of { execute | explain }") + .RequiredArgument("STR") + .DefaultValue(traceOptType) + .StoreResult(&traceOptType); + options.AddLongOption('A', "script-action", "Script query execute action, one of { execute | explain }") .Optional() .RequiredArgument("STR") .DefaultValue(scriptQueryAction) .StoreResult(&scriptQueryAction); - options.AddLongOption("plan-format", "Script query plan format, one of { pretty | table | json }") + options.AddLongOption('P', "plan-format", "Script query plan format, one of { pretty | table | json }") .Optional() .RequiredArgument("STR") .DefaultValue(planOutputFormat) .StoreResult(&planOutputFormat); - options.AddLongOption("result-format", "Script query result format, one of { rows | full }") + options.AddLongOption('R', "result-format", "Script query result format, one of { rows | full }") .Optional() .RequiredArgument("STR") .DefaultValue(resultOutputFormat) .StoreResult(&resultOutputFormat); - options.AddLongOption("result-rows-limit", "Rows limit for script execution results") + options.AddLongOption('L', "result-rows-limit", "Rows limit for script execution results") .Optional() .RequiredArgument("INT") - .DefaultValue(runnerOptions.ResultsRowsLimit) - .StoreResult(&runnerOptions.ResultsRowsLimit); + .DefaultValue(resultsRowsLimit) + .StoreResult(&resultsRowsLimit); - options.AddLongOption("udf", "Load shared library with UDF by given path") + options.AddLongOption('u', "udf", "Load shared library with UDF by given path") .Optional() .RequiredArgument("FILE") .AppendTo(&udfsPaths); @@ -191,15 +212,19 @@ void RunMain(int argc, const char* argv[]) { // Runner options - if (runnerOptions.ResultsRowsLimit < 0) { - ythrow yexception() << "Results rows limit less than zero"; - } - THolder resultFileHolder = SetupDefaultFileOutput(resultOutputFile, 
runnerOptions.ResultOutput); THolder schemeQueryAstFileHolder = SetupDefaultFileOutput(schemeQueryAstFile, runnerOptions.SchemeQueryAstOutput); THolder scriptQueryAstFileHolder = SetupDefaultFileOutput(scriptQueryAstFile, runnerOptions.ScriptQueryAstOutput); THolder scriptQueryPlanFileHolder = SetupDefaultFileOutput(scriptQueryPlanFile, runnerOptions.ScriptQueryPlanOutput); + runnerOptions.TraceOptType = + (traceOptType == TStringBuf("all")) ? NKqpRun::TRunnerOptions::ETraceOptType::All + : (traceOptType == TStringBuf("scheme")) ? NKqpRun::TRunnerOptions::ETraceOptType::Scheme + : (traceOptType == TStringBuf("script")) ? NKqpRun::TRunnerOptions::ETraceOptType::Script + : (traceOptType == TStringBuf("disabled")) ? NKqpRun::TRunnerOptions::ETraceOptType::Disabled + : NKqpRun::TRunnerOptions::ETraceOptType::All; + runnerOptions.YdbSettings.TraceOptEnabled = runnerOptions.TraceOptType != NKqpRun::TRunnerOptions::ETraceOptType::Disabled; + runnerOptions.ResultOutputFormat = (resultOutputFormat == TStringBuf("rows")) ? NKqpRun::TRunnerOptions::EResultOutputFormat::RowsJson : (resultOutputFormat == TStringBuf("full")) ? 
NKqpRun::TRunnerOptions::EResultOutputFormat::FullJson @@ -215,20 +240,22 @@ void RunMain(int argc, const char* argv[]) { if (logFile != "-") { runnerOptions.YdbSettings.LogOutputFile = logFile; + std::remove(logFile.c_str()); } runnerOptions.YdbSettings.YqlToken = GetEnv("YQL_TOKEN"); - - NKikimr::NMiniKQL::FindUdfsInDir(udfsDirectory, &udfsPaths); - auto functionRegistry = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, udfsPaths)->Clone(); - NKikimr::NMiniKQL::FillStaticModules(*functionRegistry); - runnerOptions.YdbSettings.FunctionRegistry = functionRegistry.Get(); + runnerOptions.YdbSettings.FunctionRegistry = CreateFunctionRegistry(udfsDirectory, udfsPaths).Get(); TString appConfigData = TFileInput(appConfigFile).ReadAll(); if (!google::protobuf::TextFormat::ParseFromString(appConfigData, &runnerOptions.YdbSettings.AppConfig)) { ythrow yexception() << "Bad format of app configuration"; } + if (resultsRowsLimit < 0) { + ythrow yexception() << "Results rows limit less than zero"; + } + runnerOptions.YdbSettings.AppConfig.MutableQueryServiceConfig()->SetScriptResultRowsLimit(resultsRowsLimit); + RunScript(executionOptions, runnerOptions); } diff --git a/ydb/tests/tools/kqprun/src/actors.cpp b/ydb/tests/tools/kqprun/src/actors.cpp index c8c31e99151f..48f73e4cabfc 100644 --- a/ydb/tests/tools/kqprun/src/actors.cpp +++ b/ydb/tests/tools/kqprun/src/actors.cpp @@ -9,12 +9,19 @@ namespace { class TRunScriptActorMock : public NActors::TActorBootstrapped { public: - TRunScriptActorMock(THolder request, NThreading::TPromise promise, ui64 resultSizeLimit) + TRunScriptActorMock(THolder request, + NThreading::TPromise promise, + ui64 resultRowsLimit, ui64 resultSizeLimit, std::vector& resultSets) : Request_(std::move(request)) , Promise_(promise) + , ResultRowsLimit_(std::numeric_limits::max()) , ResultSizeLimit_(std::numeric_limits::max()) + , ResultSets_(resultSets) { - if (resultSizeLimit 
&& resultSizeLimit < std::numeric_limits::max()) { + if (resultRowsLimit) { + ResultRowsLimit_ = resultRowsLimit; + } + if (resultSizeLimit) { ResultSizeLimit_ = resultSizeLimit; } } @@ -36,6 +43,28 @@ class TRunScriptActorMock : public NActors::TActorBootstrappedRecord.SetSeqNo(ev->Get()->Record.GetSeqNo()); response->Record.SetFreeSpace(ResultSizeLimit_); + auto resultSetIndex = ev->Get()->Record.GetQueryResultIndex(); + if (resultSetIndex >= ResultSets_.size()) { + ResultSets_.resize(resultSetIndex + 1); + } + + if (!ResultSets_[resultSetIndex].truncated()) { + for (auto& row : *ev->Get()->Record.MutableResultSet()->mutable_rows()) { + if (static_cast(ResultSets_[resultSetIndex].rows_size()) >= ResultRowsLimit_) { + ResultSets_[resultSetIndex].set_truncated(true); + break; + } + + if (ResultSets_[resultSetIndex].ByteSizeLong() + row.ByteSizeLong() > ResultSizeLimit_) { + ResultSets_[resultSetIndex].set_truncated(true); + break; + } + + *ResultSets_[resultSetIndex].add_rows() = std::move(row); + } + *ResultSets_[resultSetIndex].mutable_columns() = ev->Get()->Record.GetResultSet().columns(); + } + Send(ev->Sender, response.Release()); } @@ -47,13 +76,17 @@ class TRunScriptActorMock : public NActors::TActorBootstrapped Request_; NThreading::TPromise Promise_; - i64 ResultSizeLimit_; + ui64 ResultRowsLimit_; + ui64 ResultSizeLimit_; + std::vector& ResultSets_; }; } // anonymous namespace -NActors::IActor* CreateRunScriptActorMock(THolder request, NThreading::TPromise promise, ui64 resultSizeLimit) { - return new TRunScriptActorMock(std::move(request), promise, resultSizeLimit); +NActors::IActor* CreateRunScriptActorMock(THolder request, + NThreading::TPromise promise, + ui64 resultRowsLimit, ui64 resultSizeLimit, std::vector& resultSets) { + return new TRunScriptActorMock(std::move(request), promise, resultRowsLimit, resultSizeLimit, resultSets); } } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/actors.h b/ydb/tests/tools/kqprun/src/actors.h index 
f6cbf8b43bef..9e7a251d14ff 100644 --- a/ydb/tests/tools/kqprun/src/actors.h +++ b/ydb/tests/tools/kqprun/src/actors.h @@ -4,6 +4,8 @@ namespace NKqpRun { -NActors::IActor* CreateRunScriptActorMock(THolder request, NThreading::TPromise promise, ui64 resultSizeLimit); +NActors::IActor* CreateRunScriptActorMock(THolder request, + NThreading::TPromise promise, + ui64 resultRowsLimit, ui64 resultSizeLimit, std::vector& resultSets); } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/common.h b/ydb/tests/tools/kqprun/src/common.h index ef4b490f8bd1..1d57272b2dd9 100644 --- a/ydb/tests/tools/kqprun/src/common.h +++ b/ydb/tests/tools/kqprun/src/common.h @@ -12,23 +12,28 @@ namespace NKqpRun { struct TYdbSetupSettings { TString DomainName = "Root"; - bool TraceOpt = false; + bool TraceOptEnabled = false; TMaybe LogOutputFile; TString YqlToken; - NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry = nullptr; + TIntrusivePtr FunctionRegistry = nullptr; NKikimrConfig::TAppConfig AppConfig; }; struct TRunnerOptions { + enum class ETraceOptType { + Disabled, + Scheme, + Script, + All, + }; + enum class EResultOutputFormat { RowsJson, // Rows in json format FullJson, // Columns, rows and types in json format }; - i64 ResultsRowsLimit = 1000; - IOutputStream* ResultOutput = &Cout; IOutputStream* SchemeQueryAstOutput = nullptr; IOutputStream* ScriptQueryAstOutput = nullptr; @@ -36,6 +41,7 @@ struct TRunnerOptions { EResultOutputFormat ResultOutputFormat = EResultOutputFormat::RowsJson; NYdb::NConsoleClient::EOutputFormat PlanOutputFormat = NYdb::NConsoleClient::EOutputFormat::Default; + ETraceOptType TraceOptType = ETraceOptType::Disabled; TYdbSetupSettings YdbSettings; }; diff --git a/ydb/tests/tools/kqprun/src/kqp_runner.cpp b/ydb/tests/tools/kqprun/src/kqp_runner.cpp index d72d461c9c58..b4eacc1895fe 100644 --- a/ydb/tests/tools/kqprun/src/kqp_runner.cpp +++ b/ydb/tests/tools/kqprun/src/kqp_runner.cpp @@ -21,8 +21,11 @@ class TKqpRunner::TImpl { {} bool 
ExecuteSchemeQuery(const TString& query) const { + StartSchemeTraceOpt(); + TSchemeMeta meta; TRequestResult status = YdbSetup_.SchemeQueryRequest(query, meta); + TYdbSetup::StopTraceOpt(); PrintSchemeQueryAst(meta.Ast); @@ -35,6 +38,8 @@ class TKqpRunner::TImpl { } bool ExecuteScript(const TString& script, NKikimrKqp::EQueryAction action, const TString& traceId) { + StartScriptTraceOpt(); + TRequestResult status = YdbSetup_.ScriptRequest(script, action, traceId, ExecutionOperation_); if (!status.IsSuccess()) { @@ -45,9 +50,12 @@ class TKqpRunner::TImpl { return WaitScriptExecutionOperation(); } - bool ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) const { + bool ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) { + StartScriptTraceOpt(); + TQueryMeta meta; - TRequestResult status = YdbSetup_.QueryRequest(query, action, traceId, meta); + TRequestResult status = YdbSetup_.QueryRequest(query, action, traceId, meta, ResultSets_); + TYdbSetup::StopTraceOpt(); PrintScriptAst(meta.Ast); @@ -61,22 +69,32 @@ class TKqpRunner::TImpl { return true; } - bool WriteScriptResults() const { + bool FetchScriptResults() { + TYdbSetup::StopTraceOpt(); + + ResultSets_.resize(ExecutionMeta_.ResultSetsCount); for (i32 resultSetId = 0; resultSetId < ExecutionMeta_.ResultSetsCount; ++resultSetId) { - Ydb::ResultSet resultSet; - TRequestResult status = YdbSetup_.FetchScriptExecutionResultsRequest(ExecutionOperation_, resultSetId, Options_.ResultsRowsLimit, resultSet); + TRequestResult status = YdbSetup_.FetchScriptExecutionResultsRequest(ExecutionOperation_, resultSetId, ResultSets_[resultSetId]); if (!status.IsSuccess()) { Cerr << CerrColors_.Red() << "Failed to fetch result set with id " << resultSetId << ", reason:" << CerrColors_.Default() << Endl << status.ToString() << Endl; return false; } - - PrintScriptResult(resultSet); } return true; } + void PrintScriptResults() const { + Cout << 
CoutColors_.Cyan() << "Writing script query results" << CoutColors_.Default() << Endl; + for (size_t i = 0; i < ResultSets_.size(); ++i) { + if (ResultSets_.size() > 1) { + *Options_.ResultOutput << CoutColors_.Cyan() << "Result set " << i + 1 << ":" << CoutColors_.Default() << Endl; + } + PrintScriptResult(ResultSets_[i]); + } + } + private: bool WaitScriptExecutionOperation() { TRequestResult status; @@ -107,6 +125,18 @@ class TKqpRunner::TImpl { return true; } + void StartSchemeTraceOpt() const { + if (Options_.TraceOptType == TRunnerOptions::ETraceOptType::All || Options_.TraceOptType == TRunnerOptions::ETraceOptType::Scheme) { + YdbSetup_.StartTraceOpt(); + } + } + + void StartScriptTraceOpt() const { + if (Options_.TraceOptType == TRunnerOptions::ETraceOptType::All || Options_.TraceOptType == TRunnerOptions::ETraceOptType::Script) { + YdbSetup_.StartTraceOpt(); + } + } + void PrintSchemeQueryAst(const TString& ast) const { if (Options_.SchemeQueryAstOutput) { Cout << CoutColors_.Cyan() << "Writing scheme query ast" << CoutColors_.Default() << Endl; @@ -122,7 +152,7 @@ class TKqpRunner::TImpl { } void PrintScriptPlan(const TString& plan) const { - if (Options_.ScriptQueryAstOutput) { + if (Options_.ScriptQueryPlanOutput) { Cout << CoutColors_.Cyan() << "Writing script query plan" << CoutColors_.Default() << Endl; NYdb::NConsoleClient::TQueryPlanPrinter printer(Options_.PlanOutputFormat, true, *Options_.ScriptQueryPlanOutput); @@ -132,9 +162,17 @@ class TKqpRunner::TImpl { void PrintScriptResult(const Ydb::ResultSet& resultSet) const { switch (Options_.ResultOutputFormat) { - case TRunnerOptions::EResultOutputFormat::RowsJson: - Options_.ResultOutput->Write(NYdb::FormatResultSetJson(resultSet, NYdb::EBinaryStringEncoding::Unicode)); + case TRunnerOptions::EResultOutputFormat::RowsJson: { + NYdb::TResultSet result(resultSet); + NYdb::TResultSetParser parser(result); + while (parser.TryNextRow()) { + NJsonWriter::TBuf writer(NJsonWriter::HEM_UNSAFE, 
Options_.ResultOutput); + writer.SetWriteNanAsString(true); + NYdb::FormatResultRowJson(parser, result.GetColumnsMeta(), writer, NYdb::EBinaryStringEncoding::Unicode); + *Options_.ResultOutput << Endl; + } break; + } case TRunnerOptions::EResultOutputFormat::FullJson: resultSet.PrintJSON(*Options_.ResultOutput); @@ -151,6 +189,7 @@ class TKqpRunner::TImpl { TString ExecutionOperation_; TExecutionMeta ExecutionMeta_; + std::vector ResultSets_; }; @@ -172,8 +211,12 @@ bool TKqpRunner::ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction act return Impl_->ExecuteQuery(query, action, traceId); } -bool TKqpRunner::WriteScriptResults() const { - return Impl_->WriteScriptResults(); +bool TKqpRunner::FetchScriptResults() { + return Impl_->FetchScriptResults(); +} + +void TKqpRunner::PrintScriptResults() const { + Impl_->PrintScriptResults(); } } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/kqp_runner.h b/ydb/tests/tools/kqprun/src/kqp_runner.h index f2eef77bb076..de01588dce74 100644 --- a/ydb/tests/tools/kqprun/src/kqp_runner.h +++ b/ydb/tests/tools/kqprun/src/kqp_runner.h @@ -15,7 +15,9 @@ class TKqpRunner { bool ExecuteQuery(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) const; - bool WriteScriptResults() const; + bool FetchScriptResults(); + + void PrintScriptResults() const; private: class TImpl; diff --git a/ydb/tests/tools/kqprun/src/ydb_setup.cpp b/ydb/tests/tools/kqprun/src/ydb_setup.cpp index 7340de37bbe2..031522a744f0 100644 --- a/ydb/tests/tools/kqprun/src/ydb_setup.cpp +++ b/ydb/tests/tools/kqprun/src/ydb_setup.cpp @@ -104,7 +104,7 @@ class TYdbSetup::TImpl { } auto functionRegistryFactory = [this](const NKikimr::NScheme::TTypeRegistry&) { - return Settings_.FunctionRegistry; + return Settings_.FunctionRegistry.Get(); }; serverSettings.SetFrFactory(functionRegistryFactory); @@ -140,7 +140,7 @@ class TYdbSetup::TImpl { } void InitializeYqlLogger() { - if (!Settings_.TraceOpt) { + if 
(!Settings_.TraceOptEnabled) { return; } @@ -159,7 +159,7 @@ class TYdbSetup::TImpl { entry->SetLevel(NActors::NLog::PRI_TRACE); } - NYql::NLog::InitLogger(CreateLogBackend()); + NYql::NLog::InitLogger(NActors::CreateNullBackend()); } public: @@ -184,12 +184,14 @@ class TYdbSetup::TImpl { return RunKqpProxyRequest(std::move(event)); } - NKikimr::NKqp::TEvKqp::TEvQueryResponse::TPtr QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId) const { + NKikimr::NKqp::TEvKqp::TEvQueryResponse::TPtr QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, std::vector& resultSets) const { auto event = MakeHolder(); FillScriptRequest(query, action, traceId, event->Record); auto promise = NThreading::NewPromise(); - GetRuntime()->Register(CreateRunScriptActorMock(std::move(event), promise, Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultSizeLimit())); + auto rowsLimit = Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultRowsLimit(); + auto sizeLimit = Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultSizeLimit(); + GetRuntime()->Register(CreateRunScriptActorMock(std::move(event), promise, rowsLimit, sizeLimit, resultSets)); return promise.GetFuture().GetValueSync(); } @@ -201,17 +203,30 @@ class TYdbSetup::TImpl { return RunKqpProxyRequest(std::move(event)); } - NKikimr::NKqp::TEvKqp::TEvFetchScriptResultsResponse::TPtr FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, i64 limit) const { + NKikimr::NKqp::TEvKqp::TEvFetchScriptResultsResponse::TPtr FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId) const { TString executionId = *NKikimr::NKqp::ScriptExecutionIdFromOperation(operation); NActors::TActorId edgeActor = GetRuntime()->AllocateEdgeActor(); - NActors::IActor* fetchActor = NKikimr::NKqp::CreateGetScriptExecutionResultActor(edgeActor, Settings_.DomainName, executionId, resultSetId, 0, limit); + auto rowsLimit = 
Settings_.AppConfig.GetQueryServiceConfig().GetScriptResultRowsLimit(); + NActors::IActor* fetchActor = NKikimr::NKqp::CreateGetScriptExecutionResultActor(edgeActor, Settings_.DomainName, executionId, resultSetId, 0, rowsLimit ? rowsLimit : std::numeric_limits::max()); GetRuntime()->Register(fetchActor); return GetRuntime()->GrabEdgeEvent(edgeActor); } + void StartTraceOpt() const { + if (!Settings_.TraceOptEnabled) { + ythrow yexception() << "Trace opt was disabled"; + } + + NYql::NLog::YqlLogger().ResetBackend(CreateLogBackend()); + } + + static void StopTraceOpt() { + NYql::NLog::YqlLogger().ResetBackend(NActors::CreateNullBackend()); + } + private: NActors::TTestActorRuntime* GetRuntime() const { return Server_->GetRuntime(); @@ -308,8 +323,8 @@ TRequestResult TYdbSetup::ScriptRequest(const TString& script, NKikimrKqp::EQuer return TRequestResult(scriptExecutionOperation->Get()->Status, scriptExecutionOperation->Get()->Issues); } -TRequestResult TYdbSetup::QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta) const { - auto queryOperationResponse = Impl_->QueryRequest(query, action, traceId)->Get()->Record.GetRef(); +TRequestResult TYdbSetup::QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta, std::vector& resultSets) const { + auto queryOperationResponse = Impl_->QueryRequest(query, action, traceId, resultSets)->Get()->Record.GetRef(); meta.Ast = queryOperationResponse.GetResponse().GetQueryAst(); meta.Plan = queryOperationResponse.GetResponse().GetQueryPlan(); @@ -339,8 +354,8 @@ TRequestResult TYdbSetup::GetScriptExecutionOperationRequest(const TString& oper return TRequestResult(scriptExecutionOperation->Get()->Status, scriptExecutionOperation->Get()->Issues); } -TRequestResult TYdbSetup::FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, i64 limit, Ydb::ResultSet& resultSet) const { - auto scriptExecutionResults = 
Impl_->FetchScriptExecutionResultsRequest(operation, resultSetId, limit)->Get()->Record; +TRequestResult TYdbSetup::FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, Ydb::ResultSet& resultSet) const { + auto scriptExecutionResults = Impl_->FetchScriptExecutionResultsRequest(operation, resultSetId)->Get()->Record; resultSet = scriptExecutionResults.GetResultSet(); @@ -350,4 +365,12 @@ TRequestResult TYdbSetup::FetchScriptExecutionResultsRequest(const TString& oper return TRequestResult(scriptExecutionResults.GetStatus(), issues); } +void TYdbSetup::StartTraceOpt() const { + Impl_->StartTraceOpt(); +} + +void TYdbSetup::StopTraceOpt() { + TYdbSetup::TImpl::StopTraceOpt(); +} + } // namespace NKqpRun diff --git a/ydb/tests/tools/kqprun/src/ydb_setup.h b/ydb/tests/tools/kqprun/src/ydb_setup.h index c375d4f3326a..68a00058a3fd 100644 --- a/ydb/tests/tools/kqprun/src/ydb_setup.h +++ b/ydb/tests/tools/kqprun/src/ydb_setup.h @@ -51,11 +51,15 @@ class TYdbSetup { TRequestResult ScriptRequest(const TString& script, NKikimrKqp::EQueryAction action, const TString& traceId, TString& operation) const; - TRequestResult QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta) const; + TRequestResult QueryRequest(const TString& query, NKikimrKqp::EQueryAction action, const TString& traceId, TQueryMeta& meta, std::vector& resultSets) const; TRequestResult GetScriptExecutionOperationRequest(const TString& operation, TExecutionMeta& meta) const; - TRequestResult FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, i64 limit, Ydb::ResultSet& resultSet) const; + TRequestResult FetchScriptExecutionResultsRequest(const TString& operation, i32 resultSetId, Ydb::ResultSet& resultSet) const; + + void StartTraceOpt() const; + + static void StopTraceOpt(); private: class TImpl;