diff --git a/ydb/library/workload/abstract/workload_query_generator.h b/ydb/library/workload/abstract/workload_query_generator.h
index 71d4a5ad4f5a..7db3ae1bb258 100644
--- a/ydb/library/workload/abstract/workload_query_generator.h
+++ b/ydb/library/workload/abstract/workload_query_generator.h
@@ -178,6 +178,8 @@ class TWorkloadParams {
     }
 
     virtual TString GetWorkloadName() const = 0;
+    // Hook for validating parsed parameters; the default implementation performs no checks.
+    virtual void Validate(const ECommandType /*commandType*/, int /*workloadType*/) {}
 public:
     ui64 BulkSize = 10000;
     std::string DbPath;
diff --git a/ydb/library/workload/log/log.cpp b/ydb/library/workload/log/log.cpp
index 7339adce9acd..2116a57650e9 100644
--- a/ydb/library/workload/log/log.cpp
+++ b/ydb/library/workload/log/log.cpp
@@ -4,6 +4,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -252,9 +254,27 @@ class TRandomLogGenerator {
         return result.str();
     }
 
+    // Returns a timestamp drawn uniformly from [from, to). Both bounds are seconds since
+    // epoch, the unit documented for the `date-from` / `date-to` options.
+    TInstant UniformInstant(ui64 from, ui64 to) const {
+        if (from >= to) {
+            // Empty or inverted interval: nothing to sample, fall back to the left bound.
+            return TInstant::Seconds(from);
+        }
+        // One generator per thread: constructing and seeding a new one for every row is wasteful.
+        static thread_local TMersenne<ui64> rnd(Seed());
+        // TInstant stores microseconds, so convert the bounds before sampling instead of
+        // feeding raw seconds to FromValue().
+        return TInstant::FromValue(rnd.Uniform(TInstant::Seconds(from).GetValue(), TInstant::Seconds(to).GetValue()));
+    }
+
     TInstant RandomInstant() const {
         auto result = TInstant::Now() - TDuration::Seconds(Params.TimestampSubtract);
-        i64 millisecondsDiff = 60 * 1000 * NormalRandom(0., Params.TimestampStandardDeviationMinutes);
+        ui64 timestampStandardDeviationMinutes = 0;
+        if (Params.TimestampStandardDeviationMinutes.Defined()) {
+            timestampStandardDeviationMinutes = *Params.TimestampStandardDeviationMinutes;
+        }
+        i64 millisecondsDiff = 60 * 1000 * NormalRandom(0., timestampStandardDeviationMinutes);
         if (millisecondsDiff >= 0) { // TDuration::MilliSeconds can't be negative for some reason...
             result += TDuration::MilliSeconds(millisecondsDiff);
         } else {
@@ -279,7 +299,7 @@ class TRandomLogGenerator {
         for (size_t row = 0; row < count; ++row) {
             result.emplace_back();
             result.back().LogId = CreateGuidAsString().c_str();
-            result.back().Ts = RandomInstant();
+            result.back().Ts = !!Params.TimestampDateFrom && !!Params.TimestampDateTo ?
UniformInstant(*Params.TimestampDateFrom, *Params.TimestampDateTo) : RandomInstant();
             result.back().Level = RandomNumber(10);
             result.back().ServiceName = RandomWord(false);
             result.back().Component = RandomWord(true);
@@ -360,6 +371,97 @@ TQueryInfoList TLogGenerator::GetWorkload(int type) {
     }
 }
 
+// Registers the options that describe the table layout (string length and column counts).
+void TLogWorkloadParams::ConfigureOptsColumns(NLastGetopt::TOpts& opts) {
+    opts.AddLongOption("len", "String len")
+        .DefaultValue(StringLen).StoreResult(&StringLen);
+    opts.AddLongOption("int-cols", "Number of int columns")
+        .DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt);
+    opts.AddLongOption("str-cols", "Number of string columns")
+        .DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt);
+    opts.AddLongOption("key-cols", "Number of key columns")
+        .DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt);
+}
+
+// Registers the options used by commands that generate rows: the column layout plus
+// row count, timestamp distribution and null ratio.
+void TLogWorkloadParams::ConfigureOptsFillData(NLastGetopt::TOpts& opts) {
+    ConfigureOptsColumns(opts);
+    opts.AddLongOption("rows", "Number of rows to upsert")
+        .DefaultValue(RowsCnt).StoreResult(&RowsCnt);
+    // The three timestamp-distribution options deliberately have no default value:
+    // Validate() has to tell "not passed" apart from an explicitly passed value.
+    opts.AddLongOption("timestamp_deviation", "Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added.")
+        .StoreResult(&TimestampStandardDeviationMinutes);
+    opts.AddLongOption("date-from", "Left boundary of the interval to generate "
+        "timestamp uniformly from specified interval. Presents as seconds since epoch. Once this option passed, 'date-to' "
+        "should be passed as well. This option is mutually exclusive with 'timestamp_deviation'")
+        .StoreResult(&TimestampDateFrom);
+    opts.AddLongOption("date-to", "Right boundary of the interval to generate "
+        "timestamp uniformly from specified interval. Presents as seconds since epoch. Once this option passed, 'date-from' "
+        "should be passed as well. This option is mutually exclusive with 'timestamp_deviation'")
+        .StoreResult(&TimestampDateTo);
+    opts.AddLongOption("timestamp_subtract", "Value in seconds to subtract from timestamp. For each timestamp, this value in seconds is subtracted")
+        .DefaultValue(TimestampSubtract).StoreResult(&TimestampSubtract);
+    opts.AddLongOption("null-percent", "Percent of nulls in generated data")
+        .DefaultValue(NullPercent).StoreResult(&NullPercent);
+}
+
+// Checks the timestamp options of the commands that generate rows (`run` with
+// insert/upsert/bulk upsert, and `import`): `timestamp_deviation` is mutually exclusive
+// with `date-from`/`date-to`, which must be passed together and form a non-empty interval.
+// If none of the three is passed, `timestamp_deviation` falls back to 0.
+// Throws yexception when the combination is invalid.
+void TLogWorkloadParams::Validate(const ECommandType commandType, int workloadType) {
+    bool generatesRows = false;
+    switch (commandType) {
+    case TWorkloadParams::ECommandType::Run:
+        switch (static_cast<TLogGenerator::EType>(workloadType)) {
+        case TLogGenerator::EType::Insert:
+        case TLogGenerator::EType::Upsert:
+        case TLogGenerator::EType::BulkUpsert:
+            generatesRows = true;
+            break;
+        case TLogGenerator::EType::Select:
+            break;
+        }
+        break;
+    case TWorkloadParams::ECommandType::Import:
+        // Import registers the same options via ConfigureOptsFillData(), so the same rules apply.
+        generatesRows = true;
+        break;
+    case TWorkloadParams::ECommandType::Init:
+    case TWorkloadParams::ECommandType::Clean:
+    case TWorkloadParams::ECommandType::Root:
+        break;
+    }
+    if (!generatesRows) {
+        return;
+    }
+
+    const bool timestampDevPassed = TimestampStandardDeviationMinutes.Defined();
+    const bool dateFromPassed = TimestampDateFrom.Defined();
+    const bool dateToPassed = TimestampDateTo.Defined();
+
+    if (!timestampDevPassed && !dateFromPassed && !dateToPassed) {
+        // Nothing was requested explicitly: keep the previous default of zero deviation.
+        TimestampStandardDeviationMinutes = 0;
+        return;
+    }
+
+    if (timestampDevPassed && (dateFromPassed || dateToPassed)) {
+        throw yexception() << "The `timestamp_deviation` and `date-from`, `date-to` are mutually exclusive and shouldn't be provided at once";
+    }
+
+    if (dateFromPassed != dateToPassed) {
+        throw yexception() << "The `date-from` and `date-to` parameters must be provided together to specify the interval for uniform PK generation";
+    }
+
+    if (dateFromPassed && *TimestampDateFrom >= *TimestampDateTo) {
+        throw yexception() << "Invalid interval [`date-from`, `date-to`)";
+    }
+}
+
 void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandType commandType, int workloadType) {
     opts.AddLongOption('p', "path", "Path where benchmark tables are located")
         .Optional()
@@ -379,14 +481,7 @@ void
TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT .DefaultValue(PartitionSizeMb).StoreResult(&PartitionSizeMb); opts.AddLongOption("auto-partition", "Enable auto partitioning by load.") .DefaultValue(PartitionsByLoad).StoreResult(&PartitionsByLoad); - opts.AddLongOption("len", "String len") - .DefaultValue(StringLen).StoreResult(&StringLen); - opts.AddLongOption("int-cols", "Number of int columns") - .DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt); - opts.AddLongOption("str-cols", "Number of string columns") - .DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt); - opts.AddLongOption("key-cols", "Number of key columns") - .DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt); + ConfigureOptsColumns(opts); opts.AddLongOption("ttl", "TTL for timestamp column in minutes") .DefaultValue(TimestampTtlMinutes).StoreResult(&TimestampTtlMinutes); opts.AddLongOption("store", "Storage type." @@ -408,42 +488,14 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT case TLogGenerator::EType::Insert: case TLogGenerator::EType::Upsert: case TLogGenerator::EType::BulkUpsert: - opts.AddLongOption("len", "String len") - .DefaultValue(StringLen).StoreResult(&StringLen); - opts.AddLongOption("int-cols", "Number of int columns") - .DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt); - opts.AddLongOption("str-cols", "Number of string columns") - .DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt); - opts.AddLongOption("key-cols", "Number of key columns") - .DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt); - opts.AddLongOption("rows", "Number of rows to upsert") - .DefaultValue(RowsCnt).StoreResult(&RowsCnt); - opts.AddLongOption("timestamp_deviation", "Standard deviation. 
For each timestamp, a random variable with a specified standard deviation in minutes is added.") - .DefaultValue(TimestampStandardDeviationMinutes).StoreResult(&TimestampStandardDeviationMinutes); - opts.AddLongOption("timestamp_subtract", "Value in seconds to subtract from timestamp. For each timestamp, this value in seconds is subtracted") - .DefaultValue(0).StoreResult(&TimestampSubtract); - opts.AddLongOption("null-percent", "Percent of nulls in generated data") - .DefaultValue(NullPercent).StoreResult(&NullPercent); + ConfigureOptsFillData(opts); break; case TLogGenerator::EType::Select: break; } break; case TWorkloadParams::ECommandType::Import: - opts.AddLongOption("len", "String len") - .DefaultValue(StringLen).StoreResult(&StringLen); - opts.AddLongOption("int-cols", "Number of int columns") - .DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt); - opts.AddLongOption("str-cols", "Number of string columns") - .DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt); - opts.AddLongOption("key-cols", "Number of key columns") - .DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt); - opts.AddLongOption("rows", "Number of rows to upsert") - .DefaultValue(RowsCnt).StoreResult(&RowsCnt); - opts.AddLongOption("timestamp_deviation", "Standard deviation. 
For each timestamp, a random variable with a specified standard deviation in minutes is added.")
-            .DefaultValue(TimestampStandardDeviationMinutes).StoreResult(&TimestampStandardDeviationMinutes);
-        opts.AddLongOption("null-percent", "Percent of nulls in generated data")
-            .DefaultValue(NullPercent).StoreResult(&NullPercent);
+        ConfigureOptsFillData(opts);
         break;
     default:
         break;
diff --git a/ydb/library/workload/log/log.h b/ydb/library/workload/log/log.h
index 7145a0e04791..5a4f900ce61b 100644
--- a/ydb/library/workload/log/log.h
+++ b/ydb/library/workload/log/log.h
@@ -23,7 +23,10 @@ class TLogWorkloadParams : public TWorkloadParams {
     ui64 StrColumnsCnt = 0;
     ui64 IntColumnsCnt = 0;
     ui64 KeyColumnsCnt = 0;
-    ui64 TimestampStandardDeviationMinutes = 0;
+    // Left unset unless passed on the command line, so Validate() can detect conflicting options.
+    TMaybe<ui64> TimestampStandardDeviationMinutes;
+    TMaybe<ui64> TimestampDateFrom; // seconds since epoch
+    TMaybe<ui64> TimestampDateTo; // seconds since epoch
     ui64 TimestampTtlMinutes = 0;
     ui64 TimestampSubtract = 0;
     ui64 RowsCnt = 1;
@@ -34,6 +37,13 @@ class TLogWorkloadParams : public TWorkloadParams {
     YDB_READONLY(EStoreType, StoreType, EStoreType::Row);
 
     TWorkloadDataInitializer::TList CreateDataInitializers() const override;
+
+    // Checks `timestamp_deviation` / `date-from` / `date-to`; throws yexception on an invalid combination.
+    void Validate(const ECommandType commandType, int workloadType) override;
+private:
+    // Option-registration helpers shared by the ConfigureOpts() branches.
+    void ConfigureOptsFillData(NLastGetopt::TOpts& opts);
+    void ConfigureOptsColumns(NLastGetopt::TOpts& opts);
 };
 
 class TLogGenerator final: public TWorkloadQueryGeneratorBase {
diff --git a/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp b/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
index a5ce8a927349..341906e6446f 100644
--- a/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
+++ b/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
@@ -279,6 +279,7 @@ int TWorkloadCommand::RunWorkload(NYdbWorkload::IWorkloadQueryGenerator& workloa
     StopTime = StartTime + TDuration::Seconds(TotalSec);
 
     NPar::LocalExecutor().RunAdditionalThreads(Threads);
+
     auto futures = NPar::LocalExecutor().ExecRangeWithFutures([this, &workloadGen, type](int id) {
         try {
             WorkerFn(id, workloadGen, type);
@@ -350,6 +351,8 @@ int TWorkloadCommandRun::Run(TConfig& config) {
     PrepareForRun(config);
     Params.DbPath = config.Database;
+    // Validate before building the generator so that invalid options are rejected first.
+    Params.Validate(NYdbWorkload::TWorkloadParams::ECommandType::Run, Type);
     auto workloadGen = Params.CreateGenerator();
     return RunWorkload(*workloadGen, Type);
 }
 